drm/radeon/kms: add r600 KMS support
Jerome Glisse [Tue, 8 Sep 2009 00:10:24 +0000 (10:10 +1000)]
This adds the r600 KMS + CS support to the Linux kernel.

The r600 TTM support is quite basic and still needs more
work, especially around using interrupts, but the polled fencing
should work okay for now.

Also, TTM currently uses memcpy to do VRAM moves;
the code to use a 3D blit for this is included, but it
isn't fully debugged yet.

Authors:
Alex Deucher <alexdeucher@gmail.com>
Dave Airlie <airlied@redhat.com>
Jerome Glisse <jglisse@redhat.com>

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

34 files changed:
drivers/gpu/drm/radeon/Makefile
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/avivod.h [new file with mode: 0644]
drivers/gpu/drm/radeon/r100.c
drivers/gpu/drm/radeon/r100d.h [new file with mode: 0644]
drivers/gpu/drm/radeon/r300.c
drivers/gpu/drm/radeon/r300.h [deleted file]
drivers/gpu/drm/radeon/r300d.h [new file with mode: 0644]
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/r600_blit.c [new file with mode: 0644]
drivers/gpu/drm/radeon/r600_blit_kms.c [new file with mode: 0644]
drivers/gpu/drm/radeon/r600_blit_shaders.c [new file with mode: 0644]
drivers/gpu/drm/radeon/r600_blit_shaders.h [new file with mode: 0644]
drivers/gpu/drm/radeon/r600_cp.c
drivers/gpu/drm/radeon/r600_cs.c [new file with mode: 0644]
drivers/gpu/drm/radeon/r600d.h [new file with mode: 0644]
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_asic.h
drivers/gpu/drm/radeon/radeon_atombios.c
drivers/gpu/drm/radeon/radeon_clocks.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_drv.h
drivers/gpu/drm/radeon/radeon_fence.c
drivers/gpu/drm/radeon/radeon_reg.h
drivers/gpu/drm/radeon/radeon_ring.c
drivers/gpu/drm/radeon/radeon_share.h
drivers/gpu/drm/radeon/radeon_state.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/radeon/rs400.c
drivers/gpu/drm/radeon/rs780.c [deleted file]
drivers/gpu/drm/radeon/rv515.c
drivers/gpu/drm/radeon/rv515d.h [moved from drivers/gpu/drm/radeon/rv515r.h with 78% similarity]
drivers/gpu/drm/radeon/rv770.c
drivers/gpu/drm/radeon/rv770d.h [new file with mode: 0644]

index c5db0c4..14c3fe6 100644 (file)
@@ -46,8 +46,9 @@ radeon-$(CONFIG_DRM_RADEON_KMS) += radeon_device.o radeon_kms.o \
        radeon_encoders.o radeon_display.o radeon_cursor.o radeon_i2c.o \
        radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \
        radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
-       rs400.o rs600.o rs690.o rv515.o r520.o r600.o rs780.o rv770.o \
-       radeon_test.o r200.o radeon_legacy_tv.o
+       rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \
+       r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \
+       r600_blit_kms.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 
index 8e31e99..a7edd0f 100644 (file)
@@ -389,6 +389,7 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode)
                                        pll_flags |= RADEON_PLL_USE_REF_DIV;
                        }
                        radeon_encoder = to_radeon_encoder(encoder);
+                       break;
                }
        }
 
diff --git a/drivers/gpu/drm/radeon/avivod.h b/drivers/gpu/drm/radeon/avivod.h
new file mode 100644 (file)
index 0000000..d4e6e6e
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef AVIVOD_H
+#define AVIVOD_H
+
+/*
+ * Register offsets for the display and VGA blocks, for use with the
+ * RREG32()/WREG32() accessors.  The more deeply indented defines appear to
+ * be bit values of the register defined directly above them.
+ */
+
+/* D1* registers (presumably display controller 1 -- TODO confirm) */
+#define        D1CRTC_CONTROL                                  0x6080
+#define                CRTC_EN                                         (1 << 0)
+#define        D1CRTC_UPDATE_LOCK                              0x60E8
+#define        D1GRPH_PRIMARY_SURFACE_ADDRESS                  0x6110
+#define        D1GRPH_SECONDARY_SURFACE_ADDRESS                0x6118
+
+/* D2* registers: same set as D1*, each offset 0x800 higher */
+#define        D2CRTC_CONTROL                                  0x6880
+#define        D2CRTC_UPDATE_LOCK                              0x68E8
+#define        D2GRPH_PRIMARY_SURFACE_ADDRESS                  0x6910
+#define        D2GRPH_SECONDARY_SURFACE_ADDRESS                0x6918
+
+/* VGA control registers; the DVGA_CONTROL_* bits carry no D1/D2 prefix */
+#define        D1VGA_CONTROL                                   0x0330
+#define                DVGA_CONTROL_MODE_ENABLE                        (1 << 0)
+#define                DVGA_CONTROL_TIMING_SELECT                      (1 << 8)
+#define                DVGA_CONTROL_SYNC_POLARITY_SELECT               (1 << 9)
+#define                DVGA_CONTROL_OVERSCAN_TIMING_SELECT             (1 << 10)
+#define                DVGA_CONTROL_OVERSCAN_COLOR_EN                  (1 << 16)
+#define                DVGA_CONTROL_ROTATE                             (1 << 24)
+#define D2VGA_CONTROL                                  0x0338
+
+/* Remaining VGA registers (HDP control, memory base, render control) */
+#define        VGA_HDP_CONTROL                                 0x328
+#define                VGA_MEM_PAGE_SELECT_EN                          (1 << 0)
+#define                VGA_MEMORY_DISABLE                              (1 << 4)
+#define                VGA_RBBM_LOCK_DISABLE                           (1 << 8)
+#define                VGA_SOFT_RESET                                  (1 << 16)
+#define        VGA_MEMORY_BASE_ADDRESS                         0x0310
+#define        VGA_RENDER_CONTROL                              0x0300
+#define                VGA_VSTATUS_CNTL_MASK                           0x00030000
+
+#endif
index ee3ab62..5708c07 100644 (file)
@@ -31,6 +31,8 @@
 #include "radeon_drm.h"
 #include "radeon_reg.h"
 #include "radeon.h"
+#include "r100d.h"
+
 #include <linux/firmware.h>
 #include <linux/platform_device.h>
 
@@ -391,9 +393,9 @@ int r100_wb_init(struct radeon_device *rdev)
                        return r;
                }
        }
-       WREG32(0x774, rdev->wb.gpu_addr);
-       WREG32(0x70C, rdev->wb.gpu_addr + 1024);
-       WREG32(0x770, 0xff);
+       WREG32(RADEON_SCRATCH_ADDR, rdev->wb.gpu_addr);
+       WREG32(RADEON_CP_RB_RPTR_ADDR, rdev->wb.gpu_addr + 1024);
+       WREG32(RADEON_SCRATCH_UMSK, 0xff);
        return 0;
 }
 
@@ -559,18 +561,18 @@ static int r100_cp_init_microcode(struct radeon_device *rdev)
                fw_name = FIRMWARE_R520;
        }
 
-       err = request_firmware(&rdev->fw, fw_name, &pdev->dev);
+       err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
        platform_device_unregister(pdev);
        if (err) {
                printk(KERN_ERR "radeon_cp: Failed to load firmware \"%s\"\n",
                       fw_name);
-       } else if (rdev->fw->size % 8) {
+       } else if (rdev->me_fw->size % 8) {
                printk(KERN_ERR
                       "radeon_cp: Bogus length %zu in firmware \"%s\"\n",
-                      rdev->fw->size, fw_name);
+                      rdev->me_fw->size, fw_name);
                err = -EINVAL;
-               release_firmware(rdev->fw);
-               rdev->fw = NULL;
+               release_firmware(rdev->me_fw);
+               rdev->me_fw = NULL;
        }
        return err;
 }
@@ -584,9 +586,9 @@ static void r100_cp_load_microcode(struct radeon_device *rdev)
                       "programming pipes. Bad things might happen.\n");
        }
 
-       if (rdev->fw) {
-               size = rdev->fw->size / 4;
-               fw_data = (const __be32 *)&rdev->fw->data[0];
+       if (rdev->me_fw) {
+               size = rdev->me_fw->size / 4;
+               fw_data = (const __be32 *)&rdev->me_fw->data[0];
                WREG32(RADEON_CP_ME_RAM_ADDR, 0);
                for (i = 0; i < size; i += 2) {
                        WREG32(RADEON_CP_ME_RAM_DATAH,
@@ -632,7 +634,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
                DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
        }
 
-       if (!rdev->fw) {
+       if (!rdev->me_fw) {
                r = r100_cp_init_microcode(rdev);
                if (r) {
                        DRM_ERROR("Failed to load firmware!\n");
@@ -765,6 +767,12 @@ int r100_cp_reset(struct radeon_device *rdev)
        return -1;
 }
 
+/*
+ * r100_cp_commit - publish the driver's ring write pointer to the CP.
+ * @rdev: radeon device
+ *
+ * Writes rdev->cp.wptr to RADEON_CP_RB_WPTR and then reads the same
+ * register back, discarding the value (presumably to make sure the posted
+ * MMIO write has reached the hardware -- TODO confirm).
+ */
+void r100_cp_commit(struct radeon_device *rdev)
+{
+       WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
+       (void)RREG32(RADEON_CP_RB_WPTR);
+}
+
 
 /*
  * CS functions
@@ -2954,3 +2962,106 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track
                        }
        }
 }
+
+/**
+ * r100_ring_test - check that the CP consumes commands from the ring
+ * @rdev: radeon device
+ *
+ * Seeds a scratch register with 0xCAFEDEAD, queues a PACKET0 write of
+ * 0xDEADBEEF to that register on the ring and then polls the register once
+ * per microsecond for up to rdev->usec_timeout iterations.
+ *
+ * Returns 0 on success, the error from radeon_scratch_get() or
+ * radeon_ring_lock() if setup fails, or -EINVAL if the value written through
+ * the ring never shows up in the scratch register.
+ */
+int r100_ring_test(struct radeon_device *rdev)
+{
+       uint32_t scratch;
+       uint32_t tmp = 0;
+       unsigned i;
+       int r;
+
+       r = radeon_scratch_get(rdev, &scratch);
+       if (r) {
+               DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
+               return r;
+       }
+       /* Sentinel that the CP has to overwrite for the test to pass. */
+       WREG32(scratch, 0xCAFEDEAD);
+       r = radeon_ring_lock(rdev, 2);
+       if (r) {
+               DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+               goto out_free_scratch;
+       }
+       radeon_ring_write(rdev, PACKET0(scratch, 0));
+       radeon_ring_write(rdev, 0xDEADBEEF);
+       radeon_ring_unlock_commit(rdev);
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = RREG32(scratch);
+               if (tmp == 0xDEADBEEF) {
+                       break;
+               }
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               /* i is unsigned: print it with %u, as r100_ib_test() does */
+               DRM_INFO("ring test succeeded in %u usecs\n", i);
+       } else {
+               DRM_ERROR("radeon: ring test failed (sracth(0x%04X)=0x%08X)\n",
+                         scratch, tmp);
+               r = -EINVAL;
+       }
+out_free_scratch:
+       /* Single exit path so the scratch register is always returned. */
+       radeon_scratch_free(rdev, scratch);
+       return r;
+}
+
+/*
+ * r100_ring_ib_execute - queue an indirect buffer on the ring.
+ * @rdev: radeon device
+ * @ib: indirect buffer to execute
+ *
+ * Emits three dwords: a PACKET0 header addressed at RADEON_CP_IB_BASE with a
+ * count field of 1, followed by the IB's GPU address and its length in
+ * dwords.  Nothing is locked or committed here; the caller is presumably
+ * expected to hold the ring lock with at least 3 dwords reserved -- TODO
+ * confirm against callers.
+ */
+void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+       radeon_ring_write(rdev, PACKET0(RADEON_CP_IB_BASE, 1));
+       radeon_ring_write(rdev, ib->gpu_addr);
+       radeon_ring_write(rdev, ib->length_dw);
+}
+
+/**
+ * r100_ib_test - check that the CP can execute an indirect buffer
+ * @rdev: radeon device
+ *
+ * Seeds a scratch register with 0xCAFEDEAD, builds an 8-dword IB that writes
+ * 0xDEADBEEF to that register (padded with PACKET2(0) entries), schedules
+ * it, waits for its fence and then polls the scratch register once per
+ * microsecond for up to rdev->usec_timeout iterations.
+ *
+ * Every exit path after a successful allocation releases what was acquired:
+ * the scratch register is freed if radeon_ib_get() fails, and both the IB
+ * and the scratch register are freed if scheduling or the fence wait fails.
+ *
+ * Returns 0 on success, the error from radeon_scratch_get(),
+ * radeon_ib_get(), radeon_ib_schedule() or radeon_fence_wait(), or -EINVAL
+ * if the expected value never shows up in the scratch register.
+ */
+int r100_ib_test(struct radeon_device *rdev)
+{
+       struct radeon_ib *ib;
+       uint32_t scratch;
+       uint32_t tmp = 0;
+       unsigned i;
+       int r;
+
+       r = radeon_scratch_get(rdev, &scratch);
+       if (r) {
+               DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
+               return r;
+       }
+       /* Sentinel that the IB has to overwrite for the test to pass. */
+       WREG32(scratch, 0xCAFEDEAD);
+       r = radeon_ib_get(rdev, &ib);
+       if (r) {
+               goto out_free_scratch;
+       }
+       ib->ptr[0] = PACKET0(scratch, 0);
+       ib->ptr[1] = 0xDEADBEEF;
+       /* Fill the remaining six dwords with PACKET2(0) padding. */
+       for (i = 2; i < 8; i++) {
+               ib->ptr[i] = PACKET2(0);
+       }
+       ib->length_dw = 8;
+       r = radeon_ib_schedule(rdev, ib);
+       if (r) {
+               goto out_free_ib;
+       }
+       r = radeon_fence_wait(ib->fence, false);
+       if (r) {
+               goto out_free_ib;
+       }
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = RREG32(scratch);
+               if (tmp == 0xDEADBEEF) {
+                       break;
+               }
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ib test succeeded in %u usecs\n", i);
+       } else {
+               DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n",
+                         scratch, tmp);
+               r = -EINVAL;
+       }
+out_free_ib:
+       radeon_ib_free(rdev, &ib);
+out_free_scratch:
+       radeon_scratch_free(rdev, scratch);
+       return r;
+}
diff --git a/drivers/gpu/drm/radeon/r100d.h b/drivers/gpu/drm/radeon/r100d.h
new file mode 100644 (file)
index 0000000..6da7d92
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __R100D_H__
+#define __R100D_H__
+
+/*
+ * Command processor (CP) packet header encoding.
+ *
+ * CP_PACKETn is the packet type value already placed in bits 31:30 of the
+ * 32-bit header word; the *_SHIFT / *_MASK pairs describe the remaining
+ * header fields of each packet type.
+ */
+#define CP_PACKET0                     0x00000000
+#define                PACKET0_BASE_INDEX_SHIFT        0
+/*
+ * NOTE(review): this mask is 17 bits wide, so it overlaps bit 15 (decoded by
+ * CP_PACKET0_GET_ONE_REG_WR) and bit 16 (start of PACKET0_COUNT), while
+ * CP_PACKET0_GET_REG below only extracts 13 bits -- confirm intended width.
+ */
+#define                PACKET0_BASE_INDEX_MASK         (0x1ffff << 0)
+#define                PACKET0_COUNT_SHIFT             16
+#define                PACKET0_COUNT_MASK              (0x3fff << 16)
+#define CP_PACKET1                     0x40000000
+#define CP_PACKET2                     0x80000000
+#define                PACKET2_PAD_SHIFT               0
+#define                PACKET2_PAD_MASK                (0x3fffffff << 0)
+#define CP_PACKET3                     0xC0000000
+#define                PACKET3_IT_OPCODE_SHIFT         8
+#define                PACKET3_IT_OPCODE_MASK          (0xff << 8)
+#define                PACKET3_COUNT_SHIFT             16
+#define                PACKET3_COUNT_MASK              (0x3fff << 16)
+/* PACKET3 op code */
+#define                PACKET3_NOP                     0x10
+#define                PACKET3_3D_DRAW_VBUF            0x28
+#define                PACKET3_3D_DRAW_IMMD            0x29
+#define                PACKET3_3D_DRAW_INDX            0x2A
+#define                PACKET3_3D_LOAD_VBPNTR          0x2F
+#define                PACKET3_INDX_BUFFER             0x33
+#define                PACKET3_3D_DRAW_VBUF_2          0x34
+#define                PACKET3_3D_DRAW_IMMD_2          0x35
+#define                PACKET3_3D_DRAW_INDX_2          0x36
+#define                PACKET3_BITBLT_MULTI            0x9B
+
+/*
+ * Header builders.  They rely on REG_SET(), which is not defined in this
+ * header and must be provided by the including file.  PACKET0 takes a
+ * register byte offset and passes it to REG_SET() shifted right by 2.
+ */
+#define PACKET0(reg, n)        (CP_PACKET0 |                                   \
+                        REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |      \
+                        REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v)     (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n) (CP_PACKET3 |                                   \
+                        REG_SET(PACKET3_IT_OPCODE, (op)) |             \
+                        REG_SET(PACKET3_COUNT, (n)))
+
+/* Values returned by CP_PACKET_GET_TYPE() */
+#define        PACKET_TYPE0    0
+#define        PACKET_TYPE1    1
+#define        PACKET_TYPE2    2
+#define        PACKET_TYPE3    3
+
+/*
+ * Header decoders: type is bits 31:30, count is bits 29:16, the PACKET0
+ * register is the low 13 bits shifted left by 2, ONE_REG_WR is bit 15 and
+ * the PACKET3 opcode is bits 15:8.
+ */
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
+#endif
index 33a2c55..a5f82f7 100644 (file)
@@ -33,6 +33,7 @@
 #include "radeon_drm.h"
 #include "radeon_share.h"
 #include "r100_track.h"
+#include "r300d.h"
 
 #include "r300_reg_safe.h"
 
@@ -127,7 +128,7 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev)
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
        rv370_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
-                rdev->mc.gtt_size >> 20, table_addr);
+                (unsigned)(rdev->mc.gtt_size >> 20), table_addr);
        rdev->gart.ready = true;
        return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r300.h b/drivers/gpu/drm/radeon/r300.h
deleted file mode 100644 (file)
index 8486b4d..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2008 Advanced Micro Devices, Inc.
- * Copyright 2008 Red Hat Inc.
- * Copyright 2009 Jerome Glisse.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Dave Airlie
- *          Alex Deucher
- *          Jerome Glisse
- */
-#ifndef R300_H
-#define R300_H
-
-struct r300_asic {
-       const unsigned  *reg_safe_bm;
-       unsigned        reg_safe_bm_size;
-};
-
-#endif
diff --git a/drivers/gpu/drm/radeon/r300d.h b/drivers/gpu/drm/radeon/r300d.h
new file mode 100644 (file)
index 0000000..63ec076
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef __R300D_H__
+#define __R300D_H__
+
+/*
+ * Command processor (CP) packet header encoding.
+ *
+ * CP_PACKETn is the packet type value already placed in bits 31:30 of the
+ * 32-bit header word; the *_SHIFT / *_MASK pairs describe the remaining
+ * header fields of each packet type.
+ */
+#define CP_PACKET0                     0x00000000
+#define                PACKET0_BASE_INDEX_SHIFT        0
+/*
+ * NOTE(review): this mask is 17 bits wide, so it overlaps bit 15 (decoded by
+ * CP_PACKET0_GET_ONE_REG_WR) and bit 16 (start of PACKET0_COUNT), while
+ * CP_PACKET0_GET_REG below only extracts 13 bits -- confirm intended width.
+ */
+#define                PACKET0_BASE_INDEX_MASK         (0x1ffff << 0)
+#define                PACKET0_COUNT_SHIFT             16
+#define                PACKET0_COUNT_MASK              (0x3fff << 16)
+#define CP_PACKET1                     0x40000000
+#define CP_PACKET2                     0x80000000
+#define                PACKET2_PAD_SHIFT               0
+#define                PACKET2_PAD_MASK                (0x3fffffff << 0)
+#define CP_PACKET3                     0xC0000000
+#define                PACKET3_IT_OPCODE_SHIFT         8
+#define                PACKET3_IT_OPCODE_MASK          (0xff << 8)
+#define                PACKET3_COUNT_SHIFT             16
+#define                PACKET3_COUNT_MASK              (0x3fff << 16)
+/* PACKET3 op code */
+#define                PACKET3_NOP                     0x10
+#define                PACKET3_3D_DRAW_VBUF            0x28
+#define                PACKET3_3D_DRAW_IMMD            0x29
+#define                PACKET3_3D_DRAW_INDX            0x2A
+#define                PACKET3_3D_LOAD_VBPNTR          0x2F
+#define                PACKET3_INDX_BUFFER             0x33
+#define                PACKET3_3D_DRAW_VBUF_2          0x34
+#define                PACKET3_3D_DRAW_IMMD_2          0x35
+#define                PACKET3_3D_DRAW_INDX_2          0x36
+#define                PACKET3_BITBLT_MULTI            0x9B
+
+/*
+ * Header builders.  They rely on REG_SET(), which is not defined in this
+ * header and must be provided by the including file.  PACKET0 takes a
+ * register byte offset and passes it to REG_SET() shifted right by 2.
+ */
+#define PACKET0(reg, n)        (CP_PACKET0 |                                   \
+                        REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |      \
+                        REG_SET(PACKET0_COUNT, (n)))
+#define PACKET2(v)     (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+#define PACKET3(op, n) (CP_PACKET3 |                                   \
+                        REG_SET(PACKET3_IT_OPCODE, (op)) |             \
+                        REG_SET(PACKET3_COUNT, (n)))
+
+/* Values returned by CP_PACKET_GET_TYPE() */
+#define        PACKET_TYPE0    0
+#define        PACKET_TYPE1    1
+#define        PACKET_TYPE2    2
+#define        PACKET_TYPE3    3
+
+/*
+ * Header decoders: type is bits 31:30, count is bits 29:16, the PACKET0
+ * register is the low 13 bits shifted left by 2, ONE_REG_WR is bit 15 and
+ * the PACKET3 opcode is bits 15:8.
+ */
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
+#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+
+#endif
index 538cd90..d8fcef4 100644 (file)
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/seq_file.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
 #include "drmP.h"
-#include "radeon_reg.h"
+#include "radeon_drm.h"
 #include "radeon.h"
+#include "radeon_mode.h"
+#include "radeon_share.h"
+#include "r600d.h"
+#include "avivod.h"
+#include "atom.h"
 
-/* r600,rv610,rv630,rv620,rv635,rv670 depends on : */
-void rs600_mc_disable_clients(struct radeon_device *rdev);
+#define PFP_UCODE_SIZE 576
+#define PM4_UCODE_SIZE 1792
+#define R700_PFP_UCODE_SIZE 848
+#define R700_PM4_UCODE_SIZE 1360
+
+/* Firmware Names */
+MODULE_FIRMWARE("radeon/R600_pfp.bin");
+MODULE_FIRMWARE("radeon/R600_me.bin");
+MODULE_FIRMWARE("radeon/RV610_pfp.bin");
+MODULE_FIRMWARE("radeon/RV610_me.bin");
+MODULE_FIRMWARE("radeon/RV630_pfp.bin");
+MODULE_FIRMWARE("radeon/RV630_me.bin");
+MODULE_FIRMWARE("radeon/RV620_pfp.bin");
+MODULE_FIRMWARE("radeon/RV620_me.bin");
+MODULE_FIRMWARE("radeon/RV635_pfp.bin");
+MODULE_FIRMWARE("radeon/RV635_me.bin");
+MODULE_FIRMWARE("radeon/RV670_pfp.bin");
+MODULE_FIRMWARE("radeon/RV670_me.bin");
+MODULE_FIRMWARE("radeon/RS780_pfp.bin");
+MODULE_FIRMWARE("radeon/RS780_me.bin");
+MODULE_FIRMWARE("radeon/RV770_pfp.bin");
+MODULE_FIRMWARE("radeon/RV770_me.bin");
+MODULE_FIRMWARE("radeon/RV730_pfp.bin");
+MODULE_FIRMWARE("radeon/RV730_me.bin");
+MODULE_FIRMWARE("radeon/RV710_pfp.bin");
+MODULE_FIRMWARE("radeon/RV710_me.bin");
+
+int r600_debugfs_mc_info_init(struct radeon_device *rdev);
 
 /* This files gather functions specifics to:
  * r600,rv610,rv630,rv620,rv635,rv670
@@ -39,87 +73,270 @@ void rs600_mc_disable_clients(struct radeon_device *rdev);
  */
 int r600_mc_wait_for_idle(struct radeon_device *rdev);
 void r600_gpu_init(struct radeon_device *rdev);
+void r600_fini(struct radeon_device *rdev);
 
 
 /*
- * MC
+ * R600 PCIE GART
  */
-int r600_mc_init(struct radeon_device *rdev)
+/**
+ * r600_gart_clear_page - zero one entry of the GART page table
+ * @rdev: radeon device
+ * @i: index of the entry to clear
+ *
+ * Entries are 8 bytes wide (the table is sized as num_gpu_pages * 8), so
+ * entry @i lives at byte offset i * 8 from rdev->gart.table.vram.ptr.
+ *
+ * Returns 0 on success or -EINVAL if @i is outside 0..num_gpu_pages - 1.
+ */
+int r600_gart_clear_page(struct radeon_device *rdev, int i)
 {
-       uint32_t tmp;
+       void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
+       u64 pte;
 
-       r600_gpu_init(rdev);
+       /*
+        * The last valid index is num_gpu_pages - 1; accepting
+        * i == num_gpu_pages would write 8 bytes past the end of the table.
+        */
+       if (i < 0 || i >= rdev->gart.num_gpu_pages)
+               return -EINVAL;
+       pte = 0;
+       writeq(pte, ((void __iomem *)ptr) + (i * 8));
+       return 0;
+}
 
-       /* setup the gart before changing location so we can ask to
-        * discard unmapped mc request
-        */
-       /* FIXME: disable out of gart access */
-       tmp = rdev->mc.gtt_location / 4096;
-       tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp);
-       WREG32(R600_MC_VM_SYSTEM_APERTURE_LOW_ADDR, tmp);
-       tmp = (rdev->mc.gtt_location + rdev->mc.gtt_size) / 4096;
-       tmp = REG_SET(R600_LOGICAL_PAGE_NUMBER, tmp);
-       WREG32(R600_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, tmp);
-
-       rs600_mc_disable_clients(rdev);
-       if (r600_mc_wait_for_idle(rdev)) {
-               printk(KERN_WARNING "Failed to wait MC idle while "
-                      "programming pipes. Bad things might happen.\n");
+/*
+ * r600_pcie_gart_tlb_flush - request a VM context 0 invalidation over the
+ * GTT range and wait for the hardware's response.
+ *
+ * Programs the invalidation low/high addresses from rdev->mc.gtt_start and
+ * rdev->mc.gtt_end (shifted right by 12), issues REQUEST_TYPE(1) and then
+ * polls the response field once per microsecond, for at most
+ * rdev->usec_timeout iterations.  A response of 2 is logged as a failure;
+ * any non-zero response ends the wait.  A timeout returns silently.
+ */
+void r600_pcie_gart_tlb_flush(struct radeon_device *rdev)
+{
+       unsigned elapsed = 0;
+       u32 response;
+
+       WREG32(VM_CONTEXT0_INVALIDATION_LOW_ADDR, rdev->mc.gtt_start >> 12);
+       WREG32(VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (rdev->mc.gtt_end - 1) >> 12);
+       WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1));
+       while (elapsed < rdev->usec_timeout) {
+               /* extract the response type field of the request register */
+               response = RREG32(VM_CONTEXT0_REQUEST_RESPONSE);
+               response = (response & RESPONSE_TYPE_MASK) >> RESPONSE_TYPE_SHIFT;
+               if (response) {
+                       if (response == 2)
+                               printk(KERN_WARNING "[drm] r600 flush TLB failed\n");
+                       return;
+               }
+               udelay(1);
+               elapsed++;
+       }
+}
+
+/*
+ * r600_pcie_gart_enable - set up the GART page table and enable VM context 0.
+ *
+ * In order: initialise the common GART state, allocate the page table
+ * (8 bytes per GPU page) with radeon_gart_table_vram_alloc(), clear every
+ * entry, program the L2 cache and the L1 TLB control registers, point VM
+ * context 0 at the GTT range and at the table, write 0 to the six control
+ * registers that follow VM_CONTEXT0_CNTL, flush the TLB and mark the GART
+ * ready.
+ *
+ * Returns 0 on success, or the error from radeon_gart_init() or
+ * radeon_gart_table_vram_alloc().
+ */
+int r600_pcie_gart_enable(struct radeon_device *rdev)
+{
+       u32 tmp;
+       int r, i;
+
+       /* Initialize common gart structure */
+       r = radeon_gart_init(rdev);
+       if (r) {
+               return r;
+       }
+       /* 8 bytes per entry, matching the stride in r600_gart_clear_page() */
+       rdev->gart.table_size = rdev->gart.num_gpu_pages * 8;
+       r = radeon_gart_table_vram_alloc(rdev);
+       if (r) {
+               return r;
        }
+       /* Start from a table with every entry zeroed */
+       for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+               r600_gart_clear_page(rdev, i);
+       /* Setup L2 cache */
+       WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
+                               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+                               EFFECTIVE_L2_QUEUE_SIZE(7));
+       WREG32(VM_L2_CNTL2, 0);
+       WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1));
+       /* Setup TLB control */
+       tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
+               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+               EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) |
+               ENABLE_WAIT_L2_QUERY;
+       /*
+        * Same base value for every L1 TLB client; only the HDP read client
+        * adds strict ordering and the SEM clients add semaphore mode.
+        */
+       WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp | ENABLE_L1_STRICT_ORDERING);
+       WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
+       WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
+       /* Addresses are written shifted right by 12 (presumably 4 KiB page numbers) */
+       WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end - 1) >> 12);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+       WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+                               RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+       WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+                       (u32)(rdev->dummy_page.addr >> 12));
+       /* Zero the six control registers after VM_CONTEXT0_CNTL (presumably contexts 1..6) */
+       for (i = 1; i < 7; i++)
+               WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 
-       tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
-       tmp = REG_SET(R600_MC_FB_TOP, tmp >> 24);
-       tmp |= REG_SET(R600_MC_FB_BASE, rdev->mc.vram_location >> 24);
-       WREG32(R600_MC_VM_FB_LOCATION, tmp);
-       tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
-       tmp = REG_SET(R600_MC_AGP_TOP, tmp >> 22);
-       WREG32(R600_MC_VM_AGP_TOP, tmp);
-       tmp = REG_SET(R600_MC_AGP_BOT, rdev->mc.gtt_location >> 22);
-       WREG32(R600_MC_VM_AGP_BOT, tmp);
+       r600_pcie_gart_tlb_flush(rdev);
+       rdev->gart.ready = true;
        return 0;
 }
 
-void r600_mc_fini(struct radeon_device *rdev)
+/*
+ * r600_pcie_gart_disable - tear down GART address translation.
+ *
+ * Clears every page table entry, flushes the TLB, writes 0 to
+ * VM_CONTEXT0_CNTL and the six control registers after it, then reprograms
+ * the L2 cache and all L1 TLB control registers without their enable bits
+ * (ENABLE_L2_CACHE and ENABLE_L1_TLB are not set here).
+ */
+void r600_pcie_gart_disable(struct radeon_device *rdev)
 {
-       /* FIXME: implement */
-}
+       u32 tmp;
+       int i;
 
+       /* Clear ptes*/
+       for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+               r600_gart_clear_page(rdev, i);
+       r600_pcie_gart_tlb_flush(rdev);
+       /* Disable all tables */
+       for (i = 0; i < 7; i++)
+               WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 
-/*
- * Global GPU functions
- */
-void r600_errata(struct radeon_device *rdev)
-{
-       rdev->pll_errata = 0;
+       /* Disable L2 cache */
+       WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
+                               EFFECTIVE_L2_QUEUE_SIZE(7));
+       WREG32(VM_L2_CNTL3, BANK_SELECT_0(0) | BANK_SELECT_1(1));
+       /* Setup L1 TLB control */
+       tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5) |
+               ENABLE_WAIT_L2_QUERY;
+       /* Every L1 TLB client gets the same value, with no per-client extras */
+       WREG32(MC_VM_L1_TLB_MCD_RD_A_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCD_WR_A_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCD_RD_B_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCD_WR_B_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_RD_GFX_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_RD_SYS_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp);
+       WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
 }
 
+/*
+ * r600_mc_wait_for_idle - wait for the status bits under mask 0x3F00 to clear.
+ *
+ * Polls R_000E50_SRBM_STATUS once per microsecond for at most
+ * rdev->usec_timeout iterations.
+ *
+ * Returns 0 as soon as the masked bits read zero, -1 on timeout.
+ */
 int r600_mc_wait_for_idle(struct radeon_device *rdev)
 {
-       /* FIXME: implement */
-       return 0;
+       unsigned i;
+       u32 tmp;
+
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               /*
+                * read SRBM_STATUS; bits 13:8 are presumably the memory
+                * controller busy flags -- TODO confirm
+                */
+               tmp = RREG32(R_000E50_SRBM_STATUS) & 0x3F00;
+               if (!tmp)
+                       return 0;
+               udelay(1);
+       }
+       return -1;
 }
 
-void r600_gpu_init(struct radeon_device *rdev)
+/*
+ * Program the memory controller with the address layout held in
+ * rdev->mc (vram_start/vram_end, gtt_start/gtt_end, agp_base).
+ *
+ * The sequence is: clear the HDP registers, save the VGA/CRTC control
+ * registers, stop scanout, wait for the MC to go idle, write the new
+ * aperture / framebuffer / AGP ranges and the rebased D1/D2 surface
+ * addresses, wait for idle again, then restore the saved VGA/CRTC
+ * state.  An MC idle timeout is only logged; the sequence carries on.
+ */
+static void r600_mc_resume(struct radeon_device *rdev)
 {
-       /* FIXME: implement */
-}
+       u32 d1vga_control, d2vga_control;
+       u32 vga_render_control, vga_hdp_control;
+       u32 d1crtc_control, d2crtc_control;
+       u32 new_d1grph_primary, new_d1grph_secondary;
+       u32 new_d2grph_primary, new_d2grph_secondary;
+       u64 old_vram_start;
+       u32 tmp;
+       int i, j;
 
+       /* Initialize HDP: zero 32 groups of five registers, the groups
+        * spaced 0x18 bytes apart starting at 0x2c14
+        */
+       for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+               WREG32((0x2c14 + j), 0x00000000);
+               WREG32((0x2c18 + j), 0x00000000);
+               WREG32((0x2c1c + j), 0x00000000);
+               WREG32((0x2c20 + j), 0x00000000);
+               WREG32((0x2c24 + j), 0x00000000);
+       }
+       WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
 
-/*
- * VRAM info
- */
-void r600_vram_get_type(struct radeon_device *rdev)
+       /* Save the VGA and CRTC control registers; they are written
+        * back once the new layout is in place
+        */
+       d1vga_control = RREG32(D1VGA_CONTROL);
+       d2vga_control = RREG32(D2VGA_CONTROL);
+       vga_render_control = RREG32(VGA_RENDER_CONTROL);
+       vga_hdp_control = RREG32(VGA_HDP_CONTROL);
+       d1crtc_control = RREG32(D1CRTC_CONTROL);
+       d2crtc_control = RREG32(D2CRTC_CONTROL);
+       /* The low 16 bits of MC_VM_FB_LOCATION are the current VRAM
+        * base in units of 1 << 24 bytes.  Each surface address is
+        * shifted by the distance between the old and the new VRAM
+        * base (the u64 difference is truncated into the u32 values).
+        */
+       old_vram_start = (u64)(RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24;
+       new_d1grph_primary = RREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS);
+       new_d1grph_secondary = RREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS);
+       new_d1grph_primary += rdev->mc.vram_start - old_vram_start;
+       new_d1grph_secondary += rdev->mc.vram_start - old_vram_start;
+       new_d2grph_primary = RREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS);
+       new_d2grph_secondary = RREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS);
+       new_d2grph_primary += rdev->mc.vram_start - old_vram_start;
+       new_d2grph_secondary += rdev->mc.vram_start - old_vram_start;
+
+       /* Stop all video: the CRTC control writes are bracketed by
+        * setting and then clearing the UPDATE_LOCK registers
+        */
+       WREG32(D1VGA_CONTROL, 0);
+       WREG32(D2VGA_CONTROL, 0);
+       WREG32(VGA_RENDER_CONTROL, 0);
+       WREG32(D1CRTC_UPDATE_LOCK, 1);
+       WREG32(D2CRTC_UPDATE_LOCK, 1);
+       WREG32(D1CRTC_CONTROL, 0);
+       WREG32(D2CRTC_CONTROL, 0);
+       WREG32(D1CRTC_UPDATE_LOCK, 0);
+       WREG32(D2CRTC_UPDATE_LOCK, 0);
+
+       /* Wait 1ms, then poll for MC idle; a timeout is only warned about */
+       mdelay(1);
+       if (r600_mc_wait_for_idle(rdev)) {
+               printk(KERN_WARNING "[drm] MC not idle !\n");
+       }
+
+       /* Lock out access through the VGA aperture */
+       WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+
+       /* Update configuration */
+       WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, rdev->mc.vram_start >> 12);
+       WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, (rdev->mc.vram_end - 1) >> 12);
+       WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
+       /* MC_VM_FB_LOCATION: last VRAM address >> 24 in bits 31:16,
+        * first VRAM address >> 24 in bits 15:0
+        */
+       tmp = (((rdev->mc.vram_end - 1) >> 24) & 0xFFFF) << 16;
+       tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
+       WREG32(MC_VM_FB_LOCATION, tmp);
+       WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+       WREG32(HDP_NONSURFACE_INFO, (2 << 7));
+       /* OR-ing with 0x3FF forces the low ten bits of (size - 1) on */
+       WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF);
+       if (rdev->flags & RADEON_IS_AGP) {
+               WREG32(MC_VM_AGP_TOP, (rdev->mc.gtt_end - 1) >> 16);
+               WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
+               WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
+       } else {
+               /* No AGP: TOP and BOT get the same value -- presumably
+                * this leaves the AGP aperture empty; TODO confirm
+                */
+               WREG32(MC_VM_AGP_BASE, 0);
+               WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+               WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+       }
+       WREG32(D1GRPH_PRIMARY_SURFACE_ADDRESS, new_d1grph_primary);
+       WREG32(D1GRPH_SECONDARY_SURFACE_ADDRESS, new_d1grph_secondary);
+       WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS, new_d2grph_primary);
+       WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS, new_d2grph_secondary);
+       WREG32(VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start);
+
+       /* Unlock host access: put back the saved VGA_HDP_CONTROL value */
+       WREG32(VGA_HDP_CONTROL, vga_hdp_control);
+
+       mdelay(1);
+       if (r600_mc_wait_for_idle(rdev)) {
+               printk(KERN_WARNING "[drm] MC not idle !\n");
+       }
+
+       /* Restore video state from the values saved above */
+       WREG32(D1CRTC_UPDATE_LOCK, 1);
+       WREG32(D2CRTC_UPDATE_LOCK, 1);
+       WREG32(D1CRTC_CONTROL, d1crtc_control);
+       WREG32(D2CRTC_CONTROL, d2crtc_control);
+       WREG32(D1CRTC_UPDATE_LOCK, 0);
+       WREG32(D2CRTC_UPDATE_LOCK, 0);
+       WREG32(D1VGA_CONTROL, d1vga_control);
+       WREG32(D2VGA_CONTROL, d2vga_control);
+       WREG32(VGA_RENDER_CONTROL, vga_render_control);
+}
+
+int r600_mc_init(struct radeon_device *rdev)
 {
-       uint32_t tmp;
+       fixed20_12 a;
+       u32 tmp;
        int chansize;
+       int r;
 
+       /* Get VRAM informations */
        rdev->mc.vram_width = 128;
        rdev->mc.vram_is_ddr = true;
-
-       tmp = RREG32(R600_RAMCFG);
-       if (tmp & R600_CHANSIZE_OVERRIDE) {
+       tmp = RREG32(RAMCFG);
+       if (tmp & CHANSIZE_OVERRIDE) {
                chansize = 16;
-       } else if (tmp & R600_CHANSIZE) {
+       } else if (tmp & CHANSIZE_MASK) {
                chansize = 64;
        } else {
                chansize = 32;
@@ -135,36 +352,1391 @@ void r600_vram_get_type(struct radeon_device *rdev)
                        (rdev->family == CHIP_RV635)) {
                rdev->mc.vram_width = 2 * chansize;
        }
+       /* Could aper size report 0 ? */
+       rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+       rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+       /* Setup GPU memory space */
+       rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
+       rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
+       if (rdev->flags & RADEON_IS_AGP) {
+               r = radeon_agp_init(rdev);
+               if (r)
+                       return r;
+               /* gtt_size is setup by radeon_agp_init */
+               rdev->mc.gtt_location = rdev->mc.agp_base;
+               tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size;
+               /* Try to put VRAM before or after AGP because we
+                * want SYSTEM_APERTURE to cover both VRAM and AGP,
+                * so that the GPU can catch out-of-VRAM/AGP accesses
+                */
+               if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
+                       /* Enough room before */
+                       rdev->mc.vram_location = rdev->mc.gtt_location -
+                                                       rdev->mc.mc_vram_size;
+               } else if (tmp > rdev->mc.mc_vram_size) {
+                       /* Enough room after */
+                       rdev->mc.vram_location = rdev->mc.gtt_location +
+                                                       rdev->mc.gtt_size;
+               } else {
+                       /* Try to set up VRAM then AGP; this might
+                        * not work on some cards
+                        */
+                       rdev->mc.vram_location = 0x00000000UL;
+                       rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+               }
+       } else {
+               if (rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) {
+                       rdev->mc.vram_location = (RREG32(MC_VM_FB_LOCATION) &
+                                                               0xFFFF) << 24;
+                       rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+                       tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size;
+                       if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) {
+                               /* Enough place after vram */
+                               rdev->mc.gtt_location = tmp;
+                       } else if (rdev->mc.vram_location >= rdev->mc.gtt_size) {
+                               /* Enough place before vram */
+                               rdev->mc.gtt_location = 0;
+                       } else {
+                               /* Not enough room after or before VRAM:
+                                * shrink the GART size
+                                */
+                               if (rdev->mc.vram_location > (0xFFFFFFFFUL - tmp)) {
+                                       rdev->mc.gtt_location = 0;
+                                       rdev->mc.gtt_size = rdev->mc.vram_location;
+                               } else {
+                                       rdev->mc.gtt_location = tmp;
+                                       rdev->mc.gtt_size = 0xFFFFFFFFUL - tmp;
+                               }
+                       }
+                       rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+               } else {
+                       rdev->mc.vram_location = 0x00000000UL;
+                       rdev->mc.gtt_location = rdev->mc.mc_vram_size;
+                       rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+               }
+       }
+       rdev->mc.vram_start = rdev->mc.vram_location;
+       rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size;
+       rdev->mc.gtt_start = rdev->mc.gtt_location;
+       rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size;
+       /* FIXME: we should enforce default clock in case GPU is not in
+        * default setup
+        */
+       a.full = rfixed_const(100);
+       rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+       rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
+       return 0;
 }
 
-void r600_vram_info(struct radeon_device *rdev)
+/* Soft-reset the GPU.
+ *
+ * We don't check whether the GPU as a whole really needs a reset, we
+ * simply do it; it is up to the caller to determine whether one is
+ * needed. We might add a helper function to check that.
+ *
+ * Steps: halt the CP, reset the graphics blocks if GRBM_STATUS or
+ * GRBM_STATUS2 shows any of them busy, always reset the CP, then reset
+ * whichever SRBM blocks SRBM_STATUS reports as busy/pending.
+ * Always returns 0.
+ */
+int r600_gpu_soft_reset(struct radeon_device *rdev)
 {
-       r600_vram_get_type(rdev);
-       rdev->mc.real_vram_size = RREG32(R600_CONFIG_MEMSIZE);
-       rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
+       /* GRBM_STATUS bits that count as "a rendering block is busy" */
+       u32 grbm_busy_mask = S_008010_VC_BUSY(1) | S_008010_VGT_BUSY_NO_DMA(1) |
+                               S_008010_VGT_BUSY(1) | S_008010_TA03_BUSY(1) |
+                               S_008010_TC_BUSY(1) | S_008010_SX_BUSY(1) |
+                               S_008010_SH_BUSY(1) | S_008010_SPI03_BUSY(1) |
+                               S_008010_SMX_BUSY(1) | S_008010_SC_BUSY(1) |
+                               S_008010_PA_BUSY(1) | S_008010_DB03_BUSY(1) |
+                               S_008010_CR_BUSY(1) | S_008010_CB03_BUSY(1) |
+                               S_008010_GUI_ACTIVE(1);
+       /* Same idea for the per-instance busy bits in GRBM_STATUS2 */
+       u32 grbm2_busy_mask = S_008014_SPI0_BUSY(1) | S_008014_SPI1_BUSY(1) |
+                       S_008014_SPI2_BUSY(1) | S_008014_SPI3_BUSY(1) |
+                       S_008014_TA0_BUSY(1) | S_008014_TA1_BUSY(1) |
+                       S_008014_TA2_BUSY(1) | S_008014_TA3_BUSY(1) |
+                       S_008014_DB0_BUSY(1) | S_008014_DB1_BUSY(1) |
+                       S_008014_DB2_BUSY(1) | S_008014_DB3_BUSY(1) |
+                       S_008014_CB0_BUSY(1) | S_008014_CB1_BUSY(1) |
+                       S_008014_CB2_BUSY(1) | S_008014_CB3_BUSY(1);
+       u32 srbm_reset = 0;
 
-       /* Could aper size report 0 ? */
-       rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-       rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+       /* Disable CP parsing/prefetching */
+       WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(0xff));
+       /* Check if any of the rendering blocks is busy and, if so,
+        * reset all of them together
+        */
+       if ((RREG32(R_008010_GRBM_STATUS) & grbm_busy_mask) ||
+           (RREG32(R_008014_GRBM_STATUS2) & grbm2_busy_mask)) {
+               WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CR(1) |
+                       S_008020_SOFT_RESET_DB(1) |
+                       S_008020_SOFT_RESET_CB(1) |
+                       S_008020_SOFT_RESET_PA(1) |
+                       S_008020_SOFT_RESET_SC(1) |
+                       S_008020_SOFT_RESET_SMX(1) |
+                       S_008020_SOFT_RESET_SPI(1) |
+                       S_008020_SOFT_RESET_SX(1) |
+                       S_008020_SOFT_RESET_SH(1) |
+                       S_008020_SOFT_RESET_TC(1) |
+                       S_008020_SOFT_RESET_TA(1) |
+                       S_008020_SOFT_RESET_VC(1) |
+                       S_008020_SOFT_RESET_VGT(1));
+               /* Read the register back, value discarded -- presumably
+                * to post the write before the delay; TODO confirm
+                */
+               (void)RREG32(R_008020_GRBM_SOFT_RESET);
+               udelay(50);
+               WREG32(R_008020_GRBM_SOFT_RESET, 0);
+               (void)RREG32(R_008020_GRBM_SOFT_RESET);
+       }
+       /* Reset CP (we always reset CP) */
+       WREG32(R_008020_GRBM_SOFT_RESET, S_008020_SOFT_RESET_CP(1));
+       (void)RREG32(R_008020_GRBM_SOFT_RESET);
+       udelay(50);
+       WREG32(R_008020_GRBM_SOFT_RESET, 0);
+       (void)RREG32(R_008020_GRBM_SOFT_RESET);
+       /* Reset other GPU blocks if necessary; SRBM_STATUS is read
+        * afresh for every test below
+        */
+       if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+               srbm_reset |= S_000E60_SOFT_RESET_RLC(1);
+       if (G_000E50_GRBM_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS)))
+               srbm_reset |= S_000E60_SOFT_RESET_GRBM(1);
+       if (G_000E50_HI_RQ_PENDING(RREG32(R_000E50_SRBM_STATUS)))
+               srbm_reset |= S_000E60_SOFT_RESET_IH(1);
+       if (G_000E50_VMC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+               srbm_reset |= S_000E60_SOFT_RESET_VMC(1);
+       /* Any of the five MCB/MCD busy bits requests the single MC reset */
+       if (G_000E50_MCB_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+               srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+       if (G_000E50_MCDZ_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+               srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+       if (G_000E50_MCDY_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+               srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+       if (G_000E50_MCDX_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+               srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+       if (G_000E50_MCDW_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+               srbm_reset |= S_000E60_SOFT_RESET_MC(1);
+       /* NOTE(review): repeats the RLC_BUSY test made at the top of
+        * this list; it only adds one more register read
+        */
+       if (G_000E50_RLC_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+               srbm_reset |= S_000E60_SOFT_RESET_RLC(1);
+       if (G_000E50_SEM_BUSY(RREG32(R_000E50_SRBM_STATUS)))
+               srbm_reset |= S_000E60_SOFT_RESET_SEM(1);
+       /* Written unconditionally, even when srbm_reset is still 0 */
+       WREG32(R_000E60_SRBM_SOFT_RESET, srbm_reset);
+       (void)RREG32(R_000E60_SRBM_SOFT_RESET);
+       udelay(50);
+       WREG32(R_000E60_SRBM_SOFT_RESET, 0);
+       (void)RREG32(R_000E60_SRBM_SOFT_RESET);
+       /* Wait a little for things to settle down */
+       udelay(50);
+       return 0;
+}
+
+/*
+ * GPU reset entry point: performs the soft reset and hands back
+ * whatever r600_gpu_soft_reset() returned.
+ */
+int r600_gpu_reset(struct radeon_device *rdev)
+{
+       int r;
+
+       r = r600_gpu_soft_reset(rdev);
+       return r;
+}
+
+/*
+ * Build the tile-pipe to render-backend map: a packed word holding one
+ * 2-bit backend id per tile pipe.
+ *
+ * @num_tile_pipes:       pipes to map, clamped to 1..R6XX_MAX_PIPES
+ * @num_backends:         backends wanted, clamped to 1..R6XX_MAX_BACKENDS
+ * @backend_disable_mask: bit n set means backend n must not be used
+ *
+ * The usable backends are handed out round-robin in pipe order.  The
+ * id chosen for a pipe is stored at bit position (swizzled pipe * 2).
+ * If every backend is disabled, backend 0 is used for all pipes.
+ */
+static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
+                                            u32 num_backends,
+                                            u32 backend_disable_mask)
+{
+       /* Pipe orderings used for 6, 7 and 8 tile pipes respectively */
+       static const u32 interleaved[3][8] = {
+               { 0, 2, 4, 5, 1, 3 },
+               { 0, 2, 4, 6, 1, 3, 5 },
+               { 0, 2, 4, 6, 1, 3, 5, 7 },
+       };
+       u32 swizzle_pipe[R6XX_MAX_PIPES] = { 0 };
+       u32 usable_mask = 0;
+       u32 usable_count = 0;
+       u32 map = 0;
+       u32 backend = 0;
+       u32 pipe;
+       u32 bit;
+
+       if (num_tile_pipes < 1)
+               num_tile_pipes = 1;
+       else if (num_tile_pipes > R6XX_MAX_PIPES)
+               num_tile_pipes = R6XX_MAX_PIPES;
+
+       if (num_backends < 1)
+               num_backends = 1;
+       else if (num_backends > R6XX_MAX_BACKENDS)
+               num_backends = R6XX_MAX_BACKENDS;
+
+       /* Collect the first num_backends backends that are not disabled */
+       for (bit = 0;
+            bit < R6XX_MAX_BACKENDS && usable_count < num_backends;
+            bit++) {
+               if ((backend_disable_mask >> bit) & 1)
+                       continue;
+               usable_mask |= (1 << bit);
+               usable_count++;
+       }
+
+       /* Nothing usable: fall back to backend 0 alone */
+       if (!usable_count) {
+               usable_mask = 1;
+               usable_count = 1;
+       }
+
+       if (num_tile_pipes >= 6 && num_tile_pipes <= 8) {
+               const u32 *order = interleaved[num_tile_pipes - 6];
+
+               for (pipe = 0; pipe < num_tile_pipes; pipe++)
+                       swizzle_pipe[pipe] = order[pipe];
+       } else if (num_tile_pipes <= 5) {
+               /* Up to five pipes keep their natural order */
+               for (pipe = 0; pipe < num_tile_pipes; pipe++)
+                       swizzle_pipe[pipe] = pipe;
+       }
+
+       for (pipe = 0; pipe < num_tile_pipes; pipe++) {
+               /* Advance to the next usable backend, wrapping around */
+               while (!((1 << backend) & usable_mask))
+                       backend = (backend + 1) % R6XX_MAX_BACKENDS;
+
+               map |= (u32)((backend & 3) << (swizzle_pipe[pipe] * 2));
+
+               backend = (backend + 1) % R6XX_MAX_BACKENDS;
+       }
+
+       return map;
+}
+
+/*
+ * Return the number of bits set in @val (0..32).
+ */
+int r600_count_pipe_bits(uint32_t val)
+{
+       int count = 0;
+
+       /* Peel off the low bit until no set bits remain */
+       while (val) {
+               count += val & 1;
+               val >>= 1;
+       }
+       return count;
 }
 
+void r600_gpu_init(struct radeon_device *rdev)
+{
+       u32 tiling_config;
+       u32 ramcfg;
+       u32 tmp;
+       int i, j;
+       u32 sq_config;
+       u32 sq_gpr_resource_mgmt_1 = 0;
+       u32 sq_gpr_resource_mgmt_2 = 0;
+       u32 sq_thread_resource_mgmt = 0;
+       u32 sq_stack_resource_mgmt_1 = 0;
+       u32 sq_stack_resource_mgmt_2 = 0;
+
+       /* FIXME: implement */
+       switch (rdev->family) {
+       case CHIP_R600:
+               rdev->config.r600.max_pipes = 4;
+               rdev->config.r600.max_tile_pipes = 8;
+               rdev->config.r600.max_simds = 4;
+               rdev->config.r600.max_backends = 4;
+               rdev->config.r600.max_gprs = 256;
+               rdev->config.r600.max_threads = 192;
+               rdev->config.r600.max_stack_entries = 256;
+               rdev->config.r600.max_hw_contexts = 8;
+               rdev->config.r600.max_gs_threads = 16;
+               rdev->config.r600.sx_max_export_size = 128;
+               rdev->config.r600.sx_max_export_pos_size = 16;
+               rdev->config.r600.sx_max_export_smx_size = 128;
+               rdev->config.r600.sq_num_cf_insts = 2;
+               break;
+       case CHIP_RV630:
+       case CHIP_RV635:
+               rdev->config.r600.max_pipes = 2;
+               rdev->config.r600.max_tile_pipes = 2;
+               rdev->config.r600.max_simds = 3;
+               rdev->config.r600.max_backends = 1;
+               rdev->config.r600.max_gprs = 128;
+               rdev->config.r600.max_threads = 192;
+               rdev->config.r600.max_stack_entries = 128;
+               rdev->config.r600.max_hw_contexts = 8;
+               rdev->config.r600.max_gs_threads = 4;
+               rdev->config.r600.sx_max_export_size = 128;
+               rdev->config.r600.sx_max_export_pos_size = 16;
+               rdev->config.r600.sx_max_export_smx_size = 128;
+               rdev->config.r600.sq_num_cf_insts = 2;
+               break;
+       case CHIP_RV610:
+       case CHIP_RV620:
+       case CHIP_RS780:
+       case CHIP_RS880:
+               rdev->config.r600.max_pipes = 1;
+               rdev->config.r600.max_tile_pipes = 1;
+               rdev->config.r600.max_simds = 2;
+               rdev->config.r600.max_backends = 1;
+               rdev->config.r600.max_gprs = 128;
+               rdev->config.r600.max_threads = 192;
+               rdev->config.r600.max_stack_entries = 128;
+               rdev->config.r600.max_hw_contexts = 4;
+               rdev->config.r600.max_gs_threads = 4;
+               rdev->config.r600.sx_max_export_size = 128;
+               rdev->config.r600.sx_max_export_pos_size = 16;
+               rdev->config.r600.sx_max_export_smx_size = 128;
+               rdev->config.r600.sq_num_cf_insts = 1;
+               break;
+       case CHIP_RV670:
+               rdev->config.r600.max_pipes = 4;
+               rdev->config.r600.max_tile_pipes = 4;
+               rdev->config.r600.max_simds = 4;
+               rdev->config.r600.max_backends = 4;
+               rdev->config.r600.max_gprs = 192;
+               rdev->config.r600.max_threads = 192;
+               rdev->config.r600.max_stack_entries = 256;
+               rdev->config.r600.max_hw_contexts = 8;
+               rdev->config.r600.max_gs_threads = 16;
+               rdev->config.r600.sx_max_export_size = 128;
+               rdev->config.r600.sx_max_export_pos_size = 16;
+               rdev->config.r600.sx_max_export_smx_size = 128;
+               rdev->config.r600.sq_num_cf_insts = 2;
+               break;
+       default:
+               break;
+       }
+
+       /* Initialize HDP */
+       for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+               WREG32((0x2c14 + j), 0x00000000);
+               WREG32((0x2c18 + j), 0x00000000);
+               WREG32((0x2c1c + j), 0x00000000);
+               WREG32((0x2c20 + j), 0x00000000);
+               WREG32((0x2c24 + j), 0x00000000);
+       }
+
+       WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
+
+       /* Setup tiling */
+       tiling_config = 0;
+       ramcfg = RREG32(RAMCFG);
+       switch (rdev->config.r600.max_tile_pipes) {
+       case 1:
+               tiling_config |= PIPE_TILING(0);
+               break;
+       case 2:
+               tiling_config |= PIPE_TILING(1);
+               break;
+       case 4:
+               tiling_config |= PIPE_TILING(2);
+               break;
+       case 8:
+               tiling_config |= PIPE_TILING(3);
+               break;
+       default:
+               break;
+       }
+       tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
+       tiling_config |= GROUP_SIZE(0);
+       tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT;
+       if (tmp > 3) {
+               tiling_config |= ROW_TILING(3);
+               tiling_config |= SAMPLE_SPLIT(3);
+       } else {
+               tiling_config |= ROW_TILING(tmp);
+               tiling_config |= SAMPLE_SPLIT(tmp);
+       }
+       tiling_config |= BANK_SWAPS(1);
+       tmp = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes,
+                                               rdev->config.r600.max_backends,
+                                               (0xff << rdev->config.r600.max_backends) & 0xff);
+       tiling_config |= BACKEND_MAP(tmp);
+       WREG32(GB_TILING_CONFIG, tiling_config);
+       WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
+       WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
+
+       tmp = BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK);
+       WREG32(CC_RB_BACKEND_DISABLE, tmp);
+
+       /* Setup pipes */
+       tmp = INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK);
+       tmp |= INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK);
+       WREG32(CC_GC_SHADER_PIPE_CONFIG, tmp);
+       WREG32(GC_USER_SHADER_PIPE_CONFIG, tmp);
+
+       tmp = R6XX_MAX_BACKENDS - r600_count_pipe_bits(tmp & INACTIVE_QD_PIPES_MASK);
+       WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
+       WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK);
+
+       /* Setup some CP states */
+       WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | ROQ_IB2_START(0x2b)));
+       WREG32(CP_MEQ_THRESHOLDS, (MEQ_END(0x40) | ROQ_END(0x40)));
+
+       WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | SYNC_GRADIENT |
+                            SYNC_WALKER | SYNC_ALIGNER));
+       /* Setup various GPU states */
+       if (rdev->family == CHIP_RV670)
+               WREG32(ARB_GDEC_RD_CNTL, 0x00000021);
+
+       tmp = RREG32(SX_DEBUG_1);
+       tmp |= SMX_EVENT_RELEASE;
+       if ((rdev->family > CHIP_R600))
+               tmp |= ENABLE_NEW_SMX_ADDRESS;
+       WREG32(SX_DEBUG_1, tmp);
+
+       if (((rdev->family) == CHIP_R600) ||
+           ((rdev->family) == CHIP_RV630) ||
+           ((rdev->family) == CHIP_RV610) ||
+           ((rdev->family) == CHIP_RV620) ||
+           ((rdev->family) == CHIP_RS780)) {
+               WREG32(DB_DEBUG, PREZ_MUST_WAIT_FOR_POSTZ_DONE);
+       } else {
+               WREG32(DB_DEBUG, 0);
+       }
+       WREG32(DB_WATERMARKS, (DEPTH_FREE(4) | DEPTH_CACHELINE_FREE(16) |
+                              DEPTH_FLUSH(16) | DEPTH_PENDING_FREE(4)));
+
+       WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+       WREG32(VGT_NUM_INSTANCES, 0);
+
+       WREG32(SPI_CONFIG_CNTL, GPR_WRITE_PRIORITY(0));
+       WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(0));
+
+       tmp = RREG32(SQ_MS_FIFO_SIZES);
+       if (((rdev->family) == CHIP_RV610) ||
+           ((rdev->family) == CHIP_RV620) ||
+           ((rdev->family) == CHIP_RS780)) {
+               tmp = (CACHE_FIFO_SIZE(0xa) |
+                      FETCH_FIFO_HIWATER(0xa) |
+                      DONE_FIFO_HIWATER(0xe0) |
+                      ALU_UPDATE_FIFO_HIWATER(0x8));
+       } else if (((rdev->family) == CHIP_R600) ||
+                  ((rdev->family) == CHIP_RV630)) {
+               tmp &= ~DONE_FIFO_HIWATER(0xff);
+               tmp |= DONE_FIFO_HIWATER(0x4);
+       }
+       WREG32(SQ_MS_FIFO_SIZES, tmp);
+
+       /* SQ_CONFIG, SQ_GPR_RESOURCE_MGMT, SQ_THREAD_RESOURCE_MGMT, SQ_STACK_RESOURCE_MGMT
+        * should be adjusted as needed by the 2D/3D drivers.  This just sets default values
+        */
+       sq_config = RREG32(SQ_CONFIG);
+       sq_config &= ~(PS_PRIO(3) |
+                      VS_PRIO(3) |
+                      GS_PRIO(3) |
+                      ES_PRIO(3));
+       sq_config |= (DX9_CONSTS |
+                     VC_ENABLE |
+                     PS_PRIO(0) |
+                     VS_PRIO(1) |
+                     GS_PRIO(2) |
+                     ES_PRIO(3));
+
+       if ((rdev->family) == CHIP_R600) {
+               sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(124) |
+                                         NUM_VS_GPRS(124) |
+                                         NUM_CLAUSE_TEMP_GPRS(4));
+               sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(0) |
+                                         NUM_ES_GPRS(0));
+               sq_thread_resource_mgmt = (NUM_PS_THREADS(136) |
+                                          NUM_VS_THREADS(48) |
+                                          NUM_GS_THREADS(4) |
+                                          NUM_ES_THREADS(4));
+               sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(128) |
+                                           NUM_VS_STACK_ENTRIES(128));
+               sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(0) |
+                                           NUM_ES_STACK_ENTRIES(0));
+       } else if (((rdev->family) == CHIP_RV610) ||
+                  ((rdev->family) == CHIP_RV620) ||
+                  ((rdev->family) == CHIP_RS780)) {
+               /* no vertex cache */
+               sq_config &= ~VC_ENABLE;
+
+               sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+                                         NUM_VS_GPRS(44) |
+                                         NUM_CLAUSE_TEMP_GPRS(2));
+               sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) |
+                                         NUM_ES_GPRS(17));
+               sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+                                          NUM_VS_THREADS(78) |
+                                          NUM_GS_THREADS(4) |
+                                          NUM_ES_THREADS(31));
+               sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) |
+                                           NUM_VS_STACK_ENTRIES(40));
+               sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) |
+                                           NUM_ES_STACK_ENTRIES(16));
+       } else if (((rdev->family) == CHIP_RV630) ||
+                  ((rdev->family) == CHIP_RV635)) {
+               sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+                                         NUM_VS_GPRS(44) |
+                                         NUM_CLAUSE_TEMP_GPRS(2));
+               sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(18) |
+                                         NUM_ES_GPRS(18));
+               sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+                                          NUM_VS_THREADS(78) |
+                                          NUM_GS_THREADS(4) |
+                                          NUM_ES_THREADS(31));
+               sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(40) |
+                                           NUM_VS_STACK_ENTRIES(40));
+               sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(32) |
+                                           NUM_ES_STACK_ENTRIES(16));
+       } else if ((rdev->family) == CHIP_RV670) {
+               sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(44) |
+                                         NUM_VS_GPRS(44) |
+                                         NUM_CLAUSE_TEMP_GPRS(2));
+               sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(17) |
+                                         NUM_ES_GPRS(17));
+               sq_thread_resource_mgmt = (NUM_PS_THREADS(79) |
+                                          NUM_VS_THREADS(78) |
+                                          NUM_GS_THREADS(4) |
+                                          NUM_ES_THREADS(31));
+               sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(64) |
+                                           NUM_VS_STACK_ENTRIES(64));
+               sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(64) |
+                                           NUM_ES_STACK_ENTRIES(64));
+       }
+
+       WREG32(SQ_CONFIG, sq_config);
+       WREG32(SQ_GPR_RESOURCE_MGMT_1,  sq_gpr_resource_mgmt_1);
+       WREG32(SQ_GPR_RESOURCE_MGMT_2,  sq_gpr_resource_mgmt_2);
+       WREG32(SQ_THREAD_RESOURCE_MGMT, sq_thread_resource_mgmt);
+       WREG32(SQ_STACK_RESOURCE_MGMT_1, sq_stack_resource_mgmt_1);
+       WREG32(SQ_STACK_RESOURCE_MGMT_2, sq_stack_resource_mgmt_2);
+
+       if (((rdev->family) == CHIP_RV610) ||
+           ((rdev->family) == CHIP_RV620) ||
+           ((rdev->family) == CHIP_RS780)) {
+               WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(TC_ONLY));
+       } else {
+               WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC));
+       }
+
+       /* More default values. 2D/3D driver should adjust as needed */
+       WREG32(PA_SC_AA_SAMPLE_LOCS_2S, (S0_X(0xc) | S0_Y(0x4) |
+                                        S1_X(0x4) | S1_Y(0xc)));
+       WREG32(PA_SC_AA_SAMPLE_LOCS_4S, (S0_X(0xe) | S0_Y(0xe) |
+                                        S1_X(0x2) | S1_Y(0x2) |
+                                        S2_X(0xa) | S2_Y(0x6) |
+                                        S3_X(0x6) | S3_Y(0xa)));
+       WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD0, (S0_X(0xe) | S0_Y(0xb) |
+                                            S1_X(0x4) | S1_Y(0xc) |
+                                            S2_X(0x1) | S2_Y(0x6) |
+                                            S3_X(0xa) | S3_Y(0xe)));
+       WREG32(PA_SC_AA_SAMPLE_LOCS_8S_WD1, (S4_X(0x6) | S4_Y(0x1) |
+                                            S5_X(0x0) | S5_Y(0x0) |
+                                            S6_X(0xb) | S6_Y(0x4) |
+                                            S7_X(0x7) | S7_Y(0x8)));
+
+       WREG32(VGT_STRMOUT_EN, 0);
+       tmp = rdev->config.r600.max_pipes * 16;
+       switch (rdev->family) {
+       case CHIP_RV610:
+       case CHIP_RS780:
+       case CHIP_RV620:
+               tmp += 32;
+               break;
+       case CHIP_RV670:
+               tmp += 128;
+               break;
+       default:
+               break;
+       }
+       if (tmp > 256) {
+               tmp = 256;
+       }
+       WREG32(VGT_ES_PER_GS, 128);
+       WREG32(VGT_GS_PER_ES, tmp);
+       WREG32(VGT_GS_PER_VS, 2);
+       WREG32(VGT_GS_VERTEX_REUSE, 16);
+
+       /* more default values. 2D/3D driver should adjust as needed */
+       WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
+       WREG32(VGT_STRMOUT_EN, 0);
+       WREG32(SX_MISC, 0);
+       WREG32(PA_SC_MODE_CNTL, 0);
+       WREG32(PA_SC_AA_CONFIG, 0);
+       WREG32(PA_SC_LINE_STIPPLE, 0);
+       WREG32(SPI_INPUT_Z, 0);
+       WREG32(SPI_PS_IN_CONTROL_0, NUM_INTERP(2));
+       WREG32(CB_COLOR7_FRAG, 0);
+
+       /* Clear render buffer base addresses */
+       WREG32(CB_COLOR0_BASE, 0);
+       WREG32(CB_COLOR1_BASE, 0);
+       WREG32(CB_COLOR2_BASE, 0);
+       WREG32(CB_COLOR3_BASE, 0);
+       WREG32(CB_COLOR4_BASE, 0);
+       WREG32(CB_COLOR5_BASE, 0);
+       WREG32(CB_COLOR6_BASE, 0);
+       WREG32(CB_COLOR7_BASE, 0);
+       WREG32(CB_COLOR7_FRAG, 0);
+
+       switch (rdev->family) {
+       case CHIP_RV610:
+       case CHIP_RS780:
+       case CHIP_RV620:
+               tmp = TC_L2_SIZE(8);
+               break;
+       case CHIP_RV630:
+       case CHIP_RV635:
+               tmp = TC_L2_SIZE(4);
+               break;
+       case CHIP_R600:
+               tmp = TC_L2_SIZE(0) | L2_DISABLE_LATE_HIT;
+               break;
+       default:
+               tmp = TC_L2_SIZE(0);
+               break;
+       }
+       WREG32(TC_CNTL, tmp);
+
+       tmp = RREG32(HDP_HOST_PATH_CNTL);
+       WREG32(HDP_HOST_PATH_CNTL, tmp);
+
+       tmp = RREG32(ARB_POP);
+       tmp |= ENABLE_TC128;
+       WREG32(ARB_POP, tmp);
+
+       WREG32(PA_SC_MULTI_CHIP_CNTL, 0);
+       WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
+                              NUM_CLIP_SEQ(3)));
+       WREG32(PA_SC_ENHANCE, FORCE_EOV_MAX_CLK_CNT(4095));
+}
+
+
 /*
  * Indirect registers accessor
  */
-uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg)
+/*
+ * Read a PCIE port indirect register.
+ *
+ * The low 8 bits of @reg are written to PCIE_PORT_INDEX, the index register
+ * is read back (value discarded), and the content of PCIE_PORT_DATA is
+ * returned.
+ */
+u32 r600_pciep_rreg(struct radeon_device *rdev, u32 reg)
+{
+       WREG32(PCIE_PORT_INDEX, reg & 0xff);
+       /* read-back of the index register; the result is intentionally unused */
+       (void)RREG32(PCIE_PORT_INDEX);
+
+       return RREG32(PCIE_PORT_DATA);
+}
+
+/*
+ * Write @v to a PCIE port indirect register.
+ *
+ * The low 8 bits of @reg select the register through PCIE_PORT_INDEX; the
+ * value then goes to PCIE_PORT_DATA.  Each write is followed by a read-back
+ * of the same register whose result is discarded.
+ */
+void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+       const u32 port_index = reg & 0xff;
+
+       WREG32(PCIE_PORT_INDEX, port_index);
+       (void)RREG32(PCIE_PORT_INDEX);
+
+       WREG32(PCIE_PORT_DATA, v);
+       (void)RREG32(PCIE_PORT_DATA);
+}
+
+
+/*
+ * CP & Ring
+ */
+/*
+ * Write CP_ME_CNTL with only the CP_ME_HALT field set to 1.
+ * Going by the field name this halts the CP micro engine; no wait or
+ * status check follows the write.
+ */
+void r600_cp_stop(struct radeon_device *rdev)
+{
+       WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
+}
+
+/*
+ * Fetch the PFP and ME microcode images for the current ASIC family with
+ * request_firmware() and store them in rdev->pfp_fw and rdev->me_fw.
+ *
+ * A temporary "radeon_cp" platform device is registered only to supply the
+ * struct device handed to request_firmware(); it is unregistered again
+ * before returning.
+ *
+ * Returns 0 on success, -EINVAL when the platform device cannot be
+ * registered or an image does not have the expected size, otherwise the
+ * error returned by request_firmware().  On every failure after the
+ * registration both firmware pointers are released and reset to NULL.
+ */
+int r600_cp_init_microcode(struct radeon_device *rdev)
+{
+       struct platform_device *pdev;
+       const char *chip_name;
+       size_t pfp_req_size, me_req_size;
+       char fw_name[30];
+       int err;
+
+       DRM_DEBUG("\n");
+
+       pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
+       err = IS_ERR(pdev);
+       if (err) {
+               printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
+               return -EINVAL;
+       }
+
+       /* Map the family to the firmware file name stem; some families share one. */
+       switch (rdev->family) {
+       case CHIP_R600: chip_name = "R600"; break;
+       case CHIP_RV610: chip_name = "RV610"; break;
+       case CHIP_RV630: chip_name = "RV630"; break;
+       case CHIP_RV620: chip_name = "RV620"; break;
+       case CHIP_RV635: chip_name = "RV635"; break;
+       case CHIP_RV670: chip_name = "RV670"; break;
+       case CHIP_RS780:
+       case CHIP_RS880: chip_name = "RS780"; break;
+       case CHIP_RV770: chip_name = "RV770"; break;
+       case CHIP_RV730:
+       case CHIP_RV740: chip_name = "RV730"; break;
+       case CHIP_RV710: chip_name = "RV710"; break;
+       /* any other family has no firmware name defined here */
+       default: BUG();
+       }
+
+       /*
+        * Expected image sizes, compared against firmware->size below.
+        * The pre-RV770 ME size of PM4_UCODE_SIZE * 12 corresponds to the
+        * PM4_UCODE_SIZE * 3 32-bit words uploaded by r600_cp_load_microcode().
+        */
+       if (rdev->family >= CHIP_RV770) {
+               pfp_req_size = R700_PFP_UCODE_SIZE * 4;
+               me_req_size = R700_PM4_UCODE_SIZE * 4;
+       } else {
+               pfp_req_size = PFP_UCODE_SIZE * 4;
+               me_req_size = PM4_UCODE_SIZE * 12;
+       }
+
+       DRM_INFO("Loading %s CP Microcode\n", chip_name);
+
+       /* PFP image: radeon/<chip>_pfp.bin */
+       snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
+       err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
+       if (err)
+               goto out;
+       if (rdev->pfp_fw->size != pfp_req_size) {
+               printk(KERN_ERR
+                      "r600_cp: Bogus length %zu in firmware \"%s\"\n",
+                      rdev->pfp_fw->size, fw_name);
+               err = -EINVAL;
+               goto out;
+       }
+
+       /* ME image: radeon/<chip>_me.bin */
+       snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
+       err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
+       if (err)
+               goto out;
+       if (rdev->me_fw->size != me_req_size) {
+               printk(KERN_ERR
+                      "r600_cp: Bogus length %zu in firmware \"%s\"\n",
+                      rdev->me_fw->size, fw_name);
+               err = -EINVAL;
+       }
+out:
+       /* the helper device is needed only for the requests above */
+       platform_device_unregister(pdev);
+
+       if (err) {
+               /*
+                * -EINVAL paths have already printed their own message;
+                * fw_name still holds the name of the last requested file.
+                */
+               if (err != -EINVAL)
+                       printk(KERN_ERR
+                              "r600_cp: Failed to load firmware \"%s\"\n",
+                              fw_name);
+               release_firmware(rdev->pfp_fw);
+               rdev->pfp_fw = NULL;
+               release_firmware(rdev->me_fw);
+               rdev->me_fw = NULL;
+       }
+       return err;
+}
+
+/*
+ * Upload the previously fetched ME and PFP microcode images to the CP.
+ *
+ * The CP is stopped and soft-reset first.  The ME image is then written as
+ * PM4_UCODE_SIZE * 3 big-endian 32-bit words through CP_ME_RAM_DATA and the
+ * PFP image as PFP_UCODE_SIZE words through CP_PFP_UCODE_DATA.  The address
+ * registers are reset to 0 afterwards.
+ *
+ * Returns -EINVAL when either image is missing, 0 otherwise.
+ */
+static int r600_cp_load_microcode(struct radeon_device *rdev)
+{
+       const __be32 *me_words;
+       const __be32 *pfp_words;
+       int idx;
+
+       if (rdev->me_fw == NULL || rdev->pfp_fw == NULL)
+               return -EINVAL;
+
+       r600_cp_stop(rdev);
+
+       WREG32(CP_RB_CNTL, RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));
+
+       /* Soft reset of the CP block: assert, read back, wait, deassert */
+       WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+       RREG32(GRBM_SOFT_RESET);
+       mdelay(15);
+       WREG32(GRBM_SOFT_RESET, 0);
+
+       WREG32(CP_ME_RAM_WADDR, 0);
+
+       /* ME image (the write address is set to 0 a second time, as before) */
+       me_words = (const __be32 *)rdev->me_fw->data;
+       WREG32(CP_ME_RAM_WADDR, 0);
+       for (idx = 0; idx < PM4_UCODE_SIZE * 3; idx++)
+               WREG32(CP_ME_RAM_DATA, be32_to_cpup(&me_words[idx]));
+
+       /* PFP image */
+       pfp_words = (const __be32 *)rdev->pfp_fw->data;
+       WREG32(CP_PFP_UCODE_ADDR, 0);
+       for (idx = 0; idx < PFP_UCODE_SIZE; idx++)
+               WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(&pfp_words[idx]));
+
+       /* leave all microcode address registers at 0 */
+       WREG32(CP_PFP_UCODE_ADDR, 0);
+       WREG32(CP_ME_RAM_WADDR, 0);
+       WREG32(CP_ME_RAM_RADDR, 0);
+       return 0;
+}
+
+/*
+ * Queue a 7-dword ME_INITIALIZE packet on the ring and then write 0xff to
+ * CP_ME_CNTL.
+ *
+ * Two dwords of the packet depend on the family: pre-RV770 parts use 0x3
+ * and config.r600.max_hw_contexts - 1, RV770 and later use 0x0 and
+ * config.rv770.max_hw_contexts - 1.
+ *
+ * Returns 0 on success or the radeon_ring_lock() error.
+ */
+int r600_cp_start(struct radeon_device *rdev)
+{
+       u32 family_word;
+       u32 hw_contexts_word;
+       int r;
+
+       r = radeon_ring_lock(rdev, 7);
+       if (r) {
+               DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+               return r;
+       }
+
+       if (rdev->family < CHIP_RV770) {
+               family_word = 0x3;
+               hw_contexts_word = rdev->config.r600.max_hw_contexts - 1;
+       } else {
+               family_word = 0x0;
+               hw_contexts_word = rdev->config.rv770.max_hw_contexts - 1;
+       }
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5));
+       radeon_ring_write(rdev, 0x1);
+       radeon_ring_write(rdev, family_word);
+       radeon_ring_write(rdev, hw_contexts_word);
+       radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
+       radeon_ring_write(rdev, 0);
+       radeon_ring_write(rdev, 0);
+       radeon_ring_unlock_commit(rdev);
+
+       WREG32(R_0086D8_CP_ME_CNTL, 0xff);
+       return 0;
+}
+
+int r600_cp_resume(struct radeon_device *rdev)
+{
+       u32 tmp;
+       u32 rb_bufsz;
+       int r;
+
+       /* Reset cp */
+       WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+       RREG32(GRBM_SOFT_RESET);
+       mdelay(15);
+       WREG32(GRBM_SOFT_RESET, 0);
+
+       /* Set ring buffer size */
+       rb_bufsz = drm_order(rdev->cp.ring_size / 8);
+#ifdef __BIG_ENDIAN
+       WREG32(CP_RB_CNTL, BUF_SWAP_32BIT | RB_NO_UPDATE |
+               (drm_order(4096/8) << 8) | rb_bufsz);
+#else
+       WREG32(CP_RB_CNTL, RB_NO_UPDATE | (drm_order(4096/8) << 8) | rb_bufsz);
+#endif
+       WREG32(CP_SEM_WAIT_TIMER, 0x4);
+
+       /* Set the write pointer delay */
+       WREG32(CP_RB_WPTR_DELAY, 0);
+
+       /* Initialize the ring buffer's read and write pointers */
+       tmp = RREG32(CP_RB_CNTL);
+       WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
+       WREG32(CP_RB_RPTR_WR, 0);
+       WREG32(CP_RB_WPTR, 0);
+       WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFF);
+       WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->cp.gpu_addr));
+       mdelay(1);
+       WREG32(CP_RB_CNTL, tmp);
+
+       WREG32(CP_RB_BASE, rdev->cp.gpu_addr >> 8);
+       WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
+
+       rdev->cp.rptr = RREG32(CP_RB_RPTR);
+       rdev->cp.wptr = RREG32(CP_RB_WPTR);
+
+       r600_cp_start(rdev);
+       rdev->cp.ready = true;
+       r = radeon_ring_test(rdev);
+       if (r) {
+               rdev->cp.ready = false;
+               return r;
+       }
+       return 0;
+}
+
+/*
+ * Publish the driver's ring write pointer to the hardware by writing
+ * rdev->cp.wptr to CP_RB_WPTR; the register is then read back and the
+ * value discarded.
+ */
+void r600_cp_commit(struct radeon_device *rdev)
+{
+       WREG32(CP_RB_WPTR, rdev->cp.wptr);
+       (void)RREG32(CP_RB_WPTR);
+}
+
+/*
+ * Record the ring size and alignment mask in rdev->cp.
+ *
+ * The requested @ring_size is replaced by (1 << (order + 1)) * 4, where
+ * order = drm_order(ring_size / 8); presumably this rounds the size up to a
+ * power of two - confirm against drm_order().  align_mask is set to 15.
+ */
+void r600_ring_init(struct radeon_device *rdev, unsigned ring_size)
+{
+       const u32 order = drm_order(ring_size / 8);
+
+       ring_size = (1 << (order + 1)) * 4;
+
+       rdev->cp.align_mask = 16 - 1;
+       rdev->cp.ring_size = ring_size;
+}
+
+
+/*
+ * GPU scratch register helper functions.
+ */
+/*
+ * Set up the scratch register bookkeeping: 7 registers, all marked free,
+ * located at SCRATCH_REG0 + 4 * index.
+ */
+void r600_scratch_init(struct radeon_device *rdev)
+{
+       int idx = 0;
+
+       rdev->scratch.num_reg = 7;
+       while (idx < rdev->scratch.num_reg) {
+               rdev->scratch.reg[idx] = SCRATCH_REG0 + (idx * 4);
+               rdev->scratch.free[idx] = true;
+               idx++;
+       }
+}
+
+/*
+ * Check that the CP executes packets from the ring.
+ *
+ * A scratch register is seeded with 0xCAFEDEAD, a SET_CONFIG_REG packet
+ * writing 0xDEADBEEF to it is queued, and the register is polled once per
+ * microsecond for up to rdev->usec_timeout iterations.
+ *
+ * Returns 0 on success, the error from radeon_scratch_get() or
+ * radeon_ring_lock(), or -EINVAL if the value never shows up.  The scratch
+ * register is released on every path after it was obtained.
+ */
+int r600_ring_test(struct radeon_device *rdev)
+{
+       uint32_t scratch;
+       uint32_t tmp = 0;
+       unsigned i;
+       int r;
+
+       r = radeon_scratch_get(rdev, &scratch);
+       if (r) {
+               DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
+               return r;
+       }
+       WREG32(scratch, 0xCAFEDEAD);
+       r = radeon_ring_lock(rdev, 3);
+       if (r) {
+               DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+               radeon_scratch_free(rdev, scratch);
+               return r;
+       }
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+       radeon_ring_write(rdev, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+       radeon_ring_write(rdev, 0xDEADBEEF);
+       radeon_ring_unlock_commit(rdev);
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = RREG32(scratch);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               /* i is unsigned: print it with %u, as r600_ib_test() does */
+               DRM_INFO("ring test succeeded in %u usecs\n", i);
+       } else {
+               DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
+                         scratch, tmp);
+               r = -EINVAL;
+       }
+       radeon_scratch_free(rdev, scratch);
+       return r;
+}
+
+/*
+ * Writeback
+ */
+/*
+ * Create (on first use), pin and map the 4096-byte GTT writeback buffer,
+ * then program the scratch and ring read-pointer writeback addresses.
+ *
+ * If pinning or mapping fails, the buffer object is released and
+ * rdev->wb.wb_obj is reset to NULL, so a later call starts from scratch
+ * instead of programming the registers with an address that was never set.
+ *
+ * Returns 0 on success or the error from the failing radeon_object_*() call.
+ */
+int r600_wb_init(struct radeon_device *rdev)
+{
+       int r;
+
+       if (rdev->wb.wb_obj == NULL) {
+               r = radeon_object_create(rdev, NULL, 4096,
+                                        true,
+                                        RADEON_GEM_DOMAIN_GTT,
+                                        false, &rdev->wb.wb_obj);
+               if (r) {
+                       DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
+                       return r;
+               }
+               r = radeon_object_pin(rdev->wb.wb_obj,
+                                     RADEON_GEM_DOMAIN_GTT,
+                                     &rdev->wb.gpu_addr);
+               if (r) {
+                       DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
+                       goto err_unref;
+               }
+               r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
+               if (r) {
+                       DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
+                       goto err_unpin;
+               }
+       }
+       /* scratch writeback at the buffer start, ring rptr at offset 1024 */
+       WREG32(SCRATCH_ADDR, (rdev->wb.gpu_addr >> 8) & 0xFFFFFFFF);
+       WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + 1024) & 0xFFFFFFFC);
+       WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + 1024) & 0xFF);
+       WREG32(SCRATCH_UMSK, 0xff);
+       return 0;
+
+err_unpin:
+       radeon_object_unpin(rdev->wb.wb_obj);
+err_unref:
+       radeon_object_unref(&rdev->wb.wb_obj);
+       rdev->wb.wb_obj = NULL;
+       rdev->wb.wb = NULL;
+       return r;
+}
+
+/*
+ * Release the writeback buffer: unmap, unpin and drop the reference, then
+ * clear the cached pointers.  Does nothing when no buffer object exists.
+ */
+void r600_wb_fini(struct radeon_device *rdev)
+{
+       if (rdev->wb.wb_obj == NULL)
+               return;
+
+       radeon_object_kunmap(rdev->wb.wb_obj);
+       radeon_object_unpin(rdev->wb.wb_obj);
+       radeon_object_unref(&rdev->wb.wb_obj);
+
+       rdev->wb.wb = NULL;
+       rdev->wb.wb_obj = NULL;
+}
+
+
+/*
+ * CS
+ */
+/*
+ * Emit a fence on the ring: a SET_CONFIG_REG packet that stores fence->seq
+ * in the fence driver's scratch register.
+ *
+ * NOTE(review): only these three dwords are emitted here; no packet that
+ * would raise an interrupt is visible in this function.
+ */
+void r600_fence_ring_emit(struct radeon_device *rdev,
+                         struct radeon_fence *fence)
+{
+       const uint32_t scratch_idx =
+               (rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2;
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+       radeon_ring_write(rdev, scratch_idx);
+       radeon_ring_write(rdev, fence->seq);
+}
+
+/*
+ * Placeholder for a DMA-engine copy.  Nothing is copied and @fence is not
+ * touched; the function always reports success (0).
+ */
+int r600_copy_dma(struct radeon_device *rdev,
+                 uint64_t src_offset,
+                 uint64_t dst_offset,
+                 unsigned num_pages,
+                 struct radeon_fence *fence)
+{
+       /* FIXME: implement */
+       return 0;
+}
+
+/*
+ * Copy @num_pages pages of 4096 bytes from @src_offset to @dst_offset with
+ * the blitter: prepare, copy, then finish with @fence.
+ *
+ * Always returns 0; results of the blit helpers are not checked here.
+ */
+int r600_copy_blit(struct radeon_device *rdev,
+                  uint64_t src_offset, uint64_t dst_offset,
+                  unsigned num_pages, struct radeon_fence *fence)
+{
+       const unsigned size_bytes = num_pages * 4096;
+
+       r600_blit_prepare_copy(rdev, size_bytes);
+       r600_kms_blit_copy(rdev, src_offset, dst_offset, size_bytes);
+       r600_blit_done_copy(rdev, fence);
+
+       return 0;
+}
+
+/* Interrupt processing stub: handles nothing and always returns 0. */
+int r600_irq_process(struct radeon_device *rdev)
+{
+       /* FIXME: implement */
+       return 0;
+}
+
+/* Interrupt setup stub: programs nothing and always returns 0. */
+int r600_irq_set(struct radeon_device *rdev)
+{
+       /* FIXME: implement */
+       return 0;
+}
+
+/*
+ * Surface register setup stub: all arguments are ignored and 0 is returned.
+ */
+int r600_set_surface_reg(struct radeon_device *rdev, int reg,
+                        uint32_t tiling_flags, uint32_t pitch,
+                        uint32_t offset, uint32_t obj_size)
+{
+       /* FIXME: implement */
+       return 0;
+}
+
+/* Surface register clear stub: currently a no-op. */
+void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
+{
+       /* FIXME: implement */
+}
+
+
+/*
+ * Report whether the card appears to have been posted already.
+ *
+ * True when CRTC_EN is set in either D1CRTC_CONTROL or D2CRTC_CONTROL, or,
+ * failing that, when CONFIG_MEMSIZE reads back non-zero.
+ */
+bool r600_card_posted(struct radeon_device *rdev)
+{
+       uint32_t crtc_ctl;
+
+       /* both CRTC control registers are always read */
+       crtc_ctl = RREG32(D1CRTC_CONTROL);
+       crtc_ctl |= RREG32(D2CRTC_CONTROL);
+       if (crtc_ctl & CRTC_EN)
+               return true;
+
+       /* CRTCs are off: fall back to the memory size register */
+       return RREG32(CONFIG_MEMSIZE) != 0;
+}
+
+/*
+ * Bring the GPU back up: reset, memory controller, PCIE GART, GPU init,
+ * ring, CP microcode, CP and finally writeback.
+ *
+ * Stops at the first failing step and returns its error; 0 on success.
+ */
+int r600_resume(struct radeon_device *rdev)
+{
+       int r;
+
+       r600_gpu_reset(rdev);
+       r600_mc_resume(rdev);
+
+       r = r600_pcie_gart_enable(rdev);
+       if (r)
+               return r;
+
+       r600_gpu_init(rdev);
+
+       r = radeon_ring_init(rdev, rdev->cp.ring_size);
+       if (r)
+               return r;
+
+       r = r600_cp_load_microcode(rdev);
+       if (r)
+               return r;
+
+       r = r600_cp_resume(rdev);
+       if (r)
+               return r;
+
+       /* last step: its result is the result of the whole resume */
+       return r600_wb_init(rdev);
+}
+
+/*
+ * Suspend the GPU.  Currently this only stops the CP via r600_cp_stop(),
+ * without waiting for the ring to drain, and always returns 0.
+ */
+int r600_suspend(struct radeon_device *rdev)
+{
+       /* FIXME: we should wait for ring to be empty */
+       r600_cp_stop(rdev);
+       return 0;
+}
+
+/*
+ * ASIC-specific initialization for r600.
+ *
+ * The plan is to move initialization into this function and to use helper
+ * functions, so that radeon_device_init() does little more than call the
+ * ASIC-specific function.  This should also allow removing a number of
+ * callbacks such as vram_info.
+ *
+ * Returns 0 on success or the error of the first failing step.  When
+ * r600_mc_init() or r600_resume() fails on an AGP card, everything is torn
+ * down with r600_fini(), RADEON_IS_AGP is cleared and initialization is
+ * retried once through a recursive call.
+ *
+ * NOTE(review): the other error paths return without undoing earlier
+ * steps; presumably the caller cleans up - confirm.
+ */
+int r600_init(struct radeon_device *rdev)
 {
-       uint32_t r;
+       int r;
 
-       WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
-       (void)RREG32(R600_PCIE_PORT_INDEX);
-       r = RREG32(R600_PCIE_PORT_DATA);
+       rdev->new_init_path = true;
+       r = radeon_dummy_page_init(rdev);
+       if (r)
+               return r;
+       /* a debugfs registration failure is logged but not fatal */
+       if (r600_debugfs_mc_info_init(rdev)) {
+               DRM_ERROR("Failed to register debugfs file for mc !\n");
+       }
+       /* This don't do much */
+       r = radeon_gem_init(rdev);
+       if (r)
+               return r;
+       /* Read BIOS */
+       if (!radeon_get_bios(rdev)) {
+               if (ASIC_IS_AVIVO(rdev))
+                       return -EINVAL;
+       }
+       /* Must be an ATOMBIOS */
+       if (!rdev->is_atom_bios)
+               return -EINVAL;
+       r = radeon_atombios_init(rdev);
+       if (r)
+               return r;
+       /* Post card if necessary */
+       if (!r600_card_posted(rdev) && rdev->bios) {
+               DRM_INFO("GPU not posted. posting now...\n");
+               atom_asic_init(rdev->mode_info.atom_context);
+       }
+       /* Initialize scratch registers */
+       r600_scratch_init(rdev);
+       /* Initialize surface registers */
+       radeon_surface_init(rdev);
+       r = radeon_clocks_init(rdev);
+       if (r)
+               return r;
+       /* Fence driver */
+       r = radeon_fence_driver_init(rdev);
+       if (r)
+               return r;
+       r = r600_mc_init(rdev);
+       if (r) {
+               if (rdev->flags & RADEON_IS_AGP) {
+                       /* Retry with disabling AGP */
+                       r600_fini(rdev);
+                       rdev->flags &= ~RADEON_IS_AGP;
+                       return r600_init(rdev);
+               }
+               return r;
+       }
+       /* Memory manager */
+       r = radeon_object_init(rdev);
+       if (r)
+               return r;
+       /* the ring object itself is not created here; only its size is fixed */
+       rdev->cp.ring_obj = NULL;
+       r600_ring_init(rdev, 1024 * 1024);
+
+       /* fetch the CP microcode only if it is not already loaded */
+       if (!rdev->me_fw || !rdev->pfp_fw) {
+               r = r600_cp_init_microcode(rdev);
+               if (r) {
+                       DRM_ERROR("Failed to load firmware!\n");
+                       return r;
+               }
+       }
+
+       /* hardware bring-up shared with the resume path */
+       r = r600_resume(rdev);
+       if (r) {
+               if (rdev->flags & RADEON_IS_AGP) {
+                       /* Retry with disabling AGP */
+                       r600_fini(rdev);
+                       rdev->flags &= ~RADEON_IS_AGP;
+                       return r600_init(rdev);
+               }
+               return r;
+       }
+       r = radeon_ib_pool_init(rdev);
+       if (r) {
+               DRM_ERROR("radeon: failled initializing IB pool (%d).\n", r);
+               return r;
+       }
+       r = r600_blit_init(rdev);
+       if (r) {
+               DRM_ERROR("radeon: failled blitter (%d).\n", r);
+               return r;
+       }
+       r = radeon_ib_test(rdev);
+       if (r) {
+               DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+                       return r;
+       }
+       return 0;
+}
+
+/*
+ * Tear down everything r600_init() set up: stop the CP, then release the
+ * blitter, ring, GART, GEM, fence driver, clocks, AGP (if in use), memory
+ * manager, BIOS parser state, the BIOS copy and the dummy page, in that
+ * order.
+ */
+void r600_fini(struct radeon_device *rdev)
+{
+       /* Suspend operations */
+       r600_suspend(rdev);
+
+       r600_blit_fini(rdev);
+       radeon_ring_fini(rdev);
+       r600_pcie_gart_disable(rdev);
+       radeon_gart_table_vram_free(rdev);
+       radeon_gart_fini(rdev);
+       radeon_gem_fini(rdev);
+       radeon_fence_driver_fini(rdev);
+       radeon_clocks_fini(rdev);
+#if __OS_HAS_AGP
+       if (rdev->flags & RADEON_IS_AGP)
+               radeon_agp_fini(rdev);
+#endif
+       radeon_object_fini(rdev);
+       /* release whichever BIOS parser was set up */
+       if (rdev->is_atom_bios)
+               radeon_atombios_fini(rdev);
+       else
+               radeon_combios_fini(rdev);
+       /* clear the pointer so a later fini or re-init never sees stale memory */
+       kfree(rdev->bios);
+       rdev->bios = NULL;
+       radeon_dummy_page_fini(rdev);
+}
+
+
+/*
+ * CS stuff
+ */
+/*
+ * Queue an INDIRECT_BUFFER packet for @ib: low address word (bits 1:0
+ * masked off), bits 39:32 of the address, and the length in dwords.
+ */
+void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+       const u32 ib_addr_lo = ib->gpu_addr & 0xFFFFFFFC;
+       const u32 ib_addr_hi = upper_32_bits(ib->gpu_addr) & 0xFF;
+
+       /* FIXME: implement */
+       radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+       radeon_ring_write(rdev, ib_addr_lo);
+       radeon_ring_write(rdev, ib_addr_hi);
+       radeon_ring_write(rdev, ib->length_dw);
+}
+
+/*
+ * Check that the CP can execute an indirect buffer.
+ *
+ * A scratch register is seeded with 0xCAFEDEAD; a 16-dword IB holding one
+ * SET_CONFIG_REG packet (writing 0xDEADBEEF to that register) followed by
+ * PACKET2 padding is scheduled; after its fence has signalled the register
+ * is polled once per microsecond for up to rdev->usec_timeout iterations.
+ *
+ * Returns 0 on success, the error of the failing radeon_*() helper, or
+ * -EINVAL if the value never appears.  The scratch register and the IB are
+ * released on every path once they have been obtained.
+ */
+int r600_ib_test(struct radeon_device *rdev)
+{
+       struct radeon_ib *ib;
+       uint32_t scratch;
+       uint32_t tmp = 0;
+       unsigned i;
+       int r;
+
+       r = radeon_scratch_get(rdev, &scratch);
+       if (r) {
+               DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
+               return r;
+       }
+       WREG32(scratch, 0xCAFEDEAD);
+       r = radeon_ib_get(rdev, &ib);
+       if (r) {
+               DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+               goto out_free_scratch;
+       }
+       ib->ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
+       ib->ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+       ib->ptr[2] = 0xDEADBEEF;
+       /* pad the remaining 13 dwords of the IB with PACKET2(0) */
+       for (i = 3; i < 16; i++)
+               ib->ptr[i] = PACKET2(0);
+       ib->length_dw = 16;
+       r = radeon_ib_schedule(rdev, ib);
+       if (r) {
+               DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+               goto out_free_ib;
+       }
+       r = radeon_fence_wait(ib->fence, false);
+       if (r) {
+               DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+               goto out_free_ib;
+       }
+       for (i = 0; i < rdev->usec_timeout; i++) {
+               tmp = RREG32(scratch);
+               if (tmp == 0xDEADBEEF)
+                       break;
+               DRM_UDELAY(1);
+       }
+       if (i < rdev->usec_timeout) {
+               DRM_INFO("ib test succeeded in %u usecs\n", i);
+       } else {
+               DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
+                         scratch, tmp);
+               r = -EINVAL;
+       }
+out_free_ib:
+       radeon_ib_free(rdev, &ib);
+out_free_scratch:
+       radeon_scratch_free(rdev, scratch);
        return r;
 }
 
-void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+
+
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+/*
+ * debugfs show callback: prints CP_STAT, the ring read and write pointers,
+ * the free dword count and the ring contents starting at the read pointer.
+ * Always returns 0.
+ */
+static int r600_debugfs_cp_ring_info(struct seq_file *m, void *data)
 {
-       WREG32(R600_PCIE_PORT_INDEX, ((reg) & 0xff));
-       (void)RREG32(R600_PCIE_PORT_INDEX);
-       WREG32(R600_PCIE_PORT_DATA, (v));
-       (void)RREG32(R600_PCIE_PORT_DATA);
+       struct drm_info_node *node = (struct drm_info_node *) m->private;
+       struct drm_device *dev = node->minor->dev;
+       struct radeon_device *rdev = dev->dev_private;
+       uint32_t rdp, wdp;
+       unsigned count, i, j;
+
+       /* presumably refreshes rdev->cp.ring_free_dw, printed below - confirm */
+       radeon_ring_free_size(rdev);
+       rdp = RREG32(CP_RB_RPTR);
+       wdp = RREG32(CP_RB_WPTR);
+       /*
+        * NOTE(review): this is (rdp - wdp) wrapped by ptr_mask, yet it is
+        * printed as "dwords in ring"; pending work would normally be
+        * wdp - rdp - confirm which is intended.
+        */
+       count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
+       seq_printf(m, "CP_STAT 0x%08x\n", RREG32(CP_STAT));
+       seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
+       seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
+       seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
+       seq_printf(m, "%u dwords in ring\n", count);
+       /* inclusive bound: count + 1 entries are dumped */
+       for (j = 0; j <= count; j++) {
+               i = (rdp + j) & rdev->cp.ptr_mask;
+               seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
+       }
+       return 0;
+}
+
+/*
+ * debugfs show callback: dumps SRBM_STATUS and VM_L2_STATUS through
+ * DREG32_SYS().  Always returns 0.
+ */
+static int r600_debugfs_mc_info(struct seq_file *m, void *data)
+{
+       struct drm_info_node *node = (struct drm_info_node *) m->private;
+       struct drm_device *dev = node->minor->dev;
+       struct radeon_device *rdev = dev->dev_private;
+
+       DREG32_SYS(m, rdev, R_000E50_SRBM_STATUS);
+       DREG32_SYS(m, rdev, VM_L2_STATUS);
+       return 0;
+}
+
+/* debugfs entries registered by r600_debugfs_mc_info_init() */
+static struct drm_info_list r600_mc_info_list[] = {
+       {"r600_mc_info", r600_debugfs_mc_info, 0, NULL},
+       {"r600_ring_info", r600_debugfs_cp_ring_info, 0, NULL},
+};
+#endif
+
+/*
+ * Register the r600 debugfs files (r600_mc_info_list).
+ * Returns the result of radeon_debugfs_add_files(), or 0 when the kernel
+ * is built without CONFIG_DEBUG_FS.
+ */
+int r600_debugfs_mc_info_init(struct radeon_device *rdev)
+{
+#if defined(CONFIG_DEBUG_FS)
+       return radeon_debugfs_add_files(rdev, r600_mc_info_list, ARRAY_SIZE(r600_mc_info_list));
+#else
+       return 0;
+#endif
 }
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
new file mode 100644 (file)
index 0000000..c51402e
--- /dev/null
@@ -0,0 +1,855 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *     Alex Deucher <alexander.deucher@amd.com>
+ */
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_drv.h"
+
+#include "r600_blit_shaders.h"
+
+#define DI_PT_RECTLIST        0x11
+#define DI_INDEX_SIZE_16_BIT  0x0
+#define DI_SRC_SEL_AUTO_INDEX 0x2
+
+#define FMT_8                 0x1
+#define FMT_5_6_5             0x8
+#define FMT_8_8_8_8           0x1a
+#define COLOR_8               0x1
+#define COLOR_5_6_5           0x8
+#define COLOR_8_8_8_8         0x1a
+
+/*
+ * Emit the CB_COLOR0_* context registers describing the render target.
+ *
+ * @h is rounded up to a multiple of 8 (minimum 8).  The pitch field is
+ * w / 8 - 1 and the slice field is w * h / 64 - 1.  Families strictly
+ * between CHIP_R600 and CHIP_RV770 additionally get a SURFACE_BASE_UPDATE
+ * packet right after the base address, which makes the sequence 23 instead
+ * of 21 dwords long.
+ */
+static inline void
+set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
+{
+       u32 cb_color_info;
+       int pitch, slice;
+       int needs_base_update;
+       RING_LOCALS;
+       DRM_DEBUG("\n");
+
+       h = (h + 7) & ~7;
+       if (h < 8)
+               h = 8;
+
+       cb_color_info = ((format << 2) | (1 << 27));
+       pitch = (w / 8) - 1;
+       slice = ((w * h) / 64) - 1;
+
+       needs_base_update =
+               ((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
+               ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770);
+
+       if (needs_base_update) {
+               BEGIN_RING(21 + 2);
+       } else {
+               BEGIN_RING(21);
+       }
+
+       /* base address, common to all families */
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(gpu_addr >> 8);
+
+       if (needs_base_update) {
+               OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+               OUT_RING(2 << 0);
+       }
+
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING((pitch << 0) | (slice << 10));
+
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(0);
+
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(cb_color_info);
+
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(0);
+
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(0);
+
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(0);
+
+       ADVANCE_RING();
+}
+
+/*
+ * Emit a 5-dword SURFACE_SYNC packet for @size bytes at @mc_addr.
+ *
+ * The size is passed on in units of 256 bytes, rounded up; the special
+ * value 0xffffffff is passed through unchanged.  The address is emitted
+ * shifted right by 8 and the poll interval is fixed at 10.
+ */
+static inline void
+cp_set_surface_sync(drm_radeon_private_t *dev_priv,
+                   u32 sync_type, u32 size, u64 mc_addr)
+{
+       u32 cp_coher_size = (size == 0xffffffff) ? 0xffffffff
+                                                : ((size + 255) >> 8);
+       RING_LOCALS;
+       DRM_DEBUG("\n");
+
+       BEGIN_RING(5);
+       OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
+       OUT_RING(sync_type);
+       OUT_RING(cp_coher_size);
+       OUT_RING((mc_addr >> 8));
+       OUT_RING(10); /* poll interval */
+       ADVANCE_RING();
+}
+
+/*
+ * Copy the r6xx blit vertex and pixel shaders into the blit vertex buffer
+ * and emit the SQ_PGM_* context registers that point the hardware at them.
+ *
+ * The vertex shader is placed at the start of the buffer and the pixel
+ * shader 256 bytes further; 512 bytes of the buffer are marked as used.
+ * A surface sync with R600_SH_ACTION_ENA over those 512 bytes follows.
+ */
+static inline void
+set_shaders(struct drm_device *dev)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       u64 gpu_addr;
+       int shader_size, i;
+       u32 *vs, *ps;
+       uint32_t sq_pgm_resources;
+       RING_LOCALS;
+       DRM_DEBUG("\n");
+
+       /* load shaders */
+       /* CPU-side pointers into the blit buffer: VS at +0, PS at +256 bytes */
+       vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
+       ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
+
+       shader_size = r6xx_vs_size;
+       for (i = 0; i < shader_size; i++)
+               vs[i] = r6xx_vs[i];
+       shader_size = r6xx_ps_size;
+       for (i = 0; i < shader_size; i++)
+               ps[i] = r6xx_ps[i];
+
+       dev_priv->blit_vb->used = 512;
+
+       /* GPU-side address of the same buffer */
+       gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
+
+       /* setup shader regs */
+       sq_pgm_resources = (1 << 0);
+
+       /* 7 SET_CONTEXT_REG packets of 3 dwords each = 21 dwords */
+       BEGIN_RING(9 + 12);
+       /* VS */
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(gpu_addr >> 8);
+
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(sq_pgm_resources);
+
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(0);
+
+       /* PS */
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING((gpu_addr + 256) >> 8);
+
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(sq_pgm_resources | (1 << 28));
+
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(2);
+
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
+       OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(0);
+       ADVANCE_RING();
+
+       /* sync over the 512 bytes holding both shaders */
+       cp_set_surface_sync(dev_priv,
+                           R600_SH_ACTION_ENA, 512, gpu_addr);
+}
+
+/*
+ * Describe the 48-byte vertex buffer at gpu_addr to the GPU.
+ *
+ * Emits a 9-dword SET_RESOURCE packet for resource offset 0x460 and then a
+ * surface sync over the same 48 bytes.  RV610, RV620, RS780, RS880 and RV710
+ * sync with R600_TC_ACTION_ENA; all other families use R600_VC_ACTION_ENA.
+ */
+static inline void
+set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
+{
+       uint32_t vtx_word2, sync_type;
+       RING_LOCALS;
+       DRM_DEBUG("\n");
+
+       /* low byte: address bits 39:32; the value 16 is placed from bit 8 up */
+       vtx_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
+
+       BEGIN_RING(9);
+       OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+       OUT_RING(0x460);
+       OUT_RING(gpu_addr & 0xffffffff);
+       OUT_RING(48 - 1);
+       OUT_RING(vtx_word2);
+       OUT_RING(1 << 0);
+       OUT_RING(0);
+       OUT_RING(0);
+       OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
+       ADVANCE_RING();
+
+       switch (dev_priv->flags & RADEON_FAMILY_MASK) {
+       case CHIP_RV610:
+       case CHIP_RV620:
+       case CHIP_RS780:
+       case CHIP_RS880:
+       case CHIP_RV710:
+               sync_type = R600_TC_ACTION_ENA;
+               break;
+       default:
+               sync_type = R600_VC_ACTION_ENA;
+               break;
+       }
+
+       cp_set_surface_sync(dev_priv, sync_type, 48, gpu_addr);
+}
+
+/*
+ * Emit the texture resource used as the blit source.
+ *
+ * format, w, h and pitch are packed into the resource words below and sent
+ * in a 9-dword SET_RESOURCE packet for resource offset 0.  gpu_addr >> 8 is
+ * written twice, into two consecutive words.  A height below 1 is treated
+ * as 1.
+ */
+static inline void
+set_tex_resource(drm_radeon_private_t *dev_priv,
+                int format, int w, int h, int pitch, u64 gpu_addr)
+{
+       uint32_t tex_word0, tex_word1, tex_word4;
+       RING_LOCALS;
+       DRM_DEBUG("\n");
+
+       h = (h < 1) ? 1 : h;
+
+       /* word0: bit 0 set, (pitch / 8) - 1 from bit 8, width - 1 from bit 19 */
+       tex_word0 = (1 << 0) | (((pitch >> 3) - 1) << 8) | ((w - 1) << 19);
+
+       /* word1: height - 1 in the low bits, format from bit 26 */
+       tex_word1 = (format << 26) | ((h - 1) << 0);
+
+       tex_word4 = (1 << 14) | (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);
+
+       BEGIN_RING(9);
+       OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+       OUT_RING(0);
+       OUT_RING(tex_word0);
+       OUT_RING(tex_word1);
+       OUT_RING(gpu_addr >> 8);
+       OUT_RING(gpu_addr >> 8);
+       OUT_RING(tex_word4);
+       OUT_RING(0);
+       OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
+       ADVANCE_RING();
+}
+
+/*
+ * Program the screen, generic and window scissor rectangles to the same
+ * (x1, y1) - (x2, y2) box.  The generic and window top-left words also have
+ * bit 31 set.  Emits 12 dwords.
+ */
+static inline void
+set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
+{
+       const u32 top_left = (x1 << 0) | (y1 << 16);
+       const u32 bottom_right = (x2 << 0) | (y2 << 16);
+       RING_LOCALS;
+       DRM_DEBUG("\n");
+
+       BEGIN_RING(12);
+
+       /* screen scissor */
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+       OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(top_left);
+       OUT_RING(bottom_right);
+
+       /* generic scissor */
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+       OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(top_left | (1 << 31));
+       OUT_RING(bottom_right);
+
+       /* window scissor */
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
+       OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
+       OUT_RING(top_left | (1 << 31));
+       OUT_RING(bottom_right);
+
+       ADVANCE_RING();
+}
+
+/*
+ * Emit the draw for one blit rectangle: select DI_PT_RECTLIST as primitive
+ * type, 16-bit index size, one instance, then an auto-indexed draw of 3
+ * vertices.  Emits 10 dwords and commits the ring.
+ */
+static inline void
+draw_auto(drm_radeon_private_t *dev_priv)
+{
+       const u32 prim_type_idx =
+               (R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2;
+       RING_LOCALS;
+       DRM_DEBUG("\n");
+
+       BEGIN_RING(10);
+
+       /* primitive type */
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+       OUT_RING(prim_type_idx);
+       OUT_RING(DI_PT_RECTLIST);
+
+       /* index size */
+       OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+       OUT_RING(DI_INDEX_SIZE_16_BIT);
+
+       /* instance count */
+       OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+       OUT_RING(1);
+
+       /* 3 vertices, indices generated automatically */
+       OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
+       OUT_RING(3);
+       OUT_RING(DI_SRC_SEL_AUTO_INDEX);
+
+       ADVANCE_RING();
+       COMMIT_RING();
+}
+
+/*
+ * Emit the default 3D state followed by the SQ configuration.
+ *
+ * Per-family GPR, thread and stack-entry budgets are chosen first (unknown
+ * families share the RV610/RV620/RS780/RS880 values).  The ring then
+ * receives the r7xx default state table on CHIP_RV770 and later, or the
+ * r6xx table otherwise, followed by 10 more dwords: a cache flush and
+ * invalidate event and the six SQ registers starting at R600_SQ_CONFIG.
+ */
+static inline void
+set_default_state(drm_radeon_private_t *dev_priv)
+{
+       const u32 family = dev_priv->flags & RADEON_FAMILY_MASK;
+       int default_state_dw, i;
+       u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
+       u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
+       /* these three budgets are the same for every family handled below */
+       int num_temp_gprs = 4, num_gs_gprs = 0, num_es_gprs = 0;
+       int num_ps_gprs, num_vs_gprs;
+       int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
+       int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
+       RING_LOCALS;
+
+       switch (family) {
+       case CHIP_R600:
+               num_ps_gprs = 192;
+               num_vs_gprs = 56;
+               num_ps_threads = 136;
+               num_vs_threads = 48;
+               num_gs_threads = 4;
+               num_es_threads = 4;
+               num_ps_stack_entries = 128;
+               num_vs_stack_entries = 128;
+               num_gs_stack_entries = 0;
+               num_es_stack_entries = 0;
+               break;
+       case CHIP_RV630:
+       case CHIP_RV635:
+               num_ps_gprs = 84;
+               num_vs_gprs = 36;
+               num_ps_threads = 144;
+               num_vs_threads = 40;
+               num_gs_threads = 4;
+               num_es_threads = 4;
+               num_ps_stack_entries = 40;
+               num_vs_stack_entries = 40;
+               num_gs_stack_entries = 32;
+               num_es_stack_entries = 16;
+               break;
+       case CHIP_RV670:
+               num_ps_gprs = 144;
+               num_vs_gprs = 40;
+               num_ps_threads = 136;
+               num_vs_threads = 48;
+               num_gs_threads = 4;
+               num_es_threads = 4;
+               num_ps_stack_entries = 40;
+               num_vs_stack_entries = 40;
+               num_gs_stack_entries = 32;
+               num_es_stack_entries = 16;
+               break;
+       case CHIP_RV770:
+               num_ps_gprs = 192;
+               num_vs_gprs = 56;
+               num_ps_threads = 188;
+               num_vs_threads = 60;
+               num_gs_threads = 0;
+               num_es_threads = 0;
+               num_ps_stack_entries = 256;
+               num_vs_stack_entries = 256;
+               num_gs_stack_entries = 0;
+               num_es_stack_entries = 0;
+               break;
+       case CHIP_RV730:
+       case CHIP_RV740:
+               num_ps_gprs = 84;
+               num_vs_gprs = 36;
+               num_ps_threads = 188;
+               num_vs_threads = 60;
+               num_gs_threads = 0;
+               num_es_threads = 0;
+               num_ps_stack_entries = 128;
+               num_vs_stack_entries = 128;
+               num_gs_stack_entries = 0;
+               num_es_stack_entries = 0;
+               break;
+       case CHIP_RV710:
+               num_ps_gprs = 192;
+               num_vs_gprs = 56;
+               num_ps_threads = 144;
+               num_vs_threads = 48;
+               num_gs_threads = 0;
+               num_es_threads = 0;
+               num_ps_stack_entries = 128;
+               num_vs_stack_entries = 128;
+               num_gs_stack_entries = 0;
+               num_es_stack_entries = 0;
+               break;
+       case CHIP_RV610:
+       case CHIP_RV620:
+       case CHIP_RS780:
+       case CHIP_RS880:
+       default:
+               num_ps_gprs = 84;
+               num_vs_gprs = 36;
+               num_ps_threads = 136;
+               num_vs_threads = 48;
+               num_gs_threads = 4;
+               num_es_threads = 4;
+               num_ps_stack_entries = 40;
+               num_vs_stack_entries = 40;
+               num_gs_stack_entries = 32;
+               num_es_stack_entries = 16;
+               break;
+       }
+
+       /* R600_VC_ENABLE is left clear on these five families only */
+       switch (family) {
+       case CHIP_RV610:
+       case CHIP_RV620:
+       case CHIP_RS780:
+       case CHIP_RS880:
+       case CHIP_RV710:
+               sq_config = 0;
+               break;
+       default:
+               sq_config = R600_VC_ENABLE;
+               break;
+       }
+       sq_config |= (R600_DX9_CONSTS |
+                     R600_ALU_INST_PREFER_VECTOR |
+                     R600_PS_PRIO(0) |
+                     R600_VS_PRIO(1) |
+                     R600_GS_PRIO(2) |
+                     R600_ES_PRIO(3));
+
+       sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
+                                 R600_NUM_VS_GPRS(num_vs_gprs) |
+                                 R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+       sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
+                                 R600_NUM_ES_GPRS(num_es_gprs));
+       sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
+                                  R600_NUM_VS_THREADS(num_vs_threads) |
+                                  R600_NUM_GS_THREADS(num_gs_threads) |
+                                  R600_NUM_ES_THREADS(num_es_threads));
+       sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+                                   R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+       sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+                                   R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+
+       /* the extra 10 dwords hold the event write (2) and SQ config (8) */
+       if (family >= CHIP_RV770) {
+               default_state_dw = r7xx_default_size * 4;
+               BEGIN_RING(default_state_dw + 10);
+               for (i = 0; i < default_state_dw; i++)
+                       OUT_RING(r7xx_default_state[i]);
+       } else {
+               default_state_dw = r6xx_default_size * 4;
+               BEGIN_RING(default_state_dw + 10);
+               for (i = 0; i < default_state_dw; i++)
+                       OUT_RING(r6xx_default_state[i]);
+       }
+
+       OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
+       OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
+
+       /* SQ config */
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
+       OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
+       OUT_RING(sq_config);
+       OUT_RING(sq_gpr_resource_mgmt_1);
+       OUT_RING(sq_gpr_resource_mgmt_2);
+       OUT_RING(sq_thread_resource_mgmt);
+       OUT_RING(sq_stack_resource_mgmt_1);
+       OUT_RING(sq_stack_resource_mgmt_2);
+       ADVANCE_RING();
+}
+
+/*
+ * Convert a small unsigned integer to the bit pattern of the equivalent
+ * IEEE-754 single-precision float.
+ *
+ * Only the low 14 bits of input are used (input & 0x3fff); anything above
+ * them is silently dropped.  Zero maps to an all-zero pattern.
+ */
+static inline uint32_t i2f(uint32_t input)
+{
+       uint32_t mantissa = (input & 0x3fff) << 10;
+       uint32_t biased_exp = 140;      /* 127 bias + 13 for a set bit 23 */
+
+       if (mantissa == 0)
+               return 0;       /* 0 is a special case */
+
+       /* normalise: shift left until the leading one reaches bit 23 */
+       while (!(mantissa & 0x800000)) {
+               mantissa <<= 1;
+               biased_exp--;
+       }
+
+       /* the leading one is implicit in the encoding, so mask it off */
+       return (biased_exp << 23) | (mantissa & 0x7fffff);
+}
+
+
+/*
+ * Take a buffer from the freelist and store it in dev_priv->blit_vb for use
+ * as the blit vertex buffer.
+ *
+ * Returns 0 on success.  When no buffer is available, blit_vb is left NULL,
+ * an error is logged and -EAGAIN is returned.
+ */
+int r600_nomm_get_vb(struct drm_device *dev)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+
+       dev_priv->blit_vb = radeon_freelist_get(dev);
+       if (dev_priv->blit_vb)
+               return 0;
+
+       DRM_ERROR("Unable to allocate vertex buffer for blit\n");
+       return -EAGAIN;
+}
+
+/*
+ * Hand the current blit vertex buffer back via radeon_cp_discard_buffer(),
+ * after resetting its used count to 0.
+ *
+ * Does nothing when there is no current buffer: r600_nomm_get_vb() leaves
+ * blit_vb NULL on failure and the blit paths bail out in that state, so a
+ * later call here must not dereference it.
+ */
+void r600_nomm_put_vb(struct drm_device *dev)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+
+       if (!dev_priv->blit_vb)
+               return;
+
+       dev_priv->blit_vb->used = 0;
+       radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
+}
+
+/*
+ * Return the CPU address of the first unused byte of the current blit
+ * vertex buffer: the AGP buffer map handle plus the buffer's offset plus
+ * the number of bytes already used.
+ */
+void *r600_nomm_get_vb_ptr(struct drm_device *dev)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       char *cursor = (char *)dev->agp_buffer_map->handle;
+
+       cursor += dev_priv->blit_vb->offset;
+       cursor += dev_priv->blit_vb->used;
+
+       return cursor;
+}
+
+/*
+ * Set up for a series of blits: grab a vertex buffer, record file_priv as
+ * its owner, then emit the default state and the blit shaders.
+ *
+ * Returns 0 on success, or the error from r600_nomm_get_vb() (-EAGAIN) when
+ * no vertex buffer could be obtained; nothing is emitted in that case.
+ */
+int
+r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       int ret;
+       DRM_DEBUG("\n");
+
+       /* blit_vb is NULL on failure, so it must not be touched below */
+       ret = r600_nomm_get_vb(dev);
+       if (ret)
+               return ret;
+
+       dev_priv->blit_vb->file_priv = file_priv;
+
+       set_default_state(dev_priv);
+       set_shaders(dev);
+
+       return 0;
+}
+
+
+/*
+ * Finish a series of blits: emit a cache flush and invalidate event, wait
+ * for the 3D engine to be idle and clean, commit the ring and release the
+ * blit vertex buffer.
+ */
+void
+r600_done_blit_copy(struct drm_device *dev)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       const u32 wait_until_idx =
+               (R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2;
+       const u32 wait_flags = RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN;
+       RING_LOCALS;
+       DRM_DEBUG("\n");
+
+       BEGIN_RING(5);
+
+       OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
+       OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
+
+       /* wait for 3D idle clean */
+       OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+       OUT_RING(wait_until_idx);
+       OUT_RING(wait_flags);
+
+       ADVANCE_RING();
+       COMMIT_RING();
+
+       r600_nomm_put_vb(dev);
+}
+
+/*
+ * Copy size_bytes from src_gpu_addr to dst_gpu_addr as a series of
+ * rectangle draws, treating memory as texels of cpp bytes each.
+ *
+ * Each pass copies either one partial row (when an address is not 256-byte
+ * aligned) or up to 8192 full rows of 8192 texels.  All coordinates and
+ * widths handed to the vertex data, texture, render target and scissors are
+ * in texels, i.e. byte quantities divided by cpp.
+ */
+static void
+r600_blit_copy_texels(struct drm_device *dev,
+                     uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+                     int size_bytes, int cpp, int tex_format, int cb_format)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       const int max_bytes = 8192 * cpp;
+       u64 vb_addr;
+       u32 *vb;
+
+       vb = r600_nomm_get_vb_ptr(dev);
+
+       while (size_bytes) {
+               int cur_size = size_bytes;
+               int src_x = src_gpu_addr & 255;
+               int dst_x = dst_gpu_addr & 255;
+               int h = 1;
+
+               /* surface bases are 256-byte aligned; the rest becomes x */
+               src_gpu_addr = src_gpu_addr & ~255;
+               dst_gpu_addr = dst_gpu_addr & ~255;
+
+               if (!src_x && !dst_x) {
+                       /* both aligned: copy whole rows, at most 8192 */
+                       h = (cur_size / max_bytes);
+                       if (h > 8192)
+                               h = 8192;
+                       if (h == 0)
+                               h = 1;
+                       else
+                               cur_size = max_bytes;
+               } else {
+                       /* single row, clipped so x + width fits in a row */
+                       if (cur_size > max_bytes)
+                               cur_size = max_bytes;
+                       if (cur_size > (max_bytes - dst_x))
+                               cur_size = (max_bytes - dst_x);
+                       if (cur_size > (max_bytes - src_x))
+                               cur_size = (max_bytes - src_x);
+               }
+
+               /* need room for 12 dwords (48 bytes) of vertex data */
+               if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
+                       struct drm_file *file_priv = dev_priv->blit_vb->file_priv;
+
+                       r600_nomm_put_vb(dev);
+                       r600_nomm_get_vb(dev);
+                       if (!dev_priv->blit_vb)
+                               return;
+                       /*
+                        * r600_nomm_put_vb() dereferences file_priv, so the
+                        * replacement buffer needs the same owner recorded
+                        * as r600_prepare_blit_copy() did for the first one.
+                        */
+                       dev_priv->blit_vb->file_priv = file_priv;
+                       set_shaders(dev);
+                       vb = r600_nomm_get_vb_ptr(dev);
+               }
+
+               /* three vertices of (dst x, dst y, src x, src y) */
+               vb[0] = i2f(dst_x / cpp);
+               vb[1] = 0;
+               vb[2] = i2f(src_x / cpp);
+               vb[3] = 0;
+
+               vb[4] = i2f(dst_x / cpp);
+               vb[5] = i2f(h);
+               vb[6] = i2f(src_x / cpp);
+               vb[7] = i2f(h);
+
+               vb[8] = i2f((dst_x + cur_size) / cpp);
+               vb[9] = i2f(h);
+               vb[10] = i2f((src_x + cur_size) / cpp);
+               vb[11] = i2f(h);
+
+               /* src */
+               set_tex_resource(dev_priv, tex_format,
+                                (src_x + cur_size) / cpp,
+                                h, (src_x + cur_size) / cpp,
+                                src_gpu_addr);
+
+               cp_set_surface_sync(dev_priv,
+                                   R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+               /* dst: width in texels, matching the source and vertices */
+               set_render_target(dev_priv, cb_format,
+                                 (dst_x + cur_size) / cpp, h,
+                                 dst_gpu_addr);
+
+               /* scissors: same box as the destination vertices */
+               set_scissors(dev_priv, dst_x / cpp, 0,
+                            (dst_x + cur_size) / cpp, h);
+
+               /* Vertex buffer setup */
+               vb_addr = dev_priv->gart_buffers_offset +
+                       dev_priv->blit_vb->offset +
+                       dev_priv->blit_vb->used;
+               set_vtx_resource(dev_priv, vb_addr);
+
+               /* draw */
+               draw_auto(dev_priv);
+
+               cp_set_surface_sync(dev_priv,
+                                   R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
+                                   cur_size * h, dst_gpu_addr);
+
+               vb += 12;
+               dev_priv->blit_vb->used += 12 * 4;
+
+               src_gpu_addr += cur_size * h;
+               dst_gpu_addr += cur_size * h;
+               size_bytes -= cur_size * h;
+       }
+}
+
+/*
+ * Copy size_bytes of GPU memory from src_gpu_addr to dst_gpu_addr using the
+ * 3D engine.
+ *
+ * When the size and both addresses are multiples of 4 the copy is done with
+ * 32-bit texels (FMT_8_8_8_8 / COLOR_8_8_8_8); otherwise it falls back to
+ * 8-bit texels (FMT_8 / COLOR_8).  Returns early, without completing the
+ * copy, if a replacement vertex buffer cannot be obtained part-way through.
+ */
+void
+r600_blit_copy(struct drm_device *dev,
+              uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+              int size_bytes)
+{
+       if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3))
+               r600_blit_copy_texels(dev, src_gpu_addr, dst_gpu_addr,
+                                     size_bytes, 1, FMT_8, COLOR_8);
+       else
+               r600_blit_copy_texels(dev, src_gpu_addr, dst_gpu_addr,
+                                     size_bytes, 4, FMT_8_8_8_8, COLOR_8_8_8_8);
+}
+
+/*
+ * Blit a w x h rectangle from (sx, sy) in the source surface to (dx, dy) in
+ * the destination surface.
+ *
+ * cpp selects the formats: 4 uses the 8_8_8_8 formats, 2 uses 5_6_5 and any
+ * other value uses the 8-bit formats.  Pitches are given in bytes and are
+ * divided by cpp before being programmed.  Consumes 48 bytes of the blit
+ * vertex buffer; returns without drawing if a replacement vertex buffer is
+ * needed and cannot be obtained.
+ */
+void
+r600_blit_swap(struct drm_device *dev,
+              uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
+              int sx, int sy, int dx, int dy,
+              int w, int h, int src_pitch, int dst_pitch, int cpp)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       int cb_format, tex_format;
+       u64 vb_addr;
+       u32 *vb;
+
+       /* need room for 12 dwords (48 bytes) of vertex data */
+       if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
+               struct drm_file *file_priv = dev_priv->blit_vb->file_priv;
+
+               r600_nomm_put_vb(dev);
+               r600_nomm_get_vb(dev);
+               if (!dev_priv->blit_vb)
+                       return;
+
+               /*
+                * r600_nomm_put_vb() dereferences file_priv, so the
+                * replacement buffer needs the same owner recorded as
+                * r600_prepare_blit_copy() did for the first one.
+                */
+               dev_priv->blit_vb->file_priv = file_priv;
+               set_shaders(dev);
+       }
+       vb = r600_nomm_get_vb_ptr(dev);
+
+       if (cpp == 4) {
+               cb_format = COLOR_8_8_8_8;
+               tex_format = FMT_8_8_8_8;
+       } else if (cpp == 2) {
+               cb_format = COLOR_5_6_5;
+               tex_format = FMT_5_6_5;
+       } else {
+               cb_format = COLOR_8;
+               tex_format = FMT_8;
+       }
+
+       /* three vertices of (dst x, dst y, src x, src y) */
+       vb[0] = i2f(dx);
+       vb[1] = i2f(dy);
+       vb[2] = i2f(sx);
+       vb[3] = i2f(sy);
+
+       vb[4] = i2f(dx);
+       vb[5] = i2f(dy + h);
+       vb[6] = i2f(sx);
+       vb[7] = i2f(sy + h);
+
+       vb[8] = i2f(dx + w);
+       vb[9] = i2f(dy + h);
+       vb[10] = i2f(sx + w);
+       vb[11] = i2f(sy + h);
+
+       /* src */
+       set_tex_resource(dev_priv, tex_format,
+                        src_pitch / cpp,
+                        sy + h, src_pitch / cpp,
+                        src_gpu_addr);
+
+       cp_set_surface_sync(dev_priv,
+                           R600_TC_ACTION_ENA, (src_pitch * (sy + h)), src_gpu_addr);
+
+       /* dst */
+       set_render_target(dev_priv, cb_format,
+                         dst_pitch / cpp, dy + h,
+                         dst_gpu_addr);
+
+       /* scissors */
+       set_scissors(dev_priv, dx, dy, dx + w, dy + h);
+
+       /* Vertex buffer setup */
+       vb_addr = dev_priv->gart_buffers_offset +
+               dev_priv->blit_vb->offset +
+               dev_priv->blit_vb->used;
+       set_vtx_resource(dev_priv, vb_addr);
+
+       /* draw */
+       draw_auto(dev_priv);
+
+       cp_set_surface_sync(dev_priv,
+                           R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
+                           dst_pitch * (dy + h), dst_gpu_addr);
+
+       dev_priv->blit_vb->used += 12 * 4;
+}
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
new file mode 100644 (file)
index 0000000..5755647
--- /dev/null
@@ -0,0 +1,777 @@
+#include "drmP.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon.h"
+
+#include "r600d.h"
+#include "r600_blit_shaders.h"
+
+#define DI_PT_RECTLIST        0x11 /* written to VGT_PRIMITIVE_TYPE by draw_auto() */
+#define DI_INDEX_SIZE_16_BIT  0x0 /* payload of the INDEX_TYPE packet in draw_auto() */
+#define DI_SRC_SEL_AUTO_INDEX 0x2 /* last dword of the DRAW_INDEX_AUTO packet */
+
+#define FMT_8                 0x1 /* FMT_*: presumably the format argument of set_tex_resource() - confirm at callers */
+#define FMT_5_6_5             0x8
+#define FMT_8_8_8_8           0x1a
+#define COLOR_8               0x1 /* COLOR_*: presumably the format argument of set_render_target() - confirm at callers */
+#define COLOR_5_6_5           0x8
+#define COLOR_8_8_8_8         0x1a
+
+/*
+ * Program colour buffer 0 as the blit destination.
+ *
+ * The height is rounded up to a multiple of 8 with a minimum of 8 before the
+ * slice size is computed.  Emits 23 dwords on families after CHIP_R600 and
+ * before CHIP_RV770 (which get an extra SURFACE_BASE_UPDATE packet), and 21
+ * dwords on every other family.
+ */
+static void
+set_render_target(struct radeon_device *rdev, int format,
+                 int w, int h, u64 gpu_addr)
+{
+       u32 color_info, color_size;
+       int rows;
+
+       rows = (h + 7) & ~7;
+       if (rows < 8)
+               rows = 8;
+
+       color_info = ((format << 2) | (1 << 27));
+       /* (w / 8) - 1 in the low bits, (w * rows / 64) - 1 from bit 10 */
+       color_size = ((((w / 8) - 1) << 0) | ((((w * rows) / 64) - 1) << 10));
+
+       /* base address, in units of 256 bytes */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, gpu_addr >> 8);
+
+       if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) {
+               radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
+               radeon_ring_write(rdev, 2 << 0);
+       }
+
+       /* size */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, color_size);
+
+       /* view */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, 0);
+
+       /* info */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, color_info);
+
+       /* tile, frag and mask are all written as 0 */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, 0);
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, 0);
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, 0);
+}
+
+/*
+ * Emit a SURFACE_SYNC packet (5 dwords) for the range starting at mc_addr.
+ *
+ * size is in bytes and is rounded up to units of 256; the value 0xffffffff
+ * is passed through unchanged.  The poll interval is fixed at 10.
+ */
+static void
+cp_set_surface_sync(struct radeon_device *rdev,
+                   u32 sync_type, u32 size,
+                   u64 mc_addr)
+{
+       const u32 coher_size = (size == 0xffffffff) ?
+               0xffffffff : ((size + 255) >> 8);
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
+       radeon_ring_write(rdev, sync_type);
+       radeon_ring_write(rdev, coher_size);
+       radeon_ring_write(rdev, mc_addr >> 8);
+       radeon_ring_write(rdev, 10); /* poll interval */
+}
+
+/*
+ * Point the GPU at the blit vertex and pixel shaders and flush the shader
+ * cache over them.
+ *
+ * emits 21dw + 1 surface sync = 26dw
+ *
+ * The surface sync covers the range from the lower of the two shader start
+ * addresses through 512 bytes past the higher one, so both programs are
+ * included regardless of which offset comes first.  Syncing 512 bytes from
+ * the PS address alone leaves the VS program outside the range.
+ */
+static void
+set_shaders(struct radeon_device *rdev)
+{
+       u64 vs_addr, ps_addr, sync_addr;
+       u32 sq_pgm_resources, sync_size;
+
+       /* setup shader regs */
+       sq_pgm_resources = (1 << 0);
+
+       vs_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
+       ps_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
+
+       /* VS */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, vs_addr >> 8);
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, sq_pgm_resources);
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, 0);
+
+       /* PS */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, ps_addr >> 8);
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, sq_pgm_resources | (1 << 28));
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, 2);
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+       radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, 0);
+
+       /* one sync spanning both programs, whichever comes first */
+       if (vs_addr < ps_addr) {
+               sync_addr = vs_addr;
+               sync_size = (u32)(ps_addr - vs_addr) + 512;
+       } else {
+               sync_addr = ps_addr;
+               sync_size = (u32)(vs_addr - ps_addr) + 512;
+       }
+       cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, sync_size, sync_addr);
+}
+
+/*
+ * Describe the 48-byte vertex buffer at gpu_addr to the GPU.
+ *
+ * emits 9 + 1 sync (5) = 14
+ *
+ * RV610, RV620, RS780, RS880 and RV710 sync with PACKET3_TC_ACTION_ENA;
+ * all other families use PACKET3_VC_ACTION_ENA.
+ */
+static void
+set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
+{
+       u32 vtx_word2, sync_type;
+
+       /* low byte: address bits 39:32; the value 16 is placed from bit 8 up */
+       vtx_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
+       radeon_ring_write(rdev, 0x460);
+       radeon_ring_write(rdev, gpu_addr & 0xffffffff);
+       radeon_ring_write(rdev, 48 - 1);
+       radeon_ring_write(rdev, vtx_word2);
+       radeon_ring_write(rdev, 1 << 0);
+       radeon_ring_write(rdev, 0);
+       radeon_ring_write(rdev, 0);
+       radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
+
+       switch (rdev->family) {
+       case CHIP_RV610:
+       case CHIP_RV620:
+       case CHIP_RS780:
+       case CHIP_RS880:
+       case CHIP_RV710:
+               sync_type = PACKET3_TC_ACTION_ENA;
+               break;
+       default:
+               sync_type = PACKET3_VC_ACTION_ENA;
+               break;
+       }
+
+       cp_set_surface_sync(rdev, sync_type, 48, gpu_addr);
+}
+
+/*
+ * Emit the texture resource used as the blit source.
+ *
+ * emits 9
+ *
+ * format, w, h and pitch are packed into the resource words below.
+ * gpu_addr >> 8 is written twice, into two consecutive words.  A height
+ * below 1 is treated as 1.
+ */
+static void
+set_tex_resource(struct radeon_device *rdev,
+                int format, int w, int h, int pitch,
+                u64 gpu_addr)
+{
+       uint32_t tex_word0, tex_word1, tex_word4;
+
+       h = (h < 1) ? 1 : h;
+
+       /* word0: bit 0 set, (pitch / 8) - 1 from bit 8, width - 1 from bit 19 */
+       tex_word0 = (1 << 0) | (((pitch >> 3) - 1) << 8) | ((w - 1) << 19);
+
+       /* word1: height - 1 in the low bits, format from bit 26 */
+       tex_word1 = (format << 26) | ((h - 1) << 0);
+
+       tex_word4 = (1 << 14) | (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
+       radeon_ring_write(rdev, 0);
+       radeon_ring_write(rdev, tex_word0);
+       radeon_ring_write(rdev, tex_word1);
+       radeon_ring_write(rdev, gpu_addr >> 8);
+       radeon_ring_write(rdev, gpu_addr >> 8);
+       radeon_ring_write(rdev, tex_word4);
+       radeon_ring_write(rdev, 0);
+       radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30);
+}
+
+/*
+ * Load the same rectangle (x1, y1) - (x2, y2) into the screen, generic and
+ * window scissor register pairs.  Each corner is packed as x | (y << 16);
+ * the generic and window top-left values additionally have bit 31 set.
+ *
+ * emits 12
+ */
+static void
+set_scissors(struct radeon_device *rdev, int x1, int y1,
+            int x2, int y2)
+{
+       const u32 top_left = (x1 << 0) | (y1 << 16);
+       const u32 bottom_right = (x2 << 0) | (y2 << 16);
+
+       /* screen scissor */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+       radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, top_left);
+       radeon_ring_write(rdev, bottom_right);
+
+       /* generic scissor */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+       radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, top_left | (1 << 31));
+       radeon_ring_write(rdev, bottom_right);
+
+       /* window scissor */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+       radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, top_left | (1 << 31));
+       radeon_ring_write(rdev, bottom_right);
+}
+
+/*
+ * Queue one auto-indexed draw of a 3-vertex rectangle list.
+ *
+ * emits 10 (3 + 2 + 2 + 3 dwords for the four packets below)
+ */
+static void
+draw_auto(struct radeon_device *rdev)
+{
+       /* primitive type: rectangle list */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+       radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, DI_PT_RECTLIST);
+
+       /* index type */
+       radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
+       radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT);
+
+       /* a single instance */
+       radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
+       radeon_ring_write(rdev, 1);
+
+       /* draw 3 vertices using auto-generated indices */
+       radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
+       radeon_ring_write(rdev, 3);
+       radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
+
+}
+
+/*
+ * Emit the default state for a blit: an indirect buffer pointing at the
+ * default state stored in the blit shader object, a cache flush event, and
+ * the six SQ resource registers starting at SQ_CONFIG, whose values are
+ * chosen per chip family.  Families without a case of their own use the
+ * RV610/RV620/RS780/RS880 values.
+ *
+ * emits 14 (4 for the indirect buffer, 2 for the event, 8 for SQ config)
+ */
+static void
+set_default_state(struct radeon_device *rdev)
+{
+       u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
+       u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
+       int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
+       int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
+       int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
+       u64 gpu_addr;
+
+       switch (rdev->family) {
+       case CHIP_R600:
+               num_ps_gprs = 192;
+               num_vs_gprs = 56;
+               num_temp_gprs = 4;
+               num_gs_gprs = 0;
+               num_es_gprs = 0;
+               num_ps_threads = 136;
+               num_vs_threads = 48;
+               num_gs_threads = 4;
+               num_es_threads = 4;
+               num_ps_stack_entries = 128;
+               num_vs_stack_entries = 128;
+               num_gs_stack_entries = 0;
+               num_es_stack_entries = 0;
+               break;
+       case CHIP_RV630:
+       case CHIP_RV635:
+               num_ps_gprs = 84;
+               num_vs_gprs = 36;
+               num_temp_gprs = 4;
+               num_gs_gprs = 0;
+               num_es_gprs = 0;
+               num_ps_threads = 144;
+               num_vs_threads = 40;
+               num_gs_threads = 4;
+               num_es_threads = 4;
+               num_ps_stack_entries = 40;
+               num_vs_stack_entries = 40;
+               num_gs_stack_entries = 32;
+               num_es_stack_entries = 16;
+               break;
+       case CHIP_RV610:
+       case CHIP_RV620:
+       case CHIP_RS780:
+       case CHIP_RS880:
+       default:
+               num_ps_gprs = 84;
+               num_vs_gprs = 36;
+               num_temp_gprs = 4;
+               num_gs_gprs = 0;
+               num_es_gprs = 0;
+               num_ps_threads = 136;
+               num_vs_threads = 48;
+               num_gs_threads = 4;
+               num_es_threads = 4;
+               num_ps_stack_entries = 40;
+               num_vs_stack_entries = 40;
+               num_gs_stack_entries = 32;
+               num_es_stack_entries = 16;
+               break;
+       case CHIP_RV670:
+               num_ps_gprs = 144;
+               num_vs_gprs = 40;
+               num_temp_gprs = 4;
+               num_gs_gprs = 0;
+               num_es_gprs = 0;
+               num_ps_threads = 136;
+               num_vs_threads = 48;
+               num_gs_threads = 4;
+               num_es_threads = 4;
+               num_ps_stack_entries = 40;
+               num_vs_stack_entries = 40;
+               num_gs_stack_entries = 32;
+               num_es_stack_entries = 16;
+               break;
+       case CHIP_RV770:
+               num_ps_gprs = 192;
+               num_vs_gprs = 56;
+               num_temp_gprs = 4;
+               num_gs_gprs = 0;
+               num_es_gprs = 0;
+               num_ps_threads = 188;
+               num_vs_threads = 60;
+               num_gs_threads = 0;
+               num_es_threads = 0;
+               num_ps_stack_entries = 256;
+               num_vs_stack_entries = 256;
+               num_gs_stack_entries = 0;
+               num_es_stack_entries = 0;
+               break;
+       case CHIP_RV730:
+       case CHIP_RV740:
+               num_ps_gprs = 84;
+               num_vs_gprs = 36;
+               num_temp_gprs = 4;
+               num_gs_gprs = 0;
+               num_es_gprs = 0;
+               num_ps_threads = 188;
+               num_vs_threads = 60;
+               num_gs_threads = 0;
+               num_es_threads = 0;
+               num_ps_stack_entries = 128;
+               num_vs_stack_entries = 128;
+               num_gs_stack_entries = 0;
+               num_es_stack_entries = 0;
+               break;
+       case CHIP_RV710:
+               num_ps_gprs = 192;
+               num_vs_gprs = 56;
+               num_temp_gprs = 4;
+               num_gs_gprs = 0;
+               num_es_gprs = 0;
+               num_ps_threads = 144;
+               num_vs_threads = 48;
+               num_gs_threads = 0;
+               num_es_threads = 0;
+               num_ps_stack_entries = 128;
+               num_vs_stack_entries = 128;
+               num_gs_stack_entries = 0;
+               num_es_stack_entries = 0;
+               break;
+       }
+
+       /*
+        * VC_ENABLE is left clear on the same families for which
+        * set_vtx_resource() syncs the TC instead of the VC.  RS880 belongs
+        * in this list; it used to be missed because RS780 was tested twice.
+        */
+       if ((rdev->family == CHIP_RV610) ||
+           (rdev->family == CHIP_RV620) ||
+           (rdev->family == CHIP_RS780) ||
+           (rdev->family == CHIP_RS880) ||
+           (rdev->family == CHIP_RV710))
+               sq_config = 0;
+       else
+               sq_config = VC_ENABLE;
+
+       sq_config |= (DX9_CONSTS |
+                     ALU_INST_PREFER_VECTOR |
+                     PS_PRIO(0) |
+                     VS_PRIO(1) |
+                     GS_PRIO(2) |
+                     ES_PRIO(3));
+
+       sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
+                                 NUM_VS_GPRS(num_vs_gprs) |
+                                 NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+       sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
+                                 NUM_ES_GPRS(num_es_gprs));
+       sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
+                                  NUM_VS_THREADS(num_vs_threads) |
+                                  NUM_GS_THREADS(num_gs_threads) |
+                                  NUM_ES_THREADS(num_es_threads));
+       sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+                                   NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+       sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+                                   NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+
+       /* emit an IB pointing at default state; its length is given in dwords */
+       gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
+       radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+       radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);
+       radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
+       radeon_ring_write(rdev, (rdev->r600_blit.state_len / 4));
+
+       radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
+       radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
+       /* SQ config: six consecutive registers starting at SQ_CONFIG */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6));
+       radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, sq_config);
+       radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
+       radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
+       radeon_ring_write(rdev, sq_thread_resource_mgmt);
+       radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
+       radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
+}
+
+/*
+ * Convert a small unsigned integer into the bit pattern of the
+ * single-precision float with the same value, without using the FPU.
+ *
+ * Only the low 14 bits of input are looked at, so values 0..16383 convert
+ * exactly and anything larger is reduced modulo 16384 first.  Zero maps to
+ * 0 (+0.0f).
+ */
+static inline uint32_t i2f(uint32_t input)
+{
+       /* value placed so that bit 13 of the input lands on bit 23 */
+       u32 mantissa = (input & 0x3fff) << 10;
+       /* exponent for a leading one at bit 23: 13, biased by 127 */
+       u32 biased_exp = 140;
+       u32 shifts;
+
+       if (mantissa == 0)
+               return 0; /* 0 is a special case */
+
+       /* normalise: shift left until the leading one reaches bit 23 */
+       for (shifts = 0; shifts < 14 && !(mantissa & 0x800000); shifts++) {
+               mantissa <<= 1;
+               biased_exp--;
+       }
+
+       /* the leading one is implicit in the encoding, so mask it off */
+       return (biased_exp << 23) | (mantissa & 0x7fffff);
+}
+
+/*
+ * Create, pin and fill the VRAM object holding everything the blitter needs.
+ *
+ * Layout of the object (every section starts on a 256 byte boundary):
+ *   state_offset (0)  default state: the r7xx table when
+ *                     family >= CHIP_RV770, the r6xx table otherwise
+ *   vs_offset         r6xx_vs
+ *   ps_offset         r6xx_ps
+ *
+ * Returns 0 on success, or the error code of the radeon_object_*() call
+ * that failed.
+ * NOTE(review): nothing is unpinned or unreferenced here on failure;
+ * presumably the caller's teardown path (r600_blit_fini()) covers that -
+ * confirm.
+ */
+int r600_blit_init(struct radeon_device *rdev)
+{
+       const u32 *default_state;
+       u32 obj_size;
+       int r;
+       void *ptr;
+
+       rdev->r600_blit.state_offset = 0;
+
+       /* the *_size symbols count dwords, the lengths stored here are bytes */
+       if (rdev->family >= CHIP_RV770) {
+               default_state = r7xx_default_state;
+               rdev->r600_blit.state_len = r7xx_default_size * 4;
+       } else {
+               default_state = r6xx_default_state;
+               rdev->r600_blit.state_len = r6xx_default_size * 4;
+       }
+
+       obj_size = rdev->r600_blit.state_len;
+       obj_size = ALIGN(obj_size, 256);
+
+       rdev->r600_blit.vs_offset = obj_size;
+       obj_size += r6xx_vs_size * 4;
+       obj_size = ALIGN(obj_size, 256);
+
+       rdev->r600_blit.ps_offset = obj_size;
+       obj_size += r6xx_ps_size * 4;
+       obj_size = ALIGN(obj_size, 256);
+
+       r = radeon_object_create(rdev, NULL, obj_size,
+                                true, RADEON_GEM_DOMAIN_VRAM,
+                                false, &rdev->r600_blit.shader_obj);
+       if (r) {
+               DRM_ERROR("r600 failed to allocate shader\n");
+               return r;
+       }
+
+       r = radeon_object_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
+                            &rdev->r600_blit.shader_gpu_addr);
+       if (r) {
+               DRM_ERROR("failed to pin blit object %d\n", r);
+               return r;
+       }
+
+       /* %016llx: zero-pad the address so it reads correctly after "0x" */
+       DRM_DEBUG("r6xx blit allocated bo @ 0x%016llx %08x vs %08x ps %08x\n",
+                 rdev->r600_blit.shader_gpu_addr, obj_size,
+                 rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);
+
+       r = radeon_object_kmap(rdev->r600_blit.shader_obj, &ptr);
+       if (r) {
+               DRM_ERROR("failed to map blit object %d\n", r);
+               return r;
+       }
+
+       /*
+        * All three sections live in the same mapping of the VRAM object, so
+        * all of them are written with the I/O copy helper.
+        */
+       memcpy_toio(ptr + rdev->r600_blit.state_offset, default_state, rdev->r600_blit.state_len);
+       memcpy_toio(ptr + rdev->r600_blit.vs_offset, r6xx_vs, r6xx_vs_size * 4);
+       memcpy_toio(ptr + rdev->r600_blit.ps_offset, r6xx_ps, r6xx_ps_size * 4);
+
+       radeon_object_kunmap(rdev->r600_blit.shader_obj);
+       return 0;
+}
+
+/*
+ * Undo r600_blit_init(): unpin the blit shader object, then drop the
+ * reference to it.
+ */
+void r600_blit_fini(struct radeon_device *rdev)
+{
+       radeon_object_unpin(rdev->r600_blit.shader_obj);
+       radeon_object_unref(&rdev->r600_blit.shader_obj);
+}
+
+/*
+ * Grab an IB to hold the blit vertex data and reset the usage counters:
+ * vb_total is set to 64 KiB and vb_used to 0 (both in bytes).
+ *
+ * Returns 0 on success or the error from radeon_ib_get().
+ */
+int r600_vb_ib_get(struct radeon_device *rdev)
+{
+       int ret = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
+
+       if (ret != 0) {
+               DRM_ERROR("failed to get IB for vertex buffer\n");
+               return ret;
+       }
+
+       rdev->r600_blit.vb_used = 0;
+       rdev->r600_blit.vb_total = 64*1024;
+       return 0;
+}
+
+/*
+ * Hand the vertex buffer IB back to the IB pool.
+ *
+ * Under the pool mutex the IB's fence is emitted and the IB is appended to
+ * the pool's scheduled list; radeon_ib_free() is then called on it with the
+ * mutex released.  The return value of radeon_fence_emit() is not checked.
+ */
+void r600_vb_ib_put(struct radeon_device *rdev)
+{
+       mutex_lock(&rdev->ib_pool.mutex);
+       radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
+       list_add_tail(&rdev->r600_blit.vb_ib->list, &rdev->ib_pool.scheduled_ibs);
+       mutex_unlock(&rdev->ib_pool.mutex);
+       radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
+}
+
+/*
+ * Get ready to blit size_bytes bytes: acquire the vertex buffer IB, lock
+ * enough ring space for the whole copy, then emit the default state and the
+ * shaders.
+ *
+ * Returns 0 on success.  If the vertex buffer IB or the ring space cannot be
+ * obtained, a warning is logged and the error is returned with nothing
+ * emitted; the caller must then neither call r600_kms_blit_copy() nor
+ * r600_blit_done_copy().
+ */
+int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
+{
+       int r;
+       int ring_size;
+       const int max_size = 8192*8192;
+
+       r = r600_vb_ib_get(rdev);
+       if (WARN_ON(r))
+               return r;
+
+       /* 78 ring dwords per copy loop iteration, one iteration per max_size */
+       ring_size = ((size_bytes + max_size) / max_size) * 78;
+       /* set default  + shaders */
+       ring_size += 40; /* shaders + def state */
+       ring_size += 3; /* fence emit for VB IB */
+       ring_size += 5; /* done copy */
+       ring_size += 3; /* fence emit for done copy */
+       r = radeon_ring_lock(rdev, ring_size);
+       if (WARN_ON(r)) {
+               /* the ring is not ours to write: give back the IB taken above */
+               radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
+               return r;
+       }
+
+       set_default_state(rdev); /* 14 */
+       set_shaders(rdev); /* 26 */
+       return 0;
+}
+
+/*
+ * Finish a blit started with r600_blit_prepare_copy(): flush the caches,
+ * wait for the 3D engine to go idle, release the vertex buffer IB if one is
+ * held, emit the optional fence and commit (and unlock) the ring.
+ *
+ * fence may be NULL, in which case no fence is emitted.  A failure to emit
+ * the fence is logged; the ring is committed regardless.
+ */
+void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
+{
+       radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
+       radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
+       /* wait for 3D idle clean */
+       radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+       radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+       radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
+
+       if (rdev->r600_blit.vb_ib)
+               r600_vb_ib_put(rdev);
+
+       if (fence) {
+               int r = radeon_fence_emit(rdev, fence);
+
+               if (r)
+                       DRM_ERROR("failed to emit fence for blit copy %d\n", r);
+       }
+
+       radeon_ring_unlock_commit(rdev);
+}
+
+/*
+ * Emit the draws that copy size_bytes bytes from src_gpu_addr to
+ * dst_gpu_addr.  Must be called between r600_blit_prepare_copy() and
+ * r600_blit_done_copy().
+ *
+ * The copy is done as a series of textured rectangles.  When the size or
+ * either address is not a multiple of 4 the data is treated as 8 bit
+ * pixels (rows of at most 8192 bytes); otherwise it is treated as 32 bit
+ * pixels (rows of at most 8192 * 4 bytes).  Both base addresses are rounded
+ * down to 256 bytes and the remainder becomes the x offset of the
+ * rectangle.  Only when both x offsets are zero may a rectangle span
+ * several rows (up to 8192).
+ *
+ * Every rectangle consumes 48 bytes (3 vertices of 4 dwords) of the vertex
+ * buffer IB.
+ * NOTE(review): running out of vertex buffer space only triggers a warning;
+ * the vertices are still written.  A refill path is still to be written.
+ */
+void r600_kms_blit_copy(struct radeon_device *rdev,
+                       u64 src_gpu_addr, u64 dst_gpu_addr,
+                       int size_bytes)
+{
+       int max_bytes;
+       u64 vb_gpu_addr;
+       u32 *vb;
+
+       DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
+                 size_bytes, rdev->r600_blit.vb_used);
+       /*
+        * NOTE(review): vb_used is a byte count (it is added to the IB's GPU
+        * address below), so this is only right if vb_ib->ptr is a byte or
+        * void pointer - confirm against struct radeon_ib.
+        */
+       vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
+       if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
+               /* unaligned: copy as 8 bit pixels */
+               max_bytes = 8192;
+
+               while (size_bytes) {
+                       int cur_size = size_bytes;
+                       int src_x = src_gpu_addr & 255;
+                       int dst_x = dst_gpu_addr & 255;
+                       int h = 1;
+                       src_gpu_addr = src_gpu_addr & ~255;
+                       dst_gpu_addr = dst_gpu_addr & ~255;
+
+                       if (!src_x && !dst_x) {
+                               /* both aligned: as many full rows as possible */
+                               h = (cur_size / max_bytes);
+                               if (h > 8192)
+                                       h = 8192;
+                               if (h == 0)
+                                       h = 1;
+                               else
+                                       cur_size = max_bytes;
+                       } else {
+                               /* one row, clipped so neither side exceeds max_bytes */
+                               if (cur_size > max_bytes)
+                                       cur_size = max_bytes;
+                               if (cur_size > (max_bytes - dst_x))
+                                       cur_size = (max_bytes - dst_x);
+                               if (cur_size > (max_bytes - src_x))
+                                       cur_size = (max_bytes - src_x);
+                       }
+
+                       /* TODO: get a fresh vertex buffer instead of only warning */
+                       WARN_ON((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total);
+
+                       /* three vertices, each (dst x, dst y, src x, src y) */
+                       vb[0] = i2f(dst_x);
+                       vb[1] = 0;
+                       vb[2] = i2f(src_x);
+                       vb[3] = 0;
+
+                       vb[4] = i2f(dst_x);
+                       vb[5] = i2f(h);
+                       vb[6] = i2f(src_x);
+                       vb[7] = i2f(h);
+
+                       vb[8] = i2f(dst_x + cur_size);
+                       vb[9] = i2f(h);
+                       vb[10] = i2f(src_x + cur_size);
+                       vb[11] = i2f(h);
+
+                       /* src 9 */
+                       set_tex_resource(rdev, FMT_8,
+                                        src_x + cur_size, h, src_x + cur_size,
+                                        src_gpu_addr);
+
+                       /* 5 */
+                       cp_set_surface_sync(rdev,
+                                           PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+                       /* dst 23 */
+                       set_render_target(rdev, COLOR_8,
+                                         dst_x + cur_size, h,
+                                         dst_gpu_addr);
+
+                       /* scissors 12 */
+                       set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
+
+                       /* 14 */
+                       vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
+                       set_vtx_resource(rdev, vb_gpu_addr);
+
+                       /* draw 10 */
+                       draw_auto(rdev);
+
+                       /* 5 */
+                       cp_set_surface_sync(rdev,
+                                           PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
+                                           cur_size * h, dst_gpu_addr);
+
+                       vb += 12;
+                       rdev->r600_blit.vb_used += 12 * 4;
+
+                       src_gpu_addr += cur_size * h;
+                       dst_gpu_addr += cur_size * h;
+                       size_bytes -= cur_size * h;
+               }
+       } else {
+               /* everything dword aligned: copy as 32 bit pixels */
+               max_bytes = 8192 * 4;
+
+               while (size_bytes) {
+                       int cur_size = size_bytes;
+                       int src_x = (src_gpu_addr & 255);
+                       int dst_x = (dst_gpu_addr & 255);
+                       int h = 1;
+                       src_gpu_addr = src_gpu_addr & ~255;
+                       dst_gpu_addr = dst_gpu_addr & ~255;
+
+                       if (!src_x && !dst_x) {
+                               /* both aligned: as many full rows as possible */
+                               h = (cur_size / max_bytes);
+                               if (h > 8192)
+                                       h = 8192;
+                               if (h == 0)
+                                       h = 1;
+                               else
+                                       cur_size = max_bytes;
+                       } else {
+                               /* one row, clipped so neither side exceeds max_bytes */
+                               if (cur_size > max_bytes)
+                                       cur_size = max_bytes;
+                               if (cur_size > (max_bytes - dst_x))
+                                       cur_size = (max_bytes - dst_x);
+                               if (cur_size > (max_bytes - src_x))
+                                       cur_size = (max_bytes - src_x);
+                       }
+
+                       /* TODO: get a fresh vertex buffer instead of only warning */
+                       WARN_ON((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total);
+
+                       /* x coordinates are in 4 byte pixels from here on */
+                       vb[0] = i2f(dst_x / 4);
+                       vb[1] = 0;
+                       vb[2] = i2f(src_x / 4);
+                       vb[3] = 0;
+
+                       vb[4] = i2f(dst_x / 4);
+                       vb[5] = i2f(h);
+                       vb[6] = i2f(src_x / 4);
+                       vb[7] = i2f(h);
+
+                       vb[8] = i2f((dst_x + cur_size) / 4);
+                       vb[9] = i2f(h);
+                       vb[10] = i2f((src_x + cur_size) / 4);
+                       vb[11] = i2f(h);
+
+                       /* src 9 */
+                       set_tex_resource(rdev, FMT_8_8_8_8,
+                                        (src_x + cur_size) / 4,
+                                        h, (src_x + cur_size) / 4,
+                                        src_gpu_addr);
+                       /* 5 */
+                       cp_set_surface_sync(rdev,
+                                           PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
+
+                       /*
+                        * dst 23 - the width is in pixels, like the texture
+                        * width above, so the byte count is divided by 4
+                        */
+                       set_render_target(rdev, COLOR_8_8_8_8,
+                                         (dst_x + cur_size) / 4, h,
+                                         dst_gpu_addr);
+
+                       /* scissors 12 - right edge matches vb[8] */
+                       set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size) / 4, h);
+
+                       /* Vertex buffer setup 14 */
+                       vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
+                       set_vtx_resource(rdev, vb_gpu_addr);
+
+                       /* draw 10 */
+                       draw_auto(rdev);
+
+                       /* 5 */
+                       cp_set_surface_sync(rdev,
+                                           PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
+                                           cur_size * h, dst_gpu_addr);
+
+                       /* 78 ring dwords per loop */
+                       vb += 12;
+                       rdev->r600_blit.vb_used += 12 * 4;
+
+                       src_gpu_addr += cur_size * h;
+                       dst_gpu_addr += cur_size * h;
+                       size_bytes -= cur_size * h;
+               }
+       }
+}
+
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.c b/drivers/gpu/drm/radeon/r600_blit_shaders.c
new file mode 100644 (file)
index 0000000..d745e81
--- /dev/null
@@ -0,0 +1,1072 @@
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+/*
+ * Default state for the blitter on parts older than CHIP_RV770.
+ *
+ * r600_blit_init() copies this table to the start of the blit shader object
+ * and set_default_state() has the CP execute it through an INDIRECT_BUFFER
+ * packet.  Each entry is one 32 bit dword of that buffer.
+ */
+const u32 r6xx_default_state[] =
+{
+       0xc0002400,
+       0x00000000,
+       0xc0012800,
+       0x80000000,
+       0x80000000,
+       0xc0004600,
+       0x00000016,
+       0xc0016800,
+       0x00000010,
+       0x00028000,
+       0xc0016800,
+       0x00000010,
+       0x00008000,
+       0xc0016800,
+       0x00000542,
+       0x07000003,
+       0xc0016800,
+       0x000005c5,
+       0x00000000,
+       0xc0016800,
+       0x00000363,
+       0x00000000,
+       0xc0016800,
+       0x0000060c,
+       0x82000000,
+       0xc0016800,
+       0x0000060e,
+       0x01020204,
+       0xc0016f00,
+       0x00000000,
+       0x00000000,
+       0xc0016f00,
+       0x00000001,
+       0x00000000,
+       0xc0096900,
+       0x0000022a,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xc0016900,
+       0x00000004,
+       0x00000000,
+       0xc0016900,
+       0x0000000a,
+       0x00000000,
+       0xc0016900,
+       0x0000000b,
+       0x00000000,
+       0xc0016900,
+       0x0000010c,
+       0x00000000,
+       0xc0016900,
+       0x0000010d,
+       0x00000000,
+       0xc0016900,
+       0x00000200,
+       0x00000000,
+       0xc0016900,
+       0x00000343,
+       0x00000060,
+       0xc0016900,
+       0x00000344,
+       0x00000040,
+       0xc0016900,
+       0x00000351,
+       0x0000aa00,
+       0xc0016900,
+       0x00000104,
+       0x00000000,
+       0xc0016900,
+       0x0000010e,
+       0x00000000,
+       0xc0046900,
+       0x00000105,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xc0036900,
+       0x00000109,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xc0046900,
+       0x0000030c,
+       0x01000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xc0046900,
+       0x00000048,
+       0x3f800000,
+       0x00000000,
+       0x3f800000,
+       0x3f800000,
+       0xc0016900,
+       0x0000008e,
+       0x0000000f,
+       0xc0016900,
+       0x00000080,
+       0x00000000,
+       0xc0016900,
+       0x00000083,
+       0x0000ffff,
+       0xc0016900,
+       0x00000084,
+       0x00000000,
+       0xc0016900,
+       0x00000085,
+       0x20002000,
+       0xc0016900,
+       0x00000086,
+       0x00000000,
+       0xc0016900,
+       0x00000087,
+       0x20002000,
+       0xc0016900,
+       0x00000088,
+       0x00000000,
+       0xc0016900,
+       0x00000089,
+       0x20002000,
+       0xc0016900,
+       0x0000008a,
+       0x00000000,
+       0xc0016900,
+       0x0000008b,
+       0x20002000,
+       0xc0016900,
+       0x0000008c,
+       0x00000000,
+       0xc0016900,
+       0x00000094,
+       0x80000000,
+       0xc0016900,
+       0x00000095,
+       0x20002000,
+       0xc0026900,
+       0x000000b4,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x00000096,
+       0x80000000,
+       0xc0016900,
+       0x00000097,
+       0x20002000,
+       0xc0026900,
+       0x000000b6,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x00000098,
+       0x80000000,
+       0xc0016900,
+       0x00000099,
+       0x20002000,
+       0xc0026900,
+       0x000000b8,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x0000009a,
+       0x80000000,
+       0xc0016900,
+       0x0000009b,
+       0x20002000,
+       0xc0026900,
+       0x000000ba,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x0000009c,
+       0x80000000,
+       0xc0016900,
+       0x0000009d,
+       0x20002000,
+       0xc0026900,
+       0x000000bc,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x0000009e,
+       0x80000000,
+       0xc0016900,
+       0x0000009f,
+       0x20002000,
+       0xc0026900,
+       0x000000be,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000a0,
+       0x80000000,
+       0xc0016900,
+       0x000000a1,
+       0x20002000,
+       0xc0026900,
+       0x000000c0,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000a2,
+       0x80000000,
+       0xc0016900,
+       0x000000a3,
+       0x20002000,
+       0xc0026900,
+       0x000000c2,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000a4,
+       0x80000000,
+       0xc0016900,
+       0x000000a5,
+       0x20002000,
+       0xc0026900,
+       0x000000c4,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000a6,
+       0x80000000,
+       0xc0016900,
+       0x000000a7,
+       0x20002000,
+       0xc0026900,
+       0x000000c6,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000a8,
+       0x80000000,
+       0xc0016900,
+       0x000000a9,
+       0x20002000,
+       0xc0026900,
+       0x000000c8,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000aa,
+       0x80000000,
+       0xc0016900,
+       0x000000ab,
+       0x20002000,
+       0xc0026900,
+       0x000000ca,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000ac,
+       0x80000000,
+       0xc0016900,
+       0x000000ad,
+       0x20002000,
+       0xc0026900,
+       0x000000cc,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000ae,
+       0x80000000,
+       0xc0016900,
+       0x000000af,
+       0x20002000,
+       0xc0026900,
+       0x000000ce,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000b0,
+       0x80000000,
+       0xc0016900,
+       0x000000b1,
+       0x20002000,
+       0xc0026900,
+       0x000000d0,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000b2,
+       0x80000000,
+       0xc0016900,
+       0x000000b3,
+       0x20002000,
+       0xc0026900,
+       0x000000d2,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x00000293,
+       0x00004010,
+       0xc0016900,
+       0x00000300,
+       0x00000000,
+       0xc0016900,
+       0x00000301,
+       0x00000000,
+       0xc0016900,
+       0x00000312,
+       0xffffffff,
+       0xc0016900,
+       0x00000307,
+       0x00000000,
+       0xc0016900,
+       0x00000308,
+       0x00000000,
+       0xc0016900,
+       0x00000283,
+       0x00000000,
+       0xc0016900,
+       0x00000292,
+       0x00000000,
+       0xc0066900,
+       0x0000010f,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xc0016900,
+       0x00000206,
+       0x00000000,
+       0xc0016900,
+       0x00000207,
+       0x00000000,
+       0xc0016900,
+       0x00000208,
+       0x00000000,
+       0xc0046900,
+       0x00000303,
+       0x3f800000,
+       0x3f800000,
+       0x3f800000,
+       0x3f800000,
+       0xc0016900,
+       0x00000205,
+       0x00000004,
+       0xc0016900,
+       0x00000280,
+       0x00000000,
+       0xc0016900,
+       0x00000281,
+       0x00000000,
+       0xc0016900,
+       0x0000037e,
+       0x00000000,
+       0xc0016900,
+       0x00000382,
+       0x00000000,
+       0xc0016900,
+       0x00000380,
+       0x00000000,
+       0xc0016900,
+       0x00000383,
+       0x00000000,
+       0xc0016900,
+       0x00000381,
+       0x00000000,
+       0xc0016900,
+       0x00000282,
+       0x00000008,
+       0xc0016900,
+       0x00000302,
+       0x0000002d,
+       0xc0016900,
+       0x0000037f,
+       0x00000000,
+       0xc0016900,
+       0x000001b2,
+       0x00000000,
+       0xc0016900,
+       0x000001b6,
+       0x00000000,
+       0xc0016900,
+       0x000001b7,
+       0x00000000,
+       0xc0016900,
+       0x000001b8,
+       0x00000000,
+       0xc0016900,
+       0x000001b9,
+       0x00000000,
+       0xc0016900,
+       0x00000225,
+       0x00000000,
+       0xc0016900,
+       0x00000229,
+       0x00000000,
+       0xc0016900,
+       0x00000237,
+       0x00000000,
+       0xc0016900,
+       0x00000100,
+       0x00000800,
+       0xc0016900,
+       0x00000101,
+       0x00000000,
+       0xc0016900,
+       0x00000102,
+       0x00000000,
+       0xc0016900,
+       0x000002a8,
+       0x00000000,
+       0xc0016900,
+       0x000002a9,
+       0x00000000,
+       0xc0016900,
+       0x00000103,
+       0x00000000,
+       0xc0016900,
+       0x00000284,
+       0x00000000,
+       0xc0016900,
+       0x00000290,
+       0x00000000,
+       0xc0016900,
+       0x00000285,
+       0x00000000,
+       0xc0016900,
+       0x00000286,
+       0x00000000,
+       0xc0016900,
+       0x00000287,
+       0x00000000,
+       0xc0016900,
+       0x00000288,
+       0x00000000,
+       0xc0016900,
+       0x00000289,
+       0x00000000,
+       0xc0016900,
+       0x0000028a,
+       0x00000000,
+       0xc0016900,
+       0x0000028b,
+       0x00000000,
+       0xc0016900,
+       0x0000028c,
+       0x00000000,
+       0xc0016900,
+       0x0000028d,
+       0x00000000,
+       0xc0016900,
+       0x0000028e,
+       0x00000000,
+       0xc0016900,
+       0x0000028f,
+       0x00000000,
+       0xc0016900,
+       0x000002a1,
+       0x00000000,
+       0xc0016900,
+       0x000002a5,
+       0x00000000,
+       0xc0016900,
+       0x000002ac,
+       0x00000000,
+       0xc0016900,
+       0x000002ad,
+       0x00000000,
+       0xc0016900,
+       0x000002ae,
+       0x00000000,
+       0xc0016900,
+       0x000002c8,
+       0x00000000,
+       0xc0016900,
+       0x00000206,
+       0x00000100,
+       0xc0016900,
+       0x00000204,
+       0x00010000,
+       0xc0036e00,
+       0x00000000,
+       0x00000012,
+       0x00000000,
+       0x00000000,
+       0xc0016900,
+       0x0000008f,
+       0x0000000f,
+       0xc0016900,
+       0x000001e8,
+       0x00000001,
+       0xc0016900,
+       0x00000202,
+       0x00cc0000,
+       0xc0016900,
+       0x00000205,
+       0x00000244,
+       0xc0016900,
+       0x00000203,
+       0x00000210,
+       0xc0016900,
+       0x000001b1,
+       0x00000000,
+       0xc0016900,
+       0x00000185,
+       0x00000000,
+       0xc0016900,
+       0x000001b3,
+       0x00000001,
+       0xc0016900,
+       0x000001b4,
+       0x00000000,
+       0xc0016900,
+       0x00000191,
+       0x00000b00,
+       0xc0016900,
+       0x000001b5,
+       0x00000000,
+};
+
+/*
+ * Table of raw 32-bit words for R7xx parts.  Its element count is exported
+ * further down in this file as r7xx_default_size.
+ *
+ * NOTE(review): by its name this is presumably a pre-built command stream
+ * that programs default GPU state -- the consumer is not visible here,
+ * confirm against the code that reads r7xx_default_state.
+ */
+const u32 r7xx_default_state[] =
+{
+       0xc0012800,
+       0x80000000,
+       0x80000000,
+       0xc0004600,
+       0x00000016,
+       0xc0016800,
+       0x00000010,
+       0x00028000,
+       0xc0016800,
+       0x00000010,
+       0x00008000,
+       0xc0016800,
+       0x00000542,
+       0x07000002,
+       0xc0016800,
+       0x000005c5,
+       0x00000000,
+       0xc0016800,
+       0x00000363,
+       0x00004000,
+       0xc0016800,
+       0x0000060c,
+       0x00000000,
+       0xc0016800,
+       0x0000060e,
+       0x00420204,
+       0xc0016f00,
+       0x00000000,
+       0x00000000,
+       0xc0016f00,
+       0x00000001,
+       0x00000000,
+       0xc0096900,
+       0x0000022a,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xc0016900,
+       0x00000004,
+       0x00000000,
+       0xc0016900,
+       0x0000000a,
+       0x00000000,
+       0xc0016900,
+       0x0000000b,
+       0x00000000,
+       0xc0016900,
+       0x0000010c,
+       0x00000000,
+       0xc0016900,
+       0x0000010d,
+       0x00000000,
+       0xc0016900,
+       0x00000200,
+       0x00000000,
+       0xc0016900,
+       0x00000343,
+       0x00000060,
+       0xc0016900,
+       0x00000344,
+       0x00000000,
+       0xc0016900,
+       0x00000351,
+       0x0000aa00,
+       0xc0016900,
+       0x00000104,
+       0x00000000,
+       0xc0016900,
+       0x0000010e,
+       0x00000000,
+       0xc0046900,
+       0x00000105,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xc0046900,
+       0x0000030c,
+       0x01000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xc0016900,
+       0x0000008e,
+       0x0000000f,
+       0xc0016900,
+       0x00000080,
+       0x00000000,
+       0xc0016900,
+       0x00000083,
+       0x0000ffff,
+       0xc0016900,
+       0x00000084,
+       0x00000000,
+       0xc0016900,
+       0x00000085,
+       0x20002000,
+       0xc0016900,
+       0x00000086,
+       0x00000000,
+       0xc0016900,
+       0x00000087,
+       0x20002000,
+       0xc0016900,
+       0x00000088,
+       0x00000000,
+       0xc0016900,
+       0x00000089,
+       0x20002000,
+       0xc0016900,
+       0x0000008a,
+       0x00000000,
+       0xc0016900,
+       0x0000008b,
+       0x20002000,
+       0xc0016900,
+       0x0000008c,
+       0xaaaaaaaa,
+       0xc0016900,
+       0x00000094,
+       0x80000000,
+       0xc0016900,
+       0x00000095,
+       0x20002000,
+       0xc0026900,
+       0x000000b4,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x00000096,
+       0x80000000,
+       0xc0016900,
+       0x00000097,
+       0x20002000,
+       0xc0026900,
+       0x000000b6,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x00000098,
+       0x80000000,
+       0xc0016900,
+       0x00000099,
+       0x20002000,
+       0xc0026900,
+       0x000000b8,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x0000009a,
+       0x80000000,
+       0xc0016900,
+       0x0000009b,
+       0x20002000,
+       0xc0026900,
+       0x000000ba,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x0000009c,
+       0x80000000,
+       0xc0016900,
+       0x0000009d,
+       0x20002000,
+       0xc0026900,
+       0x000000bc,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x0000009e,
+       0x80000000,
+       0xc0016900,
+       0x0000009f,
+       0x20002000,
+       0xc0026900,
+       0x000000be,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000a0,
+       0x80000000,
+       0xc0016900,
+       0x000000a1,
+       0x20002000,
+       0xc0026900,
+       0x000000c0,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000a2,
+       0x80000000,
+       0xc0016900,
+       0x000000a3,
+       0x20002000,
+       0xc0026900,
+       0x000000c2,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000a4,
+       0x80000000,
+       0xc0016900,
+       0x000000a5,
+       0x20002000,
+       0xc0026900,
+       0x000000c4,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000a6,
+       0x80000000,
+       0xc0016900,
+       0x000000a7,
+       0x20002000,
+       0xc0026900,
+       0x000000c6,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000a8,
+       0x80000000,
+       0xc0016900,
+       0x000000a9,
+       0x20002000,
+       0xc0026900,
+       0x000000c8,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000aa,
+       0x80000000,
+       0xc0016900,
+       0x000000ab,
+       0x20002000,
+       0xc0026900,
+       0x000000ca,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000ac,
+       0x80000000,
+       0xc0016900,
+       0x000000ad,
+       0x20002000,
+       0xc0026900,
+       0x000000cc,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000ae,
+       0x80000000,
+       0xc0016900,
+       0x000000af,
+       0x20002000,
+       0xc0026900,
+       0x000000ce,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000b0,
+       0x80000000,
+       0xc0016900,
+       0x000000b1,
+       0x20002000,
+       0xc0026900,
+       0x000000d0,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x000000b2,
+       0x80000000,
+       0xc0016900,
+       0x000000b3,
+       0x20002000,
+       0xc0026900,
+       0x000000d2,
+       0x00000000,
+       0x3f800000,
+       0xc0016900,
+       0x00000293,
+       0x00514000,
+       0xc0016900,
+       0x00000300,
+       0x00000000,
+       0xc0016900,
+       0x00000301,
+       0x00000000,
+       0xc0016900,
+       0x00000312,
+       0xffffffff,
+       0xc0016900,
+       0x00000307,
+       0x00000000,
+       0xc0016900,
+       0x00000308,
+       0x00000000,
+       0xc0016900,
+       0x00000283,
+       0x00000000,
+       0xc0016900,
+       0x00000292,
+       0x00000000,
+       0xc0066900,
+       0x0000010f,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xc0016900,
+       0x00000206,
+       0x00000000,
+       0xc0016900,
+       0x00000207,
+       0x00000000,
+       0xc0016900,
+       0x00000208,
+       0x00000000,
+       0xc0046900,
+       0x00000303,
+       0x3f800000,
+       0x3f800000,
+       0x3f800000,
+       0x3f800000,
+       0xc0016900,
+       0x00000205,
+       0x00000004,
+       0xc0016900,
+       0x00000280,
+       0x00000000,
+       0xc0016900,
+       0x00000281,
+       0x00000000,
+       0xc0016900,
+       0x0000037e,
+       0x00000000,
+       0xc0016900,
+       0x00000382,
+       0x00000000,
+       0xc0016900,
+       0x00000380,
+       0x00000000,
+       0xc0016900,
+       0x00000383,
+       0x00000000,
+       0xc0016900,
+       0x00000381,
+       0x00000000,
+       0xc0016900,
+       0x00000282,
+       0x00000008,
+       0xc0016900,
+       0x00000302,
+       0x0000002d,
+       0xc0016900,
+       0x0000037f,
+       0x00000000,
+       0xc0016900,
+       0x000001b2,
+       0x00000001,
+       0xc0016900,
+       0x000001b6,
+       0x00000000,
+       0xc0016900,
+       0x000001b7,
+       0x00000000,
+       0xc0016900,
+       0x000001b8,
+       0x00000000,
+       0xc0016900,
+       0x000001b9,
+       0x00000000,
+       0xc0016900,
+       0x00000225,
+       0x00000000,
+       0xc0016900,
+       0x00000229,
+       0x00000000,
+       0xc0016900,
+       0x00000237,
+       0x00000000,
+       0xc0016900,
+       0x00000100,
+       0x00000800,
+       0xc0016900,
+       0x00000101,
+       0x00000000,
+       0xc0016900,
+       0x00000102,
+       0x00000000,
+       0xc0016900,
+       0x000002a8,
+       0x00000000,
+       0xc0016900,
+       0x000002a9,
+       0x00000000,
+       0xc0016900,
+       0x00000103,
+       0x00000000,
+       0xc0016900,
+       0x00000284,
+       0x00000000,
+       0xc0016900,
+       0x00000290,
+       0x00000000,
+       0xc0016900,
+       0x00000285,
+       0x00000000,
+       0xc0016900,
+       0x00000286,
+       0x00000000,
+       0xc0016900,
+       0x00000287,
+       0x00000000,
+       0xc0016900,
+       0x00000288,
+       0x00000000,
+       0xc0016900,
+       0x00000289,
+       0x00000000,
+       0xc0016900,
+       0x0000028a,
+       0x00000000,
+       0xc0016900,
+       0x0000028b,
+       0x00000000,
+       0xc0016900,
+       0x0000028c,
+       0x00000000,
+       0xc0016900,
+       0x0000028d,
+       0x00000000,
+       0xc0016900,
+       0x0000028e,
+       0x00000000,
+       0xc0016900,
+       0x0000028f,
+       0x00000000,
+       0xc0016900,
+       0x000002a1,
+       0x00000000,
+       0xc0016900,
+       0x000002a5,
+       0x00000000,
+       0xc0016900,
+       0x000002ac,
+       0x00000000,
+       0xc0016900,
+       0x000002ad,
+       0x00000000,
+       0xc0016900,
+       0x000002ae,
+       0x00000000,
+       0xc0016900,
+       0x000002c8,
+       0x00000000,
+       0xc0016900,
+       0x00000206,
+       0x00000100,
+       0xc0016900,
+       0x00000204,
+       0x00010000,
+       0xc0036e00,
+       0x00000000,
+       0x00000012,
+       0x00000000,
+       0x00000000,
+       0xc0016900,
+       0x0000008f,
+       0x0000000f,
+       0xc0016900,
+       0x000001e8,
+       0x00000001,
+       0xc0016900,
+       0x00000202,
+       0x00cc0000,
+       0xc0016900,
+       0x00000205,
+       0x00000244,
+       0xc0016900,
+       0x00000203,
+       0x00000210,
+       0xc0016900,
+       0x000001b1,
+       0x00000000,
+       0xc0016900,
+       0x00000185,
+       0x00000000,
+       0xc0016900,
+       0x000001b3,
+       0x00000001,
+       0xc0016900,
+       0x000001b4,
+       0x00000000,
+       0xc0016900,
+       0x00000191,
+       0x00000b00,
+       0xc0016900,
+       0x000001b5,
+       0x00000000,
+};
+
+/*
+ * same for r6xx/r7xx
+ *
+ * Raw 32-bit words; the element count is exported below as r6xx_vs_size.
+ * NOTE(review): by its name this is presumably a pre-assembled vertex
+ * shader -- confirm against the code that uploads r6xx_vs.
+ */
+const u32 r6xx_vs[] =
+{
+       0x00000004,
+       0x81000000,
+       0x0000203c,
+       0x94000b08,
+       0x00004000,
+       0x14200b1a,
+       0x00000000,
+       0x00000000,
+       0x3c000000,
+       0x68cd1000,
+       0x00080000,
+       0x00000000,
+};
+
+/*
+ * Raw 32-bit words; the element count is exported below as r6xx_ps_size.
+ * NOTE(review): by its name this is presumably a pre-assembled pixel
+ * shader -- confirm against the code that uploads r6xx_ps.
+ */
+const u32 r6xx_ps[] =
+{
+       0x00000002,
+       0x80800000,
+       0x00000000,
+       0x94200688,
+       0x00000010,
+       0x000d1000,
+       0xb0800000,
+       0x00000000,
+};
+
+/*
+ * Element counts (number of u32 words, not bytes) of the tables defined in
+ * this file.  ARRAY_SIZE() only works where the full array definition is
+ * visible, so the counts are computed here and exported as variables.
+ */
+const u32 r6xx_ps_size = ARRAY_SIZE(r6xx_ps);
+const u32 r6xx_vs_size = ARRAY_SIZE(r6xx_vs);
+const u32 r6xx_default_size = ARRAY_SIZE(r6xx_default_state);
+const u32 r7xx_default_size = ARRAY_SIZE(r7xx_default_state);
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/radeon/r600_blit_shaders.h
new file mode 100644 (file)
index 0000000..fdc3b37
--- /dev/null
@@ -0,0 +1,14 @@
+
+#ifndef R600_BLIT_SHADERS_H
+#define R600_BLIT_SHADERS_H
+
+/*
+ * Word tables for the r6xx/r7xx shaders and default state.  They are
+ * declared with unknown bound, so sizeof/ARRAY_SIZE cannot be used on them
+ * by includers; use the *_size variables below instead.
+ */
+extern const u32 r6xx_ps[];
+extern const u32 r6xx_vs[];
+extern const u32 r7xx_default_state[];
+extern const u32 r6xx_default_state[];
+
+
+/* Number of u32 elements in the corresponding table above. */
+extern const u32 r6xx_ps_size, r6xx_vs_size;
+extern const u32 r6xx_default_size, r7xx_default_size;
+
+#endif
index 8327912..6d5a711 100644 (file)
@@ -58,6 +58,12 @@ MODULE_FIRMWARE("radeon/RV730_me.bin");
 MODULE_FIRMWARE("radeon/RV710_pfp.bin");
 MODULE_FIRMWARE("radeon/RV710_me.bin");
 
+
+/*
+ * Entry points of the legacy command-stream checker; their definitions are
+ * not part of this file.  r600_cs_legacy_init() is called once from
+ * r600_do_init_cp() and r600_cs_legacy() from r600_cs_legacy_ioctl().
+ */
+int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
+                       unsigned family, u32 *ib, int *l);
+void r600_cs_legacy_init(void);
+
+
 # define ATI_PCIGART_PAGE_SIZE         4096    /**< PCI GART page size */
 # define ATI_PCIGART_PAGE_MASK         (~(ATI_PCIGART_PAGE_SIZE-1))
 
@@ -1857,6 +1863,8 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 
        DRM_DEBUG("\n");
 
+       mutex_init(&dev_priv->cs_mutex);
+       r600_cs_legacy_init();
        /* if we require new memory map but we don't have it fail */
        if ((dev_priv->flags & RADEON_NEW_MEMMAP) && !dev_priv->new_memmap) {
                DRM_ERROR("Cannot initialise DRM on this card\nThis card requires a new X.org DDX for 3D\n");
@@ -1888,7 +1896,7 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
        /* Enable vblank on CRTC1 for older X servers
         */
        dev_priv->vblank_crtc = DRM_RADEON_VBLANK_CRTC1;
-
+       dev_priv->do_boxes = 0;
        dev_priv->cp_mode = init->cp_mode;
 
        /* We don't support anything other than bus-mastering ring mode,
@@ -1974,11 +1982,11 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
        } else
 #endif
        {
-               dev_priv->cp_ring->handle = (void *)dev_priv->cp_ring->offset;
+               dev_priv->cp_ring->handle = (void *)(unsigned long)dev_priv->cp_ring->offset;
                dev_priv->ring_rptr->handle =
-                   (void *)dev_priv->ring_rptr->offset;
+                       (void *)(unsigned long)dev_priv->ring_rptr->offset;
                dev->agp_buffer_map->handle =
-                   (void *)dev->agp_buffer_map->offset;
+                       (void *)(unsigned long)dev->agp_buffer_map->offset;
 
                DRM_DEBUG("dev_priv->cp_ring->handle %p\n",
                          dev_priv->cp_ring->handle);
@@ -2282,3 +2290,239 @@ int r600_cp_dispatch_indirect(struct drm_device *dev,
 
        return 0;
 }
+
+/*
+ * Copy every clip rectangle listed in the master's SAREA from the source
+ * buffer to the destination buffer, then bump the SAREA frame counter and
+ * emit it on the ring.
+ *
+ * With pfCurrentPage == 0 the copy goes back -> front, otherwise
+ * front -> back.  If the blit cannot be prepared an error is logged and the
+ * function returns without touching the frame counter.
+ */
+void r600_cp_dispatch_swap(struct drm_device *dev, struct drm_file *file_priv)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       struct drm_radeon_master_private *master_priv =
+               file_priv->master->driver_priv;
+       drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
+       struct drm_clip_rect *box = sarea_priv->boxes;
+       int boxes_left = sarea_priv->nbox;
+       int cpp, src_pitch, dst_pitch;
+       uint64_t src, dst;
+       RING_LOCALS;
+
+       DRM_DEBUG("\n");
+
+       /* bytes per pixel: 4 for ARGB8888, 2 for everything else */
+       cpp = (dev_priv->color_fmt == RADEON_COLOR_FORMAT_ARGB8888) ? 4 : 2;
+
+       if (sarea_priv->pfCurrentPage != 0) {
+               /* front -> back */
+               src_pitch = dev_priv->front_pitch;
+               src = dev_priv->front_offset + dev_priv->fb_location;
+               dst_pitch = dev_priv->back_pitch;
+               dst = dev_priv->back_offset + dev_priv->fb_location;
+       } else {
+               /* back -> front */
+               src_pitch = dev_priv->back_pitch;
+               src = dev_priv->back_offset + dev_priv->fb_location;
+               dst_pitch = dev_priv->front_pitch;
+               dst = dev_priv->front_offset + dev_priv->fb_location;
+       }
+
+       if (r600_prepare_blit_copy(dev, file_priv) != 0) {
+               DRM_ERROR("unable to allocate vertex buffer for swap buffer\n");
+               return;
+       }
+
+       /* one blit per clip rectangle, same coordinates in src and dst */
+       for (; boxes_left > 0; boxes_left--, box++) {
+               int x = box->x1;
+               int y = box->y1;
+               int w = box->x2 - x;
+               int h = box->y2 - y;
+
+               DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
+
+               r600_blit_swap(dev, src, dst, x, y, x, y, w, h,
+                              src_pitch, dst_pitch, cpp);
+       }
+       r600_done_blit_copy(dev);
+
+       /* Increment the frame counter.  The client-side 3D driver must
+        * throttle the framerate by waiting for this value before
+        * performing the swapbuffer ioctl.
+        */
+       sarea_priv->last_frame++;
+
+       BEGIN_RING(3);
+       R600_FRAME_AGE(sarea_priv->last_frame);
+       ADVANCE_RING();
+}
+
+/*
+ * Upload tex->height * tex->pitch bytes from user space (image->data) to
+ * the GPU address tex->offset.
+ *
+ * The data is staged through DMA buffers taken from the freelist: each pass
+ * copies at most buf->total bytes from user space into one buffer and blits
+ * it to the destination.  image->data is advanced after every pass.
+ *
+ * Returns 0 on success (or for a zero-sized upload), -EINVAL if the
+ * destination range fails radeon_check_offset(), -EAGAIN if the blit could
+ * not be prepared or no DMA buffer was available (in the latter case the
+ * updated *image is first copied back to tex->image), and -EFAULT if a
+ * user-space copy fails.
+ *
+ * Once r600_prepare_blit_copy() has succeeded, every exit path goes through
+ * r600_done_blit_copy() so the blit setup is always torn down.
+ */
+int r600_cp_dispatch_texture(struct drm_device *dev,
+                            struct drm_file *file_priv,
+                            drm_radeon_texture_t *tex,
+                            drm_radeon_tex_image_t *image)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       struct drm_buf *buf;
+       u32 *buffer;
+       const u8 __user *data;
+       int size, pass_size;
+       int ret = 0;
+       u64 src_offset, dst_offset;
+
+       if (!radeon_check_offset(dev_priv, tex->offset)) {
+               DRM_ERROR("Invalid destination offset\n");
+               return -EINVAL;
+       }
+
+       /* this might fail for zero-sized uploads - are those illegal? */
+       if (!radeon_check_offset(dev_priv, tex->offset + tex->height * tex->pitch - 1)) {
+               DRM_ERROR("Invalid final destination offset\n");
+               return -EINVAL;
+       }
+
+       size = tex->height * tex->pitch;
+
+       if (size == 0)
+               return 0;
+
+       dst_offset = tex->offset;
+
+       if (r600_prepare_blit_copy(dev, file_priv)) {
+               DRM_ERROR("unable to allocate vertex buffer for texture upload\n");
+               return -EAGAIN;
+       }
+       do {
+               data = (const u8 __user *)image->data;
+               pass_size = size;
+
+               buf = radeon_freelist_get(dev);
+               if (!buf) {
+                       DRM_DEBUG("EAGAIN\n");
+                       /* report progress so user space can resume later */
+                       if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
+                               ret = -EFAULT;
+                       else
+                               ret = -EAGAIN;
+                       break;
+               }
+
+               /* never copy more than one DMA buffer can hold */
+               if (pass_size > buf->total)
+                       pass_size = buf->total;
+
+               /* Dispatch the indirect buffer.
+                */
+               buffer =
+                   (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
+
+               if (DRM_COPY_FROM_USER(buffer, data, pass_size)) {
+                       DRM_ERROR("EFAULT on pad, %d bytes\n", pass_size);
+                       ret = -EFAULT;
+                       break;
+               }
+
+               buf->file_priv = file_priv;
+               buf->used = pass_size;
+               src_offset = dev_priv->gart_buffers_offset + buf->offset;
+
+               r600_blit_copy(dev, src_offset, dst_offset, pass_size);
+
+               radeon_cp_discard_buffer(dev, file_priv->master, buf);
+
+               /* Update the input parameters for next time */
+               image->data = (const u8 __user *)image->data + pass_size;
+               dst_offset += pass_size;
+               size -= pass_size;
+       } while (size > 0);
+
+       /* reached on success and on every error after a successful prepare */
+       r600_done_blit_copy(dev);
+
+       return ret;
+}
+
+/*
+ * Legacy cs ioctl
+ */
+/*
+ * Advance and return the command-stream id.
+ *
+ * The id is the OR of a 24-bit sequence counter (cs_id_scnt, low bits) and
+ * a wrap counter (cs_id_wcnt) that is stepped by 0x01000000.  The sequence
+ * counter never takes the value 0: when it wraps, the wrap counter is
+ * stepped and the sequence restarts at 1.
+ */
+static u32 radeon_cs_id_get(struct drm_radeon_private *radeon)
+{
+       /* FIXME: check if wrap affect last reported wrap & sequence */
+       radeon->cs_id_scnt = (radeon->cs_id_scnt + 1) & 0x00FFFFFF;
+       if (radeon->cs_id_scnt != 0)
+               return (radeon->cs_id_scnt | radeon->cs_id_wcnt);
+
+       /* sequence wrapped: step the wrap counter, restart sequence at 1 */
+       radeon->cs_id_wcnt += 0x01000000;
+       radeon->cs_id_scnt = 1;
+       return (radeon->cs_id_scnt | radeon->cs_id_wcnt);
+}
+
+/*
+ * Allocate the next command-stream id, hand it back through @id and emit
+ * it on the ring with R600_CLEAR_AGE(), then commit the ring.
+ */
+static void r600_cs_id_emit(drm_radeon_private_t *dev_priv, u32 *id)
+{
+       u32 cs_seq;
+       RING_LOCALS;
+
+       cs_seq = radeon_cs_id_get(dev_priv);
+       *id = cs_seq;
+
+       /* SCRATCH 2 */
+       BEGIN_RING(3);
+       R600_CLEAR_AGE(cs_seq);
+       ADVANCE_RING();
+       COMMIT_RING();
+}
+
+/*
+ * Take a buffer from the freelist to use as indirect buffer and tag it
+ * with the owning file.
+ *
+ * On success *buffer points to the buffer and 0 is returned; if the
+ * freelist is empty *buffer is NULL and -EBUSY is returned.
+ */
+static int r600_ib_get(struct drm_device *dev,
+                       struct drm_file *fpriv,
+                       struct drm_buf **buffer)
+{
+       struct drm_buf *ib_buf = radeon_freelist_get(dev);
+
+       *buffer = ib_buf;
+       if (ib_buf == NULL)
+               return -EBUSY;
+
+       ib_buf->file_priv = fpriv;
+       return 0;
+}
+
+/*
+ * Release an indirect buffer obtained with r600_ib_get().
+ *
+ * @l is the length in dwords and @r the result of the command-stream
+ * check: the buffer is dispatched (l * 4 bytes) only when @r is 0, and is
+ * discarded in every case.  A NULL @buf is a no-op.
+ */
+static void r600_ib_free(struct drm_device *dev, struct drm_buf *buf,
+                       struct drm_file *fpriv, int l, int r)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+
+       if (buf == NULL)
+               return;
+
+       if (r == 0)
+               r600_cp_dispatch_indirect(dev, buf, 0, l * 4);
+       radeon_cp_discard_buffer(dev, fpriv->master, buf);
+       COMMIT_RING();
+}
+
+/*
+ * Legacy command-stream ioctl for R6XX/R7XX.
+ *
+ * Under dev_priv->cs_mutex: grabs an indirect buffer, lets
+ * r600_cs_legacy() check the user command stream into it, dispatches the
+ * buffer if the check succeeded, and emits a new cs id which is returned
+ * to the caller in cs->cs_id.  The cs id is emitted on failure as well.
+ *
+ * Returns 0 on success, -EINVAL if the device is not initialised or is
+ * older than R600, or the error returned by r600_ib_get() /
+ * r600_cs_legacy().
+ */
+int r600_cs_legacy_ioctl(struct drm_device *dev, void *data, struct drm_file *fpriv)
+{
+       struct drm_radeon_private *dev_priv = dev->dev_private;
+       struct drm_radeon_cs *cs = data;
+       struct drm_buf *buf = NULL;
+       unsigned family;
+       /* l is handed to r600_ib_free() on the error paths too, so it must
+        * have a defined value even when r600_cs_legacy() never ran */
+       int l = 0, r = 0;
+       u32 *ib, cs_id = 0;
+
+       if (dev_priv == NULL) {
+               DRM_ERROR("called with no initialization\n");
+               return -EINVAL;
+       }
+       family = dev_priv->flags & RADEON_FAMILY_MASK;
+       if (family < CHIP_R600) {
+               DRM_ERROR("cs ioctl valid only for R6XX & R7XX in legacy mode\n");
+               return -EINVAL;
+       }
+       mutex_lock(&dev_priv->cs_mutex);
+       /* get ib */
+       r = r600_ib_get(dev, fpriv, &buf);
+       if (r) {
+               DRM_ERROR("ib_get failed\n");
+               goto out;
+       }
+       ib = (u32 *)((char *)dev->agp_buffer_map->handle + buf->offset);
+       /* now parse command stream */
+       r = r600_cs_legacy(dev, data,  fpriv, family, ib, &l);
+
+out:
+       /* dispatches the ib only when r == 0; no-op when buf is NULL */
+       r600_ib_free(dev, buf, fpriv, l, r);
+       /* emit cs id sequence */
+       r600_cs_id_emit(dev_priv, &cs_id);
+       cs->cs_id = cs_id;
+       mutex_unlock(&dev_priv->cs_mutex);
+       return r;
+}
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
new file mode 100644 (file)
index 0000000..39bf634
--- /dev/null
@@ -0,0 +1,658 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include "drmP.h"
+#include "radeon.h"
+#include "radeon_share.h"
+#include "r600d.h"
+#include "avivod.h"
+
+/*
+ * Two relocation lookups with the same signature exist in this file; the
+ * checker calls whichever one r600_cs_packet_next_reloc points at.  It
+ * starts out pointing at the _mm variant.
+ */
+static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
+                                       struct radeon_cs_reloc **cs_reloc);
+static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
+                                       struct radeon_cs_reloc **cs_reloc);
+typedef int (*next_reloc_t)(struct radeon_cs_parser*, struct radeon_cs_reloc**);
+static next_reloc_t r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_mm;
+
+/**
+ * r600_cs_packet_parse() - decode the header of the cp packet at @idx
+ * @p:         parser structure holding parsing context.
+ * @pkt:       where to store packet informations
+ * @idx:       dword index of the packet header inside the ib chunk
+ *
+ * Assume that chunk_ib_idx is properly set. Fills @pkt with the type,
+ * count and, depending on the type, the register (type 0) or opcode
+ * (type 3) taken from the header. Returns -EINVAL if @idx is outside the
+ * ib chunk, if the packet type is unknown or if the packet does not end
+ * before the end of the ib chunk; 0 otherwise.
+ **/
+int r600_cs_packet_parse(struct radeon_cs_parser *p,
+                       struct radeon_cs_packet *pkt,
+                       unsigned idx)
+{
+       struct radeon_cs_chunk *ib = &p->chunks[p->chunk_ib_idx];
+       uint32_t hdr;
+
+       if (idx >= ib->length_dw) {
+               DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+                         idx, ib->length_dw);
+               return -EINVAL;
+       }
+
+       hdr = ib->kdata[idx];
+       pkt->idx = idx;
+       pkt->type = CP_PACKET_GET_TYPE(hdr);
+       pkt->count = CP_PACKET_GET_COUNT(hdr);
+       pkt->one_reg_wr = 0;
+
+       if (pkt->type == PACKET_TYPE0) {
+               pkt->reg = CP_PACKET0_GET_REG(hdr);
+       } else if (pkt->type == PACKET_TYPE3) {
+               pkt->opcode = CP_PACKET3_GET_OPCODE(hdr);
+       } else if (pkt->type == PACKET_TYPE2) {
+               /* header-only packet: count + 1 payload dwords == 0 */
+               pkt->count = -1;
+       } else {
+               DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
+               return -EINVAL;
+       }
+
+       /* header plus payload must end before the end of the ib chunk */
+       if ((pkt->count + 1 + pkt->idx) >= ib->length_dw) {
+               DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
+                         pkt->idx, pkt->type, pkt->count, ib->length_dw);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/**
+ * r600_cs_packet_next_reloc_mm() - parse next packet which should be reloc packet3
+ * @p:         parser structure holding parsing context.
+ * @cs_reloc:  where to store the pointer to the reloc informations
+ *
+ * Check that the packet at p->idx is a PACKET3 NOP, take the dword after
+ * its header as a dword index into the relocation chunk and return the
+ * matching entry of p->relocs_ptr in @cs_reloc. p->idx is advanced past
+ * the packet as soon as it has been parsed.
+ *
+ * Returns 0 on success; -EINVAL if there is no relocation chunk, if the
+ * packet is not a PACKET3 NOP or if the reloc entry does not lie entirely
+ * inside the relocation chunk; or the error from r600_cs_packet_parse().
+ **/
+static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
+                                       struct radeon_cs_reloc **cs_reloc)
+{
+       struct radeon_cs_chunk *ib_chunk;
+       struct radeon_cs_chunk *relocs_chunk;
+       struct radeon_cs_packet p3reloc;
+       unsigned idx;
+       int r;
+
+       if (p->chunk_relocs_idx == -1) {
+               DRM_ERROR("No relocation chunk !\n");
+               return -EINVAL;
+       }
+       *cs_reloc = NULL;
+       ib_chunk = &p->chunks[p->chunk_ib_idx];
+       relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+       r = r600_cs_packet_parse(p, &p3reloc, p->idx);
+       if (r) {
+               return r;
+       }
+       p->idx += p3reloc.count + 2;
+       if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
+               DRM_ERROR("No packet3 for relocation for packet at %d.\n",
+                         p3reloc.idx);
+               return -EINVAL;
+       }
+       idx = ib_chunk->kdata[p3reloc.idx + 1];
+       /* A reloc is 4 dwords (see FIXME below): require the whole entry to
+        * fit, so idx / 4 can never select a partial trailing entry. */
+       if (idx >= relocs_chunk->length_dw ||
+           relocs_chunk->length_dw - idx < 4) {
+               DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+                         idx, relocs_chunk->length_dw);
+               return -EINVAL;
+       }
+       /* FIXME: we assume reloc size is 4 dwords */
+       *cs_reloc = p->relocs_ptr[(idx / 4)];
+       return 0;
+}
+
+/**
+ * r600_cs_packet_next_reloc_nomm() - parse next packet which should be reloc packet3
+ * @p:         parser structure holding parsing context.
+ * @cs_reloc:  where to store the pointer to the reloc informations
+ *
+ * Check that the packet at p->idx is a PACKET3 NOP, take the dword after
+ * its header as a dword index into the relocation chunk and build the GPU
+ * offset directly from that chunk: dword idx + 0 holds the low 32 bits and
+ * dword idx + 3 the high 32 bits. The result is written to p->relocs[0],
+ * which is reused (overwritten) on every call. p->idx is advanced past the
+ * packet as soon as it has been parsed.
+ *
+ * Returns 0 on success; -EINVAL if there is no relocation chunk, if the
+ * packet is not a PACKET3 NOP or if dwords idx .. idx + 3 do not all lie
+ * inside the relocation chunk; or the error from r600_cs_packet_parse().
+ **/
+static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
+                                       struct radeon_cs_reloc **cs_reloc)
+{
+       struct radeon_cs_chunk *ib_chunk;
+       struct radeon_cs_chunk *relocs_chunk;
+       struct radeon_cs_packet p3reloc;
+       unsigned idx;
+       int r;
+
+       if (p->chunk_relocs_idx == -1) {
+               DRM_ERROR("No relocation chunk !\n");
+               return -EINVAL;
+       }
+       *cs_reloc = NULL;
+       ib_chunk = &p->chunks[p->chunk_ib_idx];
+       relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+       r = r600_cs_packet_parse(p, &p3reloc, p->idx);
+       if (r) {
+               return r;
+       }
+       p->idx += p3reloc.count + 2;
+       if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
+               DRM_ERROR("No packet3 for relocation for packet at %d.\n",
+                         p3reloc.idx);
+               return -EINVAL;
+       }
+       idx = ib_chunk->kdata[p3reloc.idx + 1];
+       /* kdata[idx + 3] is read below, so the last of the four dwords --
+        * not only the first -- has to be inside the chunk. */
+       if (idx >= relocs_chunk->length_dw ||
+           relocs_chunk->length_dw - idx < 4) {
+               DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+                         idx, relocs_chunk->length_dw);
+               return -EINVAL;
+       }
+       *cs_reloc = &p->relocs[0];
+       (*cs_reloc)->lobj.gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32;
+       (*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
+       return 0;
+}
+
+/*
+ * Validate a single register write of a type 0 packet.
+ *
+ * Only AVIVO_D1MODE_VLINE_START_END and AVIVO_D2MODE_VLINE_START_END are
+ * accepted; any other register is logged and rejected with -EINVAL.
+ * @idx is only used for the error message.
+ */
+static int r600_packet0_check(struct radeon_cs_parser *p,
+                               struct radeon_cs_packet *pkt,
+                               unsigned idx, unsigned reg)
+{
+       if (reg == AVIVO_D1MODE_VLINE_START_END ||
+           reg == AVIVO_D2MODE_VLINE_START_END)
+               return 0;
+
+       printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
+              reg, idx);
+       return -EINVAL;
+}
+
+/*
+ * Check every register written by a type 0 packet.
+ *
+ * The packet writes pkt->count + 1 consecutive registers starting at
+ * pkt->reg (4 bytes apart); the matching data dwords start right after the
+ * header at pkt->idx + 1.  Returns 0 if all registers are allowed,
+ * otherwise the first error from r600_packet0_check().
+ */
+static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
+                               struct radeon_cs_packet *pkt)
+{
+       unsigned first_reg = pkt->reg;
+       unsigned first_idx = pkt->idx + 1;
+       unsigned n;
+
+       for (n = 0; n <= pkt->count; n++) {
+               int r = r600_packet0_check(p, pkt, first_idx + n,
+                                          first_reg + 4 * n);
+
+               if (r)
+                       return r;
+       }
+       return 0;
+}
+
+/*
+ * r600_packet3_check() - validate one type-3 packet and patch relocations.
+ * @p:   parser context; supplies the IB being written (p->ib->ptr), the
+ *       user IB chunk (p->chunks[p->chunk_ib_idx]) and the chip family
+ * @pkt: decoded packet (index, opcode and count)
+ *
+ * Every supported opcode has its count checked.  Opcodes that carry a
+ * buffer address fetch the next relocation through
+ * r600_cs_packet_next_reloc() and fold the relocated GPU offset into the
+ * matching IB dwords.  The SET_* opcodes have their register window checked
+ * against the corresponding PACKET3_SET_*_OFFSET / PACKET3_SET_*_END bounds.
+ *
+ * Returns 0 when the packet is accepted, -EINVAL otherwise.
+ */
+static int r600_packet3_check(struct radeon_cs_parser *p,
+                               struct radeon_cs_packet *pkt)
+{
+       struct radeon_cs_chunk *ib_chunk;
+       struct radeon_cs_reloc *reloc;
+       volatile u32 *ib;
+       unsigned idx;
+       unsigned i;
+       unsigned start_reg, end_reg, reg;
+       int r;
+
+       ib = p->ib->ptr;
+       ib_chunk = &p->chunks[p->chunk_ib_idx];
+       /* idx addresses the dword right after pkt->idx */
+       idx = pkt->idx + 1;
+       switch (pkt->opcode) {
+       case PACKET3_START_3D_CMDBUF:
+               /* rejected on RV770 and newer families */
+               if (p->family >= CHIP_RV770 || pkt->count) {
+                       DRM_ERROR("bad START_3D\n");
+                       return -EINVAL;
+               }
+               break;
+       case PACKET3_CONTEXT_CONTROL:
+               if (pkt->count != 1) {
+                       DRM_ERROR("bad CONTEXT_CONTROL\n");
+                       return -EINVAL;
+               }
+               break;
+       case PACKET3_INDEX_TYPE:
+       case PACKET3_NUM_INSTANCES:
+               if (pkt->count) {
+                       DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES\n");
+                       return -EINVAL;
+               }
+               break;
+       case PACKET3_DRAW_INDEX:
+               if (pkt->count != 3) {
+                       DRM_ERROR("bad DRAW_INDEX\n");
+                       return -EINVAL;
+               }
+               r = r600_cs_packet_next_reloc(p, &reloc);
+               if (r) {
+                       DRM_ERROR("bad DRAW_INDEX\n");
+                       return -EINVAL;
+               }
+               /* low 32 bits are added to the user value, bits 32..39
+                * replace the following dword */
+               ib[idx+0] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+               ib[idx+1] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+               break;
+       case PACKET3_DRAW_INDEX_AUTO:
+               if (pkt->count != 1) {
+                       DRM_ERROR("bad DRAW_INDEX_AUTO\n");
+                       return -EINVAL;
+               }
+               break;
+       case PACKET3_DRAW_INDEX_IMMD_BE:
+       case PACKET3_DRAW_INDEX_IMMD:
+               if (pkt->count < 2) {
+                       DRM_ERROR("bad DRAW_INDEX_IMMD\n");
+                       return -EINVAL;
+               }
+               break;
+       case PACKET3_WAIT_REG_MEM:
+               if (pkt->count != 5) {
+                       DRM_ERROR("bad WAIT_REG_MEM\n");
+                       return -EINVAL;
+               }
+               /* bit 4 is reg (0) or mem (1); only the memory form needs
+                * a relocation */
+               if (ib_chunk->kdata[idx+0] & 0x10) {
+                       r = r600_cs_packet_next_reloc(p, &reloc);
+                       if (r) {
+                               DRM_ERROR("bad WAIT_REG_MEM\n");
+                               return -EINVAL;
+                       }
+                       ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+                       ib[idx+2] = upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+               }
+               break;
+       case PACKET3_SURFACE_SYNC:
+               if (pkt->count != 3) {
+                       DRM_ERROR("bad SURFACE_SYNC\n");
+                       return -EINVAL;
+               }
+               /* 0xffffffff/0x0 is flush all cache flag; anything else
+                * names a buffer and therefore needs a relocation */
+               if (ib_chunk->kdata[idx+1] != 0xffffffff ||
+                   ib_chunk->kdata[idx+2] != 0) {
+                       r = r600_cs_packet_next_reloc(p, &reloc);
+                       if (r) {
+                               DRM_ERROR("bad SURFACE_SYNC\n");
+                               return -EINVAL;
+                       }
+                       /* offset is added in units of 256 bytes */
+                       ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+               }
+               break;
+       case PACKET3_EVENT_WRITE:
+               if (pkt->count != 2 && pkt->count != 0) {
+                       DRM_ERROR("bad EVENT_WRITE\n");
+                       return -EINVAL;
+               }
+               /* only the count == 2 form carries an address */
+               if (pkt->count) {
+                       r = r600_cs_packet_next_reloc(p, &reloc);
+                       if (r) {
+                               DRM_ERROR("bad EVENT_WRITE\n");
+                               return -EINVAL;
+                       }
+                       ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+                       ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+               }
+               break;
+       case PACKET3_EVENT_WRITE_EOP:
+               if (pkt->count != 4) {
+                       DRM_ERROR("bad EVENT_WRITE_EOP\n");
+                       return -EINVAL;
+               }
+               r = r600_cs_packet_next_reloc(p, &reloc);
+               if (r) {
+                       DRM_ERROR("bad EVENT_WRITE_EOP\n");
+                       return -EINVAL;
+               }
+               ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+               ib[idx+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+               break;
+       case PACKET3_SET_CONFIG_REG:
+               /* first dword is a dword offset relative to the window base;
+                * the start_reg < OFFSET test catches wrap-around */
+               start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONFIG_REG_OFFSET;
+               end_reg = 4 * pkt->count + start_reg - 4;
+               if ((start_reg < PACKET3_SET_CONFIG_REG_OFFSET) ||
+                   (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
+                   (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
+                       DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
+                       return -EINVAL;
+               }
+               for (i = 0; i < pkt->count; i++) {
+                       reg = start_reg + (4 * i);
+                       switch (reg) {
+                       case CP_COHER_BASE:
+                               /* use PACKET3_SURFACE_SYNC */
+                               DRM_ERROR("bad SET_CONFIG_REG "
+                                               "0x%04X\n", reg);
+                               return -EINVAL;
+                       default:
+                               break;
+                       }
+               }
+               break;
+       case PACKET3_SET_CONTEXT_REG:
+               start_reg = (ib[idx+0] << 2) + PACKET3_SET_CONTEXT_REG_OFFSET;
+               end_reg = 4 * pkt->count + start_reg - 4;
+               if ((start_reg < PACKET3_SET_CONTEXT_REG_OFFSET) ||
+                   (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
+                   (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
+                       DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
+                       return -EINVAL;
+               }
+               for (i = 0; i < pkt->count; i++) {
+                       reg = start_reg + (4 * i);
+                       switch (reg) {
+                       case DB_DEPTH_BASE:
+                       case CB_COLOR0_BASE:
+                       case CB_COLOR1_BASE:
+                       case CB_COLOR2_BASE:
+                       case CB_COLOR3_BASE:
+                       case CB_COLOR4_BASE:
+                       case CB_COLOR5_BASE:
+                       case CB_COLOR6_BASE:
+                       case CB_COLOR7_BASE:
+                       case SQ_PGM_START_FS:
+                       case SQ_PGM_START_ES:
+                       case SQ_PGM_START_VS:
+                       case SQ_PGM_START_GS:
+                       case SQ_PGM_START_PS:
+                               /* base registers: one relocation each, added
+                                * in units of 256 bytes */
+                               r = r600_cs_packet_next_reloc(p, &reloc);
+                               if (r) {
+                                       DRM_ERROR("bad SET_CONTEXT_REG "
+                                                       "0x%04X\n", reg);
+                                       return -EINVAL;
+                               }
+                               ib[idx+1+i] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+                               break;
+                       case VGT_DMA_BASE:
+                       case VGT_DMA_BASE_HI:
+                               /* These should be handled by DRAW_INDEX packet 3 */
+                       case VGT_STRMOUT_BASE_OFFSET_0:
+                       case VGT_STRMOUT_BASE_OFFSET_1:
+                       case VGT_STRMOUT_BASE_OFFSET_2:
+                       case VGT_STRMOUT_BASE_OFFSET_3:
+                       case VGT_STRMOUT_BASE_OFFSET_HI_0:
+                       case VGT_STRMOUT_BASE_OFFSET_HI_1:
+                       case VGT_STRMOUT_BASE_OFFSET_HI_2:
+                       case VGT_STRMOUT_BASE_OFFSET_HI_3:
+                       case VGT_STRMOUT_BUFFER_BASE_0:
+                       case VGT_STRMOUT_BUFFER_BASE_1:
+                       case VGT_STRMOUT_BUFFER_BASE_2:
+                       case VGT_STRMOUT_BUFFER_BASE_3:
+                       case VGT_STRMOUT_BUFFER_OFFSET_0:
+                       case VGT_STRMOUT_BUFFER_OFFSET_1:
+                       case VGT_STRMOUT_BUFFER_OFFSET_2:
+                       case VGT_STRMOUT_BUFFER_OFFSET_3:
+                               /* These should be handled by STRMOUT_BUFFER packet 3 */
+                               DRM_ERROR("bad context reg: 0x%08x\n", reg);
+                               return -EINVAL;
+                       default:
+                               break;
+                       }
+               }
+               break;
+       case PACKET3_SET_RESOURCE:
+               /* resources are described in groups of 7 dwords */
+               if (pkt->count % 7) {
+                       DRM_ERROR("bad SET_RESOURCE\n");
+                       return -EINVAL;
+               }
+               start_reg = (ib[idx+0] << 2) + PACKET3_SET_RESOURCE_OFFSET;
+               end_reg = 4 * pkt->count + start_reg - 4;
+               if ((start_reg < PACKET3_SET_RESOURCE_OFFSET) ||
+                   (start_reg >= PACKET3_SET_RESOURCE_END) ||
+                   (end_reg >= PACKET3_SET_RESOURCE_END)) {
+                       DRM_ERROR("bad SET_RESOURCE\n");
+                       return -EINVAL;
+               }
+               for (i = 0; i < (pkt->count / 7); i++) {
+                       /* the type field is read from the last dword of
+                        * each 7-dword group */
+                       switch (G__SQ_VTX_CONSTANT_TYPE(ib[idx+(i*7)+6+1])) {
+                       case SQ_TEX_VTX_VALID_TEXTURE:
+                               /* tex base */
+                               r = r600_cs_packet_next_reloc(p, &reloc);
+                               if (r) {
+                                       DRM_ERROR("bad SET_RESOURCE\n");
+                                       return -EINVAL;
+                               }
+                               ib[idx+1+(i*7)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+                               /* tex mip base */
+                               r = r600_cs_packet_next_reloc(p, &reloc);
+                               if (r) {
+                                       DRM_ERROR("bad SET_RESOURCE\n");
+                                       return -EINVAL;
+                               }
+                               ib[idx+1+(i*7)+3] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+                               break;
+                       case SQ_TEX_VTX_VALID_BUFFER:
+                               /* vtx base */
+                               r = r600_cs_packet_next_reloc(p, &reloc);
+                               if (r) {
+                                       DRM_ERROR("bad SET_RESOURCE\n");
+                                       return -EINVAL;
+                               }
+                               ib[idx+1+(i*7)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff);
+                               ib[idx+1+(i*7)+2] |= upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+                               break;
+                       case SQ_TEX_VTX_INVALID_TEXTURE:
+                       case SQ_TEX_VTX_INVALID_BUFFER:
+                       default:
+                               DRM_ERROR("bad SET_RESOURCE\n");
+                               return -EINVAL;
+                       }
+               }
+               break;
+       case PACKET3_SET_ALU_CONST:
+               start_reg = (ib[idx+0] << 2) + PACKET3_SET_ALU_CONST_OFFSET;
+               end_reg = 4 * pkt->count + start_reg - 4;
+               if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) ||
+                   (start_reg >= PACKET3_SET_ALU_CONST_END) ||
+                   (end_reg >= PACKET3_SET_ALU_CONST_END)) {
+                       DRM_ERROR("bad SET_ALU_CONST\n");
+                       return -EINVAL;
+               }
+               break;
+       case PACKET3_SET_BOOL_CONST:
+               start_reg = (ib[idx+0] << 2) + PACKET3_SET_BOOL_CONST_OFFSET;
+               end_reg = 4 * pkt->count + start_reg - 4;
+               if ((start_reg < PACKET3_SET_BOOL_CONST_OFFSET) ||
+                   (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
+                   (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
+                       DRM_ERROR("bad SET_BOOL_CONST\n");
+                       return -EINVAL;
+               }
+               break;
+       case PACKET3_SET_LOOP_CONST:
+               start_reg = (ib[idx+0] << 2) + PACKET3_SET_LOOP_CONST_OFFSET;
+               end_reg = 4 * pkt->count + start_reg - 4;
+               if ((start_reg < PACKET3_SET_LOOP_CONST_OFFSET) ||
+                   (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
+                   (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
+                       DRM_ERROR("bad SET_LOOP_CONST\n");
+                       return -EINVAL;
+               }
+               break;
+       case PACKET3_SET_CTL_CONST:
+               start_reg = (ib[idx+0] << 2) + PACKET3_SET_CTL_CONST_OFFSET;
+               end_reg = 4 * pkt->count + start_reg - 4;
+               if ((start_reg < PACKET3_SET_CTL_CONST_OFFSET) ||
+                   (start_reg >= PACKET3_SET_CTL_CONST_END) ||
+                   (end_reg >= PACKET3_SET_CTL_CONST_END)) {
+                       DRM_ERROR("bad SET_CTL_CONST\n");
+                       return -EINVAL;
+               }
+               break;
+       case PACKET3_SET_SAMPLER:
+               /* samplers are described in groups of 3 dwords */
+               if (pkt->count % 3) {
+                       DRM_ERROR("bad SET_SAMPLER\n");
+                       return -EINVAL;
+               }
+               start_reg = (ib[idx+0] << 2) + PACKET3_SET_SAMPLER_OFFSET;
+               end_reg = 4 * pkt->count + start_reg - 4;
+               if ((start_reg < PACKET3_SET_SAMPLER_OFFSET) ||
+                   (start_reg >= PACKET3_SET_SAMPLER_END) ||
+                   (end_reg >= PACKET3_SET_SAMPLER_END)) {
+                       DRM_ERROR("bad SET_SAMPLER\n");
+                       return -EINVAL;
+               }
+               break;
+       case PACKET3_SURFACE_BASE_UPDATE:
+               /* rejected on CHIP_R600 itself and on RV770 and newer */
+               if (p->family >= CHIP_RV770 || p->family == CHIP_R600) {
+                       DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
+                       return -EINVAL;
+               }
+               if (pkt->count) {
+                       DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
+                       return -EINVAL;
+               }
+               break;
+       case PACKET3_NOP:
+               break;
+       default:
+               DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+int r600_cs_parse(struct radeon_cs_parser *p)
+{
+       struct radeon_cs_packet pkt;
+       int r;
+
+       do {
+               r = r600_cs_packet_parse(p, &pkt, p->idx);
+               if (r) {
+                       return r;
+               }
+               p->idx += pkt.count + 2;
+               switch (pkt.type) {
+               case PACKET_TYPE0:
+                       r = r600_cs_parse_packet0(p, &pkt);
+                       break;
+               case PACKET_TYPE2:
+                       break;
+               case PACKET_TYPE3:
+                       r = r600_packet3_check(p, &pkt);
+                       break;
+               default:
+                       DRM_ERROR("Unknown packet type %d !\n", pkt.type);
+                       return -EINVAL;
+               }
+               if (r) {
+                       return r;
+               }
+       } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+       for (r = 0; r < p->ib->length_dw; r++) {
+               printk(KERN_INFO "%05d  0x%08X\n", r, p->ib->ptr[r]);
+               mdelay(1);
+       }
+#endif
+       return 0;
+}
+
+/*
+ * r600_cs_parser_relocs_legacy() - allocate the parser's relocation slot.
+ * @p: parser context
+ *
+ * When a relocation chunk is present (chunk_relocs_idx != -1) a single
+ * zeroed struct radeon_cs_reloc is allocated into p->relocs; otherwise
+ * nothing is done.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p)
+{
+       if (p->chunk_relocs_idx != -1) {
+               p->relocs = kcalloc(1, sizeof(*p->relocs), GFP_KERNEL);
+               if (!p->relocs)
+                       return -ENOMEM;
+       }
+       return 0;
+}
+
+/**
+ * r600_cs_parser_fini() - release memory held by a parser context
+ * @parser:    parser structure holding parsing context.
+ * @error:     error number (not used by this function)
+ *
+ * Frees the relocation array, the kernel copy of every chunk, the chunk
+ * array and the chunk pointer array.  Members that are still NULL are
+ * skipped, so this can be called on a zero-initialised parser whose setup
+ * failed part-way.
+ **/
+static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error)
+{
+       unsigned i;
+
+       kfree(parser->relocs);
+       /* NOTE(review): radeon_cs_parser_init() is not visible here; guard in
+        * case it failed after setting nchunks but before allocating chunks. */
+       if (parser->chunks != NULL) {
+               for (i = 0; i < parser->nchunks; i++) {
+                       kfree(parser->chunks[i].kdata);
+               }
+       }
+       kfree(parser->chunks);
+       kfree(parser->chunks_array);
+}
+
+/*
+ * r600_cs_legacy() - validate a command stream on the legacy path.
+ * @dev:    DRM device (not used by this function)
+ * @data:   payload handed to radeon_cs_parser_init()
+ * @filp:   DRM file recorded in the parser
+ * @family: chip family recorded in the parser
+ * @ib:     destination buffer that receives the checked command stream
+ * @l:      out: length of the command stream in dwords
+ *
+ * Builds a temporary parser around a stack-local IB descriptor that points
+ * at @ib, copies the user IB chunk into @ib, then runs r600_cs_parse() over
+ * it.  Parser resources are released on every path before returning.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
+                       unsigned family, u32 *ib, int *l)
+{
+       struct radeon_cs_parser parser;
+       struct radeon_cs_chunk *ib_chunk;
+       struct radeon_ib        fake_ib;
+       int r;
+
+       /* initialize parser; fake_ib is zeroed too so that no stale stack
+        * contents are reachable through parser.ib */
+       memset(&parser, 0, sizeof(struct radeon_cs_parser));
+       memset(&fake_ib, 0, sizeof(fake_ib));
+       parser.filp = filp;
+       parser.rdev = NULL;
+       parser.family = family;
+       parser.ib = &fake_ib;
+       fake_ib.ptr = ib;
+       r = radeon_cs_parser_init(&parser, data);
+       if (r) {
+               DRM_ERROR("Failed to initialize parser !\n");
+               goto out;
+       }
+       r = r600_cs_parser_relocs_legacy(&parser);
+       if (r) {
+               DRM_ERROR("Failed to parse relocation !\n");
+               goto out;
+       }
+       /* Copy the packet into the IB, the parser will read from the
+        * input memory (cached) and write to the IB (which can be
+        * uncached). */
+       /* NOTE(review): no capacity for ib is passed in; presumably the
+        * caller guarantees it can hold length_dw dwords - confirm. */
+       ib_chunk = &parser.chunks[parser.chunk_ib_idx];
+       parser.ib->length_dw = ib_chunk->length_dw;
+       memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4);
+       *l = parser.ib->length_dw;
+       r = r600_cs_parse(&parser);
+       if (r) {
+               DRM_ERROR("Invalid command stream !\n");
+       }
+out:
+       r600_cs_parser_fini(&parser, r);
+       return r;
+}
+
+/*
+ * r600_cs_legacy_init() - select the relocation lookup used by the legacy
+ * path by pointing r600_cs_packet_next_reloc at the _nomm implementation.
+ */
+void r600_cs_legacy_init(void)
+{
+       r600_cs_packet_next_reloc = r600_cs_packet_next_reloc_nomm;
+}
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
new file mode 100644 (file)
index 0000000..723295f
--- /dev/null
@@ -0,0 +1,661 @@
+/*
+ * Copyright 2009 Advanced Micro Devices, Inc.
+ * Copyright 2009 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#ifndef R600D_H
+#define R600D_H
+
+#define CP_PACKET2                     0x80000000
+#define                PACKET2_PAD_SHIFT               0
+#define                PACKET2_PAD_MASK                (0x3fffffff << 0)
+
+#define PACKET2(v)     (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define R6XX_MAX_SH_GPRS                       256
+#define R6XX_MAX_TEMP_GPRS                     16
+#define R6XX_MAX_SH_THREADS                    256
+#define R6XX_MAX_SH_STACK_ENTRIES              4096
+#define R6XX_MAX_BACKENDS                      8
+#define R6XX_MAX_BACKENDS_MASK                 0xff
+#define R6XX_MAX_SIMDS                         8
+#define R6XX_MAX_SIMDS_MASK                    0xff
+#define R6XX_MAX_PIPES                         8
+#define R6XX_MAX_PIPES_MASK                    0xff
+
+/* PTE flags */
+#define PTE_VALID                              (1 << 0)
+#define PTE_SYSTEM                             (1 << 1)
+#define PTE_SNOOPED                            (1 << 2)
+#define PTE_READABLE                           (1 << 5)
+#define PTE_WRITEABLE                          (1 << 6)
+
+/* Registers */
+#define        ARB_POP                                         0x2418
+#define        ENABLE_TC128                                    (1 << 30)
+#define        ARB_GDEC_RD_CNTL                                0x246C
+
+#define        CC_GC_SHADER_PIPE_CONFIG                        0x8950
+#define        CC_RB_BACKEND_DISABLE                           0x98F4
+#define                BACKEND_DISABLE(x)                              ((x) << 16)
+
+#define        CB_COLOR0_BASE                                  0x28040
+#define        CB_COLOR1_BASE                                  0x28044
+#define        CB_COLOR2_BASE                                  0x28048
+#define        CB_COLOR3_BASE                                  0x2804C
+#define        CB_COLOR4_BASE                                  0x28050
+#define        CB_COLOR5_BASE                                  0x28054
+#define        CB_COLOR6_BASE                                  0x28058
+#define        CB_COLOR7_BASE                                  0x2805C
+#define        CB_COLOR7_FRAG                                  0x280FC
+
+#define CB_COLOR0_SIZE                                  0x28060
+#define CB_COLOR0_VIEW                                  0x28080
+#define CB_COLOR0_INFO                                  0x280a0
+#define CB_COLOR0_TILE                                  0x280c0
+#define CB_COLOR0_FRAG                                  0x280e0
+#define CB_COLOR0_MASK                                  0x28100
+
+#define        CONFIG_MEMSIZE                                  0x5428
+#define        CP_STAT                                         0x8680
+#define        CP_COHER_BASE                                   0x85F8
+#define        CP_DEBUG                                        0xC1FC
+#define        R_0086D8_CP_ME_CNTL                     0x86D8
+#define                S_0086D8_CP_ME_HALT(x)                  (((x) & 1)<<28)
+#define                C_0086D8_CP_ME_HALT(x)                  ((x) & 0xEFFFFFFF)
+#define        CP_ME_RAM_DATA                                  0xC160
+#define        CP_ME_RAM_RADDR                                 0xC158
+#define        CP_ME_RAM_WADDR                                 0xC15C
+#define CP_MEQ_THRESHOLDS                              0x8764
+#define                MEQ_END(x)                                      ((x) << 16)
+#define                ROQ_END(x)                                      ((x) << 24)
+#define        CP_PERFMON_CNTL                                 0x87FC
+#define        CP_PFP_UCODE_ADDR                               0xC150
+#define        CP_PFP_UCODE_DATA                               0xC154
+#define        CP_QUEUE_THRESHOLDS                             0x8760
+#define                ROQ_IB1_START(x)                                ((x) << 0)
+#define                ROQ_IB2_START(x)                                ((x) << 8)
+#define        CP_RB_BASE                                      0xC100
+#define        CP_RB_CNTL                                      0xC104
+#define                RB_BUFSZ(x)                                     ((x)<<0)
+#define                RB_BLKSZ(x)                                     ((x)<<8)
+#define                RB_NO_UPDATE                                    (1<<27)
+#define                RB_RPTR_WR_ENA                                  (1<<31)
+#define                BUF_SWAP_32BIT                                  (2 << 16)
+#define        CP_RB_RPTR                                      0x8700
+#define        CP_RB_RPTR_ADDR                                 0xC10C
+#define        CP_RB_RPTR_ADDR_HI                              0xC110
+#define        CP_RB_RPTR_WR                                   0xC108
+#define        CP_RB_WPTR                                      0xC114
+#define        CP_RB_WPTR_ADDR                                 0xC118
+#define        CP_RB_WPTR_ADDR_HI                              0xC11C
+#define        CP_RB_WPTR_DELAY                                0x8704
+#define        CP_ROQ_IB1_STAT                                 0x8784
+#define        CP_ROQ_IB2_STAT                                 0x8788
+#define        CP_SEM_WAIT_TIMER                               0x85BC
+
+#define        DB_DEBUG                                        0x9830
+#define                PREZ_MUST_WAIT_FOR_POSTZ_DONE                   (1 << 31)
+#define        DB_DEPTH_BASE                                   0x2800C
+#define        DB_WATERMARKS                                   0x9838
+#define                DEPTH_FREE(x)                                   ((x) << 0)
+#define                DEPTH_FLUSH(x)                                  ((x) << 5)
+#define                DEPTH_PENDING_FREE(x)                           ((x) << 15)
+#define                DEPTH_CACHELINE_FREE(x)                         ((x) << 20)
+
+#define        DCP_TILING_CONFIG                               0x6CA0
+#define                PIPE_TILING(x)                                  ((x) << 1)
+#define        BANK_TILING(x)                                  ((x) << 4)
+#define                GROUP_SIZE(x)                                   ((x) << 6)
+#define                ROW_TILING(x)                                   ((x) << 8)
+#define                BANK_SWAPS(x)                                   ((x) << 11)
+#define                SAMPLE_SPLIT(x)                                 ((x) << 14)
+#define                BACKEND_MAP(x)                                  ((x) << 16)
+
+#define GB_TILING_CONFIG                               0x98F0
+
+#define        GC_USER_SHADER_PIPE_CONFIG                      0x8954
+#define                INACTIVE_QD_PIPES(x)                            ((x) << 8)
+#define                INACTIVE_QD_PIPES_MASK                          0x0000FF00
+#define                INACTIVE_SIMDS(x)                               ((x) << 16)
+#define                INACTIVE_SIMDS_MASK                             0x00FF0000
+
+#define SQ_CONFIG                                         0x8c00
+#       define VC_ENABLE                                  (1 << 0)
+#       define EXPORT_SRC_C                               (1 << 1)
+#       define DX9_CONSTS                                 (1 << 2)
+#       define ALU_INST_PREFER_VECTOR                     (1 << 3)
+#       define DX10_CLAMP                                 (1 << 4)
+#       define CLAUSE_SEQ_PRIO(x)                         ((x) << 8)
+#       define PS_PRIO(x)                                 ((x) << 24)
+#       define VS_PRIO(x)                                 ((x) << 26)
+#       define GS_PRIO(x)                                 ((x) << 28)
+#       define ES_PRIO(x)                                 ((x) << 30)
+#define SQ_GPR_RESOURCE_MGMT_1                            0x8c04
+#       define NUM_PS_GPRS(x)                             ((x) << 0)
+#       define NUM_VS_GPRS(x)                             ((x) << 16)
+#       define NUM_CLAUSE_TEMP_GPRS(x)                    ((x) << 28)
+#define SQ_GPR_RESOURCE_MGMT_2                            0x8c08
+#       define NUM_GS_GPRS(x)                             ((x) << 0)
+#       define NUM_ES_GPRS(x)                             ((x) << 16)
+#define SQ_THREAD_RESOURCE_MGMT                           0x8c0c
+#       define NUM_PS_THREADS(x)                          ((x) << 0)
+#       define NUM_VS_THREADS(x)                          ((x) << 8)
+#       define NUM_GS_THREADS(x)                          ((x) << 16)
+#       define NUM_ES_THREADS(x)                          ((x) << 24)
+#define SQ_STACK_RESOURCE_MGMT_1                          0x8c10
+#       define NUM_PS_STACK_ENTRIES(x)                    ((x) << 0)
+#       define NUM_VS_STACK_ENTRIES(x)                    ((x) << 16)
+#define SQ_STACK_RESOURCE_MGMT_2                          0x8c14
+#       define NUM_GS_STACK_ENTRIES(x)                    ((x) << 0)
+#       define NUM_ES_STACK_ENTRIES(x)                    ((x) << 16)
+
+#define GRBM_CNTL                                       0x8000
+#       define GRBM_READ_TIMEOUT(x)                     ((x) << 0)
+#define        GRBM_STATUS                                     0x8010
+#define                CMDFIFO_AVAIL_MASK                              0x0000001F
+#define                GUI_ACTIVE                                      (1<<31)
+#define        GRBM_STATUS2                                    0x8014
+#define        GRBM_SOFT_RESET                                 0x8020
+#define                SOFT_RESET_CP                                   (1<<0)
+
+#define        HDP_HOST_PATH_CNTL                              0x2C00
+#define        HDP_NONSURFACE_BASE                             0x2C04
+#define        HDP_NONSURFACE_INFO                             0x2C08
+#define        HDP_NONSURFACE_SIZE                             0x2C0C
+#define HDP_REG_COHERENCY_FLUSH_CNTL                   0x54A0
+#define        HDP_TILING_CONFIG                               0x2F3C
+
+#define MC_VM_AGP_TOP                                  0x2184
+#define MC_VM_AGP_BOT                                  0x2188
+#define        MC_VM_AGP_BASE                                  0x218C
+#define MC_VM_FB_LOCATION                              0x2180
+#define MC_VM_L1_TLB_MCD_RD_A_CNTL                     0x219C /* indented names below look like this register's fields (layout convention; confirm) */
+#define        ENABLE_L1_TLB                                   (1 << 0)
+#define                ENABLE_L1_FRAGMENT_PROCESSING                   (1 << 1)
+#define                ENABLE_L1_STRICT_ORDERING                       (1 << 2)
+#define                SYSTEM_ACCESS_MODE_MASK                         0x000000C0 /* bits 7:6 */
+#define                SYSTEM_ACCESS_MODE_SHIFT                        6
+#define                SYSTEM_ACCESS_MODE_PA_ONLY                      (0 << 6) /* the four mode values are already shifted into bits 7:6 */
+#define                SYSTEM_ACCESS_MODE_USE_SYS_MAP                  (1 << 6)
+#define                SYSTEM_ACCESS_MODE_IN_SYS                       (2 << 6)
+#define                SYSTEM_ACCESS_MODE_NOT_IN_SYS                   (3 << 6)
+#define                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU       (0 << 8)
+#define                SYSTEM_APERTURE_UNMAPPED_ACCESS_DEFAULT_PAGE    (1 << 8)
+#define                ENABLE_SEMAPHORE_MODE                           (1 << 10)
+#define                ENABLE_WAIT_L2_QUERY                            (1 << 11)
+#define                EFFECTIVE_L1_TLB_SIZE(x)                        (((x) & 7) << 12) /* 3-bit value at bits 14:12; agrees with the MASK/SHIFT pair below */
+#define                EFFECTIVE_L1_TLB_SIZE_MASK                      0x00007000
+#define                EFFECTIVE_L1_TLB_SIZE_SHIFT                     12
+#define                EFFECTIVE_L1_QUEUE_SIZE(x)                      (((x) & 7) << 15) /* 3-bit value at bits 17:15; agrees with the MASK/SHIFT pair below */
+#define                EFFECTIVE_L1_QUEUE_SIZE_MASK                    0x00038000
+#define                EFFECTIVE_L1_QUEUE_SIZE_SHIFT                   15
+#define MC_VM_L1_TLB_MCD_RD_B_CNTL                     0x21A0
+#define MC_VM_L1_TLB_MCB_RD_GFX_CNTL                   0x21FC
+#define MC_VM_L1_TLB_MCB_RD_HDP_CNTL                   0x2204
+#define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL                  0x2208
+#define MC_VM_L1_TLB_MCB_RD_SEM_CNTL                   0x220C
+#define        MC_VM_L1_TLB_MCB_RD_SYS_CNTL                    0x2200 /* listed out of numeric order: sits between RD_GFX (0x21FC) and RD_HDP (0x2204) */
+#define MC_VM_L1_TLB_MCD_WR_A_CNTL                     0x21A4
+#define MC_VM_L1_TLB_MCD_WR_B_CNTL                     0x21A8
+#define MC_VM_L1_TLB_MCB_WR_GFX_CNTL                   0x2210
+#define MC_VM_L1_TLB_MCB_WR_HDP_CNTL                   0x2218
+#define MC_VM_L1_TLB_MCB_WR_PDMA_CNTL                  0x221C
+#define MC_VM_L1_TLB_MCB_WR_SEM_CNTL                   0x2220
+#define MC_VM_L1_TLB_MCB_WR_SYS_CNTL                   0x2214 /* listed out of numeric order: sits between WR_GFX (0x2210) and WR_HDP (0x2218) */
+#define MC_VM_SYSTEM_APERTURE_LOW_ADDR                 0x2190
+#define                LOGICAL_PAGE_NUMBER_MASK                        0x000FFFFF /* low 20 bits */
+#define                LOGICAL_PAGE_NUMBER_SHIFT                       0
+#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR                        0x2194
+#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR             0x2198
+
+#define        PA_CL_ENHANCE                                   0x8A14
+#define                CLIP_VTX_REORDER_ENA                            (1 << 0)
+#define                NUM_CLIP_SEQ(x)                                 ((x) << 1) /* x is shifted without masking */
+#define PA_SC_AA_CONFIG                                        0x28C04
+#define        PA_SC_AA_SAMPLE_LOCS_2S                         0x8B40
+#define        PA_SC_AA_SAMPLE_LOCS_4S                         0x8B44
+#define        PA_SC_AA_SAMPLE_LOCS_8S_WD0                     0x8B48
+#define        PA_SC_AA_SAMPLE_LOCS_8S_WD1                     0x8B4C
+#define                S0_X(x)                                         ((x) << 0) /* Sn_X/Sn_Y shifts step by 4 bits; x is not masked */
+#define                S0_Y(x)                                         ((x) << 4)
+#define                S1_X(x)                                         ((x) << 8)
+#define                S1_Y(x)                                         ((x) << 12)
+#define                S2_X(x)                                         ((x) << 16)
+#define                S2_Y(x)                                         ((x) << 20)
+#define                S3_X(x)                                         ((x) << 24)
+#define                S3_Y(x)                                         ((x) << 28)
+#define                S4_X(x)                                         ((x) << 0) /* S4..S7 reuse the S0..S3 shifts (presumably for the second 8S word; confirm) */
+#define                S4_Y(x)                                         ((x) << 4)
+#define                S5_X(x)                                         ((x) << 8)
+#define                S5_Y(x)                                         ((x) << 12)
+#define                S6_X(x)                                         ((x) << 16)
+#define                S6_Y(x)                                         ((x) << 20)
+#define                S7_X(x)                                         ((x) << 24)
+#define                S7_Y(x)                                         ((x) << 28)
+#define PA_SC_CLIPRECT_RULE                            0x2820c
+#define        PA_SC_ENHANCE                                   0x8BF0
+#define                FORCE_EOV_MAX_CLK_CNT(x)                        ((x) << 0) /* unmasked; the next field starts at bit 12 */
+#define                FORCE_EOV_MAX_TILE_CNT(x)                       ((x) << 12)
+#define PA_SC_LINE_STIPPLE                             0x28A0C
+#define        PA_SC_LINE_STIPPLE_STATE                        0x8B10
+#define PA_SC_MODE_CNTL                                        0x28A4C
+#define        PA_SC_MULTI_CHIP_CNTL                           0x8B20
+
+#define PA_SC_SCREEN_SCISSOR_TL                         0x28030 /* only the *_TL scissor offsets are defined in this group */
+#define PA_SC_GENERIC_SCISSOR_TL                        0x28240
+#define PA_SC_WINDOW_SCISSOR_TL                         0x28204
+
+#define        PCIE_PORT_INDEX                                 0x0038 /* INDEX/DATA pair; presumably an indirect access window -- confirm against users */
+#define        PCIE_PORT_DATA                                  0x003C
+
+#define RAMCFG                                         0x2408 /* fields below are SHIFT/MASK pairs; each MASK is already positioned at its SHIFT */
+#define                NOOFBANK_SHIFT                                  0
+#define                NOOFBANK_MASK                                   0x00000001
+#define                NOOFRANK_SHIFT                                  1
+#define                NOOFRANK_MASK                                   0x00000002
+#define                NOOFROWS_SHIFT                                  2
+#define                NOOFROWS_MASK                                   0x0000001C /* 3 bits: 4:2 */
+#define                NOOFCOLS_SHIFT                                  5
+#define                NOOFCOLS_MASK                                   0x00000060 /* 2 bits: 6:5 */
+#define                CHANSIZE_SHIFT                                  7
+#define                CHANSIZE_MASK                                   0x00000080
+#define                BURSTLENGTH_SHIFT                               8
+#define                BURSTLENGTH_MASK                                0x00000100
+#define                CHANSIZE_OVERRIDE                               (1 << 10) /* single flag bit, not a SHIFT/MASK pair */
+
+#define        SCRATCH_REG0                                    0x8500 /* REG0..REG7 are consecutive offsets, 4 bytes apart */
+#define        SCRATCH_REG1                                    0x8504
+#define        SCRATCH_REG2                                    0x8508
+#define        SCRATCH_REG3                                    0x850C
+#define        SCRATCH_REG4                                    0x8510
+#define        SCRATCH_REG5                                    0x8514
+#define        SCRATCH_REG6                                    0x8518
+#define        SCRATCH_REG7                                    0x851C
+#define        SCRATCH_UMSK                                    0x8540
+#define        SCRATCH_ADDR                                    0x8544
+
+#define        SPI_CONFIG_CNTL                                 0x9100
+#define                GPR_WRITE_PRIORITY(x)                           ((x) << 0)
+#define                DISABLE_INTERP_1                                (1 << 5)
+#define        SPI_CONFIG_CNTL_1                               0x913C
+#define                VTX_DONE_DELAY(x)                               ((x) << 0)
+#define                INTERP_ONE_PRIM_PER_ROW                         (1 << 4)
+#define        SPI_INPUT_Z                                     0x286D8
+#define        SPI_PS_IN_CONTROL_0                             0x286CC /* field helpers below shift x without masking */
+#define                NUM_INTERP(x)                                   ((x)<<0)
+#define                POSITION_ENA                                    (1<<8)
+#define                POSITION_CENTROID                               (1<<9)
+#define                POSITION_ADDR(x)                                ((x)<<10)
+#define                PARAM_GEN(x)                                    ((x)<<15)
+#define                PARAM_GEN_ADDR(x)                               ((x)<<19)
+#define                BARYC_SAMPLE_CNTL(x)                            ((x)<<26)
+#define                PERSP_GRADIENT_ENA                              (1<<28)
+#define                LINEAR_GRADIENT_ENA                             (1<<29)
+#define                POSITION_SAMPLE                                 (1<<30)
+#define                BARYC_AT_SAMPLE_ENA                             (1U<<31) /* unsigned: 1<<31 overflows a signed int */
+#define        SPI_PS_IN_CONTROL_1                             0x286D0
+#define                GEN_INDEX_PIX                                   (1<<0)
+#define                GEN_INDEX_PIX_ADDR(x)                           ((x)<<1)
+#define                FRONT_FACE_ENA                                  (1<<8)
+#define                FRONT_FACE_CHAN(x)                              ((x)<<9)
+#define                FRONT_FACE_ALL_BITS                             (1<<11)
+#define                FRONT_FACE_ADDR(x)                              ((x)<<12)
+#define                FOG_ADDR(x)                                     ((x)<<17)
+#define                FIXED_PT_POSITION_ENA                           (1<<24)
+#define                FIXED_PT_POSITION_ADDR(x)                       ((x)<<25)
+
+#define        SQ_MS_FIFO_SIZES                                0x8CF0
+#define                CACHE_FIFO_SIZE(x)                              ((x) << 0)
+#define                FETCH_FIFO_HIWATER(x)                           ((x) << 8)
+#define                DONE_FIFO_HIWATER(x)                            ((x) << 16)
+#define                ALU_UPDATE_FIFO_HIWATER(x)                      ((x) << 24)
+#define        SQ_PGM_START_ES                                 0x28880
+#define        SQ_PGM_START_FS                                 0x28894
+#define        SQ_PGM_START_GS                                 0x2886C
+#define        SQ_PGM_START_PS                                 0x28840
+#define SQ_PGM_RESOURCES_PS                             0x28850
+#define SQ_PGM_EXPORTS_PS                               0x28854
+#define SQ_PGM_CF_OFFSET_PS                             0x288cc
+#define        SQ_PGM_START_VS                                 0x28858
+#define SQ_PGM_RESOURCES_VS                             0x28868
+#define SQ_PGM_CF_OFFSET_VS                             0x288d0
+#define        SQ_VTX_CONSTANT_WORD6_0                         0x38018
+#define                S__SQ_VTX_CONSTANT_TYPE(x)                      (((x) & 3U) << 30) /* 2-bit type at bits 31:30; unsigned mask so values 2 and 3 do not overflow int */
+#define                G__SQ_VTX_CONSTANT_TYPE(x)                      (((x) >> 30) & 3) /* reads the same 2-bit field back */
+#define                        SQ_TEX_VTX_INVALID_TEXTURE                      0x0
+#define                        SQ_TEX_VTX_INVALID_BUFFER                       0x1
+#define                        SQ_TEX_VTX_VALID_TEXTURE                        0x2
+#define                        SQ_TEX_VTX_VALID_BUFFER                         0x3
+
+
+#define        SX_MISC                                         0x28350 /* offset only; no field macros follow it */
+#define        SX_DEBUG_1                                      0x9054
+#define                SMX_EVENT_RELEASE                               (1 << 0)
+#define                ENABLE_NEW_SMX_ADDRESS                          (1 << 16)
+
+#define        TA_CNTL_AUX                                     0x9508
+#define                DISABLE_CUBE_WRAP                               (1 << 0)
+#define                DISABLE_CUBE_ANISO                              (1 << 1)
+#define                SYNC_GRADIENT                                   (1 << 24)
+#define                SYNC_WALKER                                     (1 << 25)
+#define                SYNC_ALIGNER                                    (1 << 26)
+#define                BILINEAR_PRECISION_6_BIT                        (0 << 31) /* bit 31 clear */
+#define                BILINEAR_PRECISION_8_BIT                        (1U << 31) /* bit 31 set; unsigned because 1 << 31 overflows a signed int */
+
+#define        TC_CNTL                                         0x9608
+#define                TC_L2_SIZE(x)                                   ((x)<<5) /* x is shifted without masking; the next defined flag is bit 9 */
+#define                L2_DISABLE_LATE_HIT                             (1<<9)
+
+
+#define        VGT_CACHE_INVALIDATION                          0x88C4
+#define                CACHE_INVALIDATION(x)                           ((x)<<0) /* presumably takes one of the three selector values below -- confirm */
+#define                        VC_ONLY                                         0
+#define                        TC_ONLY                                         1
+#define                        VC_AND_TC                                       2
+#define        VGT_DMA_BASE                                    0x287E8
+#define        VGT_DMA_BASE_HI                                 0x287E4
+#define        VGT_ES_PER_GS                                   0x88CC
+#define        VGT_GS_PER_ES                                   0x88C8
+#define        VGT_GS_PER_VS                                   0x88E8
+#define        VGT_GS_VERTEX_REUSE                             0x88D4
+#define VGT_PRIMITIVE_TYPE                              0x8958
+#define        VGT_NUM_INSTANCES                               0x8974
+#define        VGT_OUT_DEALLOC_CNTL                            0x28C5C
+#define                DEALLOC_DIST_MASK                               0x0000007F /* low 7 bits */
+#define        VGT_STRMOUT_BASE_OFFSET_0                       0x28B10 /* _0.._3 are 4 bytes apart */
+#define        VGT_STRMOUT_BASE_OFFSET_1                       0x28B14
+#define        VGT_STRMOUT_BASE_OFFSET_2                       0x28B18
+#define        VGT_STRMOUT_BASE_OFFSET_3                       0x28B1c
+#define        VGT_STRMOUT_BASE_OFFSET_HI_0                    0x28B44
+#define        VGT_STRMOUT_BASE_OFFSET_HI_1                    0x28B48
+#define        VGT_STRMOUT_BASE_OFFSET_HI_2                    0x28B4c
+#define        VGT_STRMOUT_BASE_OFFSET_HI_3                    0x28B50
+#define        VGT_STRMOUT_BUFFER_BASE_0                       0x28AD8 /* BUFFER_BASE_n are 0x10 apart; BUFFER_OFFSET_n is at BUFFER_BASE_n + 4 */
+#define        VGT_STRMOUT_BUFFER_BASE_1                       0x28AE8
+#define        VGT_STRMOUT_BUFFER_BASE_2                       0x28AF8
+#define        VGT_STRMOUT_BUFFER_BASE_3                       0x28B08
+#define        VGT_STRMOUT_BUFFER_OFFSET_0                     0x28ADC
+#define        VGT_STRMOUT_BUFFER_OFFSET_1                     0x28AEC
+#define        VGT_STRMOUT_BUFFER_OFFSET_2                     0x28AFC
+#define        VGT_STRMOUT_BUFFER_OFFSET_3                     0x28B0C
+#define        VGT_STRMOUT_EN                                  0x28AB0
+#define        VGT_VERTEX_REUSE_BLOCK_CNTL                     0x28C58
+#define                VTX_REUSE_DEPTH_MASK                            0x000000FF /* low 8 bits */
+#define VGT_EVENT_INITIATOR                             0x28a90
+#       define CACHE_FLUSH_AND_INV_EVENT                        (0x16 << 0) /* event code 0x16 at bit 0 */
+
+#define VM_CONTEXT0_CNTL                               0x1410
+#define                ENABLE_CONTEXT                                  (1 << 0)
+#define                PAGE_TABLE_DEPTH(x)                             (((x) & 3) << 1) /* 2-bit value at bits 2:1 */
+#define                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT           (1 << 4)
+#define VM_CONTEXT0_INVALIDATION_LOW_ADDR              0x1490
+#define VM_CONTEXT0_INVALIDATION_HIGH_ADDR             0x14B0
+#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR               0x1574
+#define VM_CONTEXT0_PAGE_TABLE_START_ADDR              0x1594
+#define VM_CONTEXT0_PAGE_TABLE_END_ADDR                        0x15B4
+#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR      0x1554
+#define VM_CONTEXT0_REQUEST_RESPONSE                   0x1470
+#define                REQUEST_TYPE(x)                                 (((x) & 0xf) << 0) /* 4-bit value at bits 3:0 */
+#define                RESPONSE_TYPE_MASK                              0x000000F0 /* bits 7:4; pairs with RESPONSE_TYPE_SHIFT */
+#define                RESPONSE_TYPE_SHIFT                             4
+#define VM_L2_CNTL                                     0x1400
+#define                ENABLE_L2_CACHE                                 (1 << 0)
+#define                ENABLE_L2_FRAGMENT_PROCESSING                   (1 << 1)
+#define                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE         (1 << 9)
+#define                EFFECTIVE_L2_QUEUE_SIZE(x)                      (((x) & 7) << 13) /* 3-bit value at bits 15:13 */
+#define VM_L2_CNTL2                                    0x1404
+#define                INVALIDATE_ALL_L1_TLBS                          (1 << 0)
+#define                INVALIDATE_L2_CACHE                             (1 << 1)
+#define VM_L2_CNTL3                                    0x1408
+#define                BANK_SELECT_0(x)                                (((x) & 0x1f) << 0) /* 5-bit value at bits 4:0 */
+#define                BANK_SELECT_1(x)                                (((x) & 0x1f) << 5) /* 5-bit value at bits 9:5 */
+#define                L2_CACHE_UPDATE_MODE(x)                         (((x) & 3) << 10) /* 2-bit value at bits 11:10 */
+#define        VM_L2_STATUS                                    0x140C
+#define                L2_BUSY                                         (1 << 0)
+
+#define        WAIT_UNTIL                                      0x8040
+#define         WAIT_2D_IDLE_bit                                (1 << 14) /* the four WAIT_*_bit flags occupy bits 14..17 */
+#define         WAIT_3D_IDLE_bit                                (1 << 15)
+#define         WAIT_2D_IDLECLEAN_bit                           (1 << 16)
+#define         WAIT_3D_IDLECLEAN_bit                           (1 << 17)
+
+
+
+/*
+ * PM4
+ */
+#define        PACKET_TYPE0    0
+#define        PACKET_TYPE1    1
+#define        PACKET_TYPE2    2
+#define        PACKET_TYPE3    3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) /* header bits 31:30 */
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) /* header bits 29:16 */
+#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2) /* low 16 bits hold reg >> 2 */
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) /* header bits 15:8 */
+#define PACKET0(reg, n)        (((unsigned)PACKET_TYPE0 << 30) | /* header built as unsigned */ \
+                        (((reg) >> 2) & 0xFFFF) |                      \
+                        (((n) & 0x3FFF) << 16))
+#define PACKET3(op, n) (((unsigned)PACKET_TYPE3 << 30) | /* 3 << 30 would overflow int */ \
+                        (((op) & 0xFF) << 8) |                         \
+                        (((n) & 0x3FFF) << 16))
+
+/* Packet 3 types */
+#define        PACKET3_NOP                                     0x10 /* opcode values in this list all fit in 8 bits */
+#define        PACKET3_INDIRECT_BUFFER_END                     0x17
+#define        PACKET3_SET_PREDICATION                         0x20
+#define        PACKET3_REG_RMW                                 0x21
+#define        PACKET3_COND_EXEC                               0x22
+#define        PACKET3_PRED_EXEC                               0x23
+#define        PACKET3_START_3D_CMDBUF                         0x24
+#define        PACKET3_DRAW_INDEX_2                            0x27
+#define        PACKET3_CONTEXT_CONTROL                         0x28
+#define        PACKET3_DRAW_INDEX_IMMD_BE                      0x29
+#define        PACKET3_INDEX_TYPE                              0x2A
+#define        PACKET3_DRAW_INDEX                              0x2B
+#define        PACKET3_DRAW_INDEX_AUTO                         0x2D
+#define        PACKET3_DRAW_INDEX_IMMD                         0x2E
+#define        PACKET3_NUM_INSTANCES                           0x2F
+#define        PACKET3_STRMOUT_BUFFER_UPDATE                   0x34
+#define        PACKET3_INDIRECT_BUFFER_MP                      0x38
+#define        PACKET3_MEM_SEMAPHORE                           0x39
+#define        PACKET3_MPEG_INDEX                              0x3A
+#define        PACKET3_WAIT_REG_MEM                            0x3C
+#define        PACKET3_MEM_WRITE                               0x3D
+#define        PACKET3_INDIRECT_BUFFER                         0x32 /* listed out of numeric order */
+#define        PACKET3_CP_INTERRUPT                            0x40
+#define        PACKET3_SURFACE_SYNC                            0x43
+#              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6) /* flag bits nested under SURFACE_SYNC (presumably for one of its dwords; confirm) */
+#              define PACKET3_TC_ACTION_ENA        (1 << 23)
+#              define PACKET3_VC_ACTION_ENA        (1 << 24)
+#              define PACKET3_CB_ACTION_ENA        (1 << 25)
+#              define PACKET3_DB_ACTION_ENA        (1 << 26)
+#              define PACKET3_SH_ACTION_ENA        (1 << 27)
+#              define PACKET3_SMX_ACTION_ENA       (1 << 28)
+#define        PACKET3_ME_INITIALIZE                           0x44
+#define                PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) /* x is shifted without masking */
+#define        PACKET3_COND_WRITE                              0x45
+#define        PACKET3_EVENT_WRITE                             0x46
+#define        PACKET3_EVENT_WRITE_EOP                         0x47
+#define        PACKET3_ONE_REG_WRITE                           0x57
+#define        PACKET3_SET_CONFIG_REG                          0x68
+#define                PACKET3_SET_CONFIG_REG_OFFSET                   0x00008000 /* each SET_* opcode carries an OFFSET/END address pair (presumably its valid register window; confirm) */
+#define                PACKET3_SET_CONFIG_REG_END                      0x0000ac00
+#define        PACKET3_SET_CONTEXT_REG                         0x69
+#define                PACKET3_SET_CONTEXT_REG_OFFSET                  0x00028000
+#define                PACKET3_SET_CONTEXT_REG_END                     0x00029000
+#define        PACKET3_SET_ALU_CONST                           0x6A
+#define                PACKET3_SET_ALU_CONST_OFFSET                    0x00030000
+#define                PACKET3_SET_ALU_CONST_END                       0x00032000
+#define        PACKET3_SET_BOOL_CONST                          0x6B
+#define                PACKET3_SET_BOOL_CONST_OFFSET                   0x0003e380 /* starts where LOOP_CONST ends */
+#define                PACKET3_SET_BOOL_CONST_END                      0x00040000
+#define        PACKET3_SET_LOOP_CONST                          0x6C
+#define                PACKET3_SET_LOOP_CONST_OFFSET                   0x0003e200 /* starts where CTL_CONST ends */
+#define                PACKET3_SET_LOOP_CONST_END                      0x0003e380
+#define        PACKET3_SET_RESOURCE                            0x6D
+#define                PACKET3_SET_RESOURCE_OFFSET                     0x00038000
+#define                PACKET3_SET_RESOURCE_END                        0x0003c000
+#define        PACKET3_SET_SAMPLER                             0x6E
+#define                PACKET3_SET_SAMPLER_OFFSET                      0x0003c000 /* starts where RESOURCE ends */
+#define                PACKET3_SET_SAMPLER_END                         0x0003cff0
+#define        PACKET3_SET_CTL_CONST                           0x6F
+#define                PACKET3_SET_CTL_CONST_OFFSET                    0x0003cff0 /* starts where SAMPLER ends */
+#define                PACKET3_SET_CTL_CONST_END                       0x0003e200
+#define        PACKET3_SURFACE_BASE_UPDATE                     0x73
+
+
+#define        R_008020_GRBM_SOFT_RESET                0x8020 /* S_008020_* keep the low bit of x and move it to the named bit */
+#define                S_008020_SOFT_RESET_CP(x)               (((x) & 1) << 0)
+#define                S_008020_SOFT_RESET_CB(x)               (((x) & 1) << 1)
+#define                S_008020_SOFT_RESET_CR(x)               (((x) & 1) << 2)
+#define                S_008020_SOFT_RESET_DB(x)               (((x) & 1) << 3)
+#define                S_008020_SOFT_RESET_PA(x)               (((x) & 1) << 5) /* no macro is defined for bit 4 */
+#define                S_008020_SOFT_RESET_SC(x)               (((x) & 1) << 6)
+#define                S_008020_SOFT_RESET_SMX(x)              (((x) & 1) << 7)
+#define                S_008020_SOFT_RESET_SPI(x)              (((x) & 1) << 8)
+#define                S_008020_SOFT_RESET_SH(x)               (((x) & 1) << 9)
+#define                S_008020_SOFT_RESET_SX(x)               (((x) & 1) << 10)
+#define                S_008020_SOFT_RESET_TC(x)               (((x) & 1) << 11)
+#define                S_008020_SOFT_RESET_TA(x)               (((x) & 1) << 12)
+#define                S_008020_SOFT_RESET_VC(x)               (((x) & 1) << 13)
+#define                S_008020_SOFT_RESET_VGT(x)              (((x) & 1) << 14)
+#define        R_008010_GRBM_STATUS                    0x8010 /* S_008010_* mask x to the field width and shift it into place */
+#define                S_008010_CMDFIFO_AVAIL(x)               (((x) & 0x1F) << 0) /* 5-bit field, bits 4:0 */
+#define                S_008010_CP_RQ_PENDING(x)               (((x) & 1) << 6)
+#define                S_008010_CF_RQ_PENDING(x)               (((x) & 1) << 7)
+#define                S_008010_PF_RQ_PENDING(x)               (((x) & 1) << 8)
+#define                S_008010_GRBM_EE_BUSY(x)                (((x) & 1) << 10)
+#define                S_008010_VC_BUSY(x)                     (((x) & 1) << 11)
+#define                S_008010_DB03_CLEAN(x)                  (((x) & 1) << 12)
+#define                S_008010_CB03_CLEAN(x)                  (((x) & 1) << 13)
+#define                S_008010_VGT_BUSY_NO_DMA(x)             (((x) & 1) << 16)
+#define                S_008010_VGT_BUSY(x)                    (((x) & 1) << 17)
+#define                S_008010_TA03_BUSY(x)                   (((x) & 1) << 18)
+#define                S_008010_TC_BUSY(x)                     (((x) & 1) << 19)
+#define                S_008010_SX_BUSY(x)                     (((x) & 1) << 20)
+#define                S_008010_SH_BUSY(x)                     (((x) & 1) << 21)
+#define                S_008010_SPI03_BUSY(x)                  (((x) & 1) << 22)
+#define                S_008010_SMX_BUSY(x)                    (((x) & 1) << 23)
+#define                S_008010_SC_BUSY(x)                     (((x) & 1) << 24)
+#define                S_008010_PA_BUSY(x)                     (((x) & 1) << 25)
+#define                S_008010_DB03_BUSY(x)                   (((x) & 1) << 26)
+#define                S_008010_CR_BUSY(x)                     (((x) & 1) << 27)
+#define                S_008010_CP_COHERENCY_BUSY(x)           (((x) & 1) << 28)
+#define                S_008010_CP_BUSY(x)                     (((x) & 1) << 29)
+#define                S_008010_CB03_BUSY(x)                   (((x) & 1) << 30)
+#define                S_008010_GUI_ACTIVE(x)                  (((x) & 1U) << 31) /* unsigned mask: shifting int 1 to bit 31 overflows */
+#define                G_008010_CMDFIFO_AVAIL(x)               (((x) >> 0) & 0x1F) /* G_008010_* shift x right and mask, extracting one field of a register value */
+#define                G_008010_CP_RQ_PENDING(x)               (((x) >> 6) & 1)
+#define                G_008010_CF_RQ_PENDING(x)               (((x) >> 7) & 1)
+#define                G_008010_PF_RQ_PENDING(x)               (((x) >> 8) & 1)
+#define                G_008010_GRBM_EE_BUSY(x)                (((x) >> 10) & 1)
+#define                G_008010_VC_BUSY(x)                     (((x) >> 11) & 1)
+#define                G_008010_DB03_CLEAN(x)                  (((x) >> 12) & 1)
+#define                G_008010_CB03_CLEAN(x)                  (((x) >> 13) & 1)
+#define                G_008010_VGT_BUSY_NO_DMA(x)             (((x) >> 16) & 1)
+#define                G_008010_VGT_BUSY(x)                    (((x) >> 17) & 1)
+#define                G_008010_TA03_BUSY(x)                   (((x) >> 18) & 1)
+#define                G_008010_TC_BUSY(x)                     (((x) >> 19) & 1)
+#define                G_008010_SX_BUSY(x)                     (((x) >> 20) & 1)
+#define                G_008010_SH_BUSY(x)                     (((x) >> 21) & 1)
+#define                G_008010_SPI03_BUSY(x)                  (((x) >> 22) & 1)
+#define                G_008010_SMX_BUSY(x)                    (((x) >> 23) & 1)
+#define                G_008010_SC_BUSY(x)                     (((x) >> 24) & 1)
+#define                G_008010_PA_BUSY(x)                     (((x) >> 25) & 1)
+#define                G_008010_DB03_BUSY(x)                   (((x) >> 26) & 1)
+#define                G_008010_CR_BUSY(x)                     (((x) >> 27) & 1)
+#define                G_008010_CP_COHERENCY_BUSY(x)           (((x) >> 28) & 1)
+#define                G_008010_CP_BUSY(x)                     (((x) >> 29) & 1)
+#define                G_008010_CB03_BUSY(x)                   (((x) >> 30) & 1)
+#define                G_008010_GUI_ACTIVE(x)                  (((x) >> 31) & 1) /* the mask after the shift keeps the result 0/1 even if x is signed */
+#define        R_008014_GRBM_STATUS2                   0x8014 /* S_008014_* place a 1-bit value; the matching G_008014_* read it back */
+#define                S_008014_CR_CLEAN(x)                    (((x) & 1) << 0)
+#define                S_008014_SMX_CLEAN(x)                   (((x) & 1) << 1)
+#define                S_008014_SPI0_BUSY(x)                   (((x) & 1) << 8) /* no macros are defined for bits 2..7 */
+#define                S_008014_SPI1_BUSY(x)                   (((x) & 1) << 9)
+#define                S_008014_SPI2_BUSY(x)                   (((x) & 1) << 10)
+#define                S_008014_SPI3_BUSY(x)                   (((x) & 1) << 11)
+#define                S_008014_TA0_BUSY(x)                    (((x) & 1) << 12)
+#define                S_008014_TA1_BUSY(x)                    (((x) & 1) << 13)
+#define                S_008014_TA2_BUSY(x)                    (((x) & 1) << 14)
+#define                S_008014_TA3_BUSY(x)                    (((x) & 1) << 15)
+#define                S_008014_DB0_BUSY(x)                    (((x) & 1) << 16)
+#define                S_008014_DB1_BUSY(x)                    (((x) & 1) << 17)
+#define                S_008014_DB2_BUSY(x)                    (((x) & 1) << 18)
+#define                S_008014_DB3_BUSY(x)                    (((x) & 1) << 19)
+#define                S_008014_CB0_BUSY(x)                    (((x) & 1) << 20)
+#define                S_008014_CB1_BUSY(x)                    (((x) & 1) << 21)
+#define                S_008014_CB2_BUSY(x)                    (((x) & 1) << 22)
+#define                S_008014_CB3_BUSY(x)                    (((x) & 1) << 23)
+#define                G_008014_CR_CLEAN(x)                    (((x) >> 0) & 1) /* getters mirror the setter bit positions above */
+#define                G_008014_SMX_CLEAN(x)                   (((x) >> 1) & 1)
+#define                G_008014_SPI0_BUSY(x)                   (((x) >> 8) & 1)
+#define                G_008014_SPI1_BUSY(x)                   (((x) >> 9) & 1)
+#define                G_008014_SPI2_BUSY(x)                   (((x) >> 10) & 1)
+#define                G_008014_SPI3_BUSY(x)                   (((x) >> 11) & 1)
+#define                G_008014_TA0_BUSY(x)                    (((x) >> 12) & 1)
+#define                G_008014_TA1_BUSY(x)                    (((x) >> 13) & 1)
+#define                G_008014_TA2_BUSY(x)                    (((x) >> 14) & 1)
+#define                G_008014_TA3_BUSY(x)                    (((x) >> 15) & 1)
+#define                G_008014_DB0_BUSY(x)                    (((x) >> 16) & 1)
+#define                G_008014_DB1_BUSY(x)                    (((x) >> 17) & 1)
+#define                G_008014_DB2_BUSY(x)                    (((x) >> 18) & 1)
+#define                G_008014_DB3_BUSY(x)                    (((x) >> 19) & 1)
+#define                G_008014_CB0_BUSY(x)                    (((x) >> 20) & 1)
+#define                G_008014_CB1_BUSY(x)                    (((x) >> 21) & 1)
+#define                G_008014_CB2_BUSY(x)                    (((x) >> 22) & 1)
+#define                G_008014_CB3_BUSY(x)                    (((x) >> 23) & 1)
+#define        R_000E50_SRBM_STATUS                            0x0E50 /* only G_ (extract) macros are defined here, covering bits 3..15 */
+#define                G_000E50_RLC_RQ_PENDING(x)              (((x) >> 3) & 1)
+#define                G_000E50_RCU_RQ_PENDING(x)              (((x) >> 4) & 1)
+#define                G_000E50_GRBM_RQ_PENDING(x)             (((x) >> 5) & 1)
+#define                G_000E50_HI_RQ_PENDING(x)               (((x) >> 6) & 1)
+#define                G_000E50_IO_EXTERN_SIGNAL(x)            (((x) >> 7) & 1)
+#define                G_000E50_VMC_BUSY(x)                    (((x) >> 8) & 1)
+#define                G_000E50_MCB_BUSY(x)                    (((x) >> 9) & 1)
+#define                G_000E50_MCDZ_BUSY(x)                   (((x) >> 10) & 1)
+#define                G_000E50_MCDY_BUSY(x)                   (((x) >> 11) & 1)
+#define                G_000E50_MCDX_BUSY(x)                   (((x) >> 12) & 1)
+#define                G_000E50_MCDW_BUSY(x)                   (((x) >> 13) & 1)
+#define                G_000E50_SEM_BUSY(x)                    (((x) >> 14) & 1)
+#define                G_000E50_RLC_BUSY(x)                    (((x) >> 15) & 1)
+#define        R_000E60_SRBM_SOFT_RESET                        0x0E60 /* only S_ (place) macros are defined; none for bits 0, 6, 7 and 12 */
+#define                S_000E60_SOFT_RESET_BIF(x)              (((x) & 1) << 1)
+#define                S_000E60_SOFT_RESET_CG(x)               (((x) & 1) << 2)
+#define                S_000E60_SOFT_RESET_CMC(x)              (((x) & 1) << 3)
+#define                S_000E60_SOFT_RESET_CSC(x)              (((x) & 1) << 4)
+#define                S_000E60_SOFT_RESET_DC(x)               (((x) & 1) << 5)
+#define                S_000E60_SOFT_RESET_GRBM(x)             (((x) & 1) << 8)
+#define                S_000E60_SOFT_RESET_HDP(x)              (((x) & 1) << 9)
+#define                S_000E60_SOFT_RESET_IH(x)               (((x) & 1) << 10)
+#define                S_000E60_SOFT_RESET_MC(x)               (((x) & 1) << 11)
+#define                S_000E60_SOFT_RESET_RLC(x)              (((x) & 1) << 13)
+#define                S_000E60_SOFT_RESET_ROM(x)              (((x) & 1) << 14)
+#define                S_000E60_SOFT_RESET_SEM(x)              (((x) & 1) << 15)
+#define                S_000E60_SOFT_RESET_TSC(x)              (((x) & 1) << 16)
+#define                S_000E60_SOFT_RESET_VMC(x)              (((x) & 1) << 17)
+
+#endif
index e47f2fc..3299733 100644 (file)
@@ -50,8 +50,8 @@
 #include <linux/kref.h>
 
 #include "radeon_mode.h"
+#include "radeon_share.h"
 #include "radeon_reg.h"
-#include "r300.h"
 
 /*
  * Modules parameters.
@@ -112,10 +112,11 @@ enum radeon_family {
        CHIP_RV635,
        CHIP_RV670,
        CHIP_RS780,
+       CHIP_RS880,
        CHIP_RV770,
        CHIP_RV730,
        CHIP_RV710,
-       CHIP_RS880,
+       CHIP_RV740,
        CHIP_LAST,
 };
 
@@ -152,10 +153,21 @@ struct radeon_device;
  */
 bool radeon_get_bios(struct radeon_device *rdev);
 
+
 /*
- * Clocks
+ * Dummy page
  */
+struct radeon_dummy_page { /* pairs a struct page pointer with a dma_addr_t; embedded in struct radeon_device as `dummy_page` */
+       struct page     *page; /* NOTE(review): presumably set up by radeon_dummy_page_init() and released by radeon_dummy_page_fini() - confirm */
+       dma_addr_t      addr; /* NOTE(review): presumably the DMA/bus address at which `page` is mapped - confirm in radeon_dummy_page_init() */
+};
+int radeon_dummy_page_init(struct radeon_device *rdev);
+void radeon_dummy_page_fini(struct radeon_device *rdev);
+
 
+/*
+ * Clocks
+ */
 struct radeon_clock {
        struct radeon_pll p1pll;
        struct radeon_pll p2pll;
@@ -166,6 +178,7 @@ struct radeon_clock {
        uint32_t default_sclk;
 };
 
+
 /*
  * Fences.
  */
@@ -332,14 +345,18 @@ struct radeon_mc {
        resource_size_t         aper_size;
        resource_size_t         aper_base;
        resource_size_t         agp_base;
-       unsigned                gtt_location;
-       unsigned                gtt_size;
-       unsigned                vram_location;
        /* for some chips with <= 32MB we need to lie
         * about vram size near mc fb location */
-       unsigned                mc_vram_size;
+       u64                     mc_vram_size;
+       u64                     gtt_location;
+       u64                     gtt_size;
+       u64                     gtt_start;
+       u64                     gtt_end;
+       u64                     vram_location;
+       u64                     vram_start;
+       u64                     vram_end;
        unsigned                vram_width;
-       unsigned                real_vram_size;
+       u64                     real_vram_size;
        int                     vram_mtrr;
        bool                    vram_is_ddr;
 };
@@ -411,6 +428,16 @@ struct radeon_cp {
        bool                    ready;
 };
 
+struct r600_blit { /* per-device state for the r600 blit code; embedded in struct radeon_device as `r600_blit` */
+       struct radeon_object    *shader_obj; /* NOTE(review): presumably the buffer object holding the blit shaders - confirm in the blit init code */
+       u64 shader_gpu_addr; /* NOTE(review): presumably the GPU address of shader_obj - confirm */
+       u32 vs_offset, ps_offset; /* NOTE(review): presumably vertex/pixel shader offsets relative to shader_gpu_addr - confirm */
+       u32 state_offset; /* NOTE(review): presumably the offset of a default-state block, with state_len its length - units not visible here, confirm */
+       u32 state_len;
+       u32 vb_used, vb_total; /* NOTE(review): presumably used/total space of the vertex buffer carried by vb_ib - units not visible here, confirm */
+       struct radeon_ib *vb_ib; /* indirect buffer (struct radeon_ib) referenced by the blit code */
+};
+
 int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -463,6 +490,7 @@ struct radeon_cs_parser {
        int                     chunk_relocs_idx;
        struct radeon_ib        *ib;
        void                    *track;
+       unsigned                family;
 };
 
 struct radeon_cs_packet {
@@ -559,6 +587,9 @@ int r100_debugfs_cp_init(struct radeon_device *rdev);
  */
 struct radeon_asic {
        int (*init)(struct radeon_device *rdev);
+       void (*fini)(struct radeon_device *rdev);
+       int (*resume)(struct radeon_device *rdev);
+       int (*suspend)(struct radeon_device *rdev);
        void (*errata)(struct radeon_device *rdev);
        void (*vram_info)(struct radeon_device *rdev);
        int (*gpu_reset)(struct radeon_device *rdev);
@@ -573,7 +604,11 @@ struct radeon_asic {
        int (*cp_init)(struct radeon_device *rdev, unsigned ring_size);
        void (*cp_fini)(struct radeon_device *rdev);
        void (*cp_disable)(struct radeon_device *rdev);
+       void (*cp_commit)(struct radeon_device *rdev);
        void (*ring_start)(struct radeon_device *rdev);
+       int (*ring_test)(struct radeon_device *rdev);
+       void (*ring_ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
+       int (*ib_test)(struct radeon_device *rdev);
        int (*irq_set)(struct radeon_device *rdev);
        int (*irq_process)(struct radeon_device *rdev);
        u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc);
@@ -613,6 +648,8 @@ struct r100_asic {
 union radeon_asic_config {
        struct r300_asic        r300;
        struct r100_asic        r100;
+       struct r600_asic        r600;
+       struct rv770_asic       rv770;
 };
 
 
@@ -698,12 +735,16 @@ struct radeon_device {
        struct radeon_pm                pm;
        struct mutex                    cs_mutex;
        struct radeon_wb                wb;
+       struct radeon_dummy_page        dummy_page;
        bool                            gpu_lockup;
        bool                            shutdown;
        bool                            suspend;
        bool                            need_dma32;
+       bool                            new_init_path;
        struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
-       const struct firmware *fw;      /* firmware */
+       const struct firmware *me_fw;   /* all family ME firmware */
+       const struct firmware *pfp_fw;  /* r6/700 PFP firmware */
+       struct r600_blit r600_blit;
 };
 
 int radeon_device_init(struct radeon_device *rdev,
@@ -713,6 +754,13 @@ int radeon_device_init(struct radeon_device *rdev,
 void radeon_device_fini(struct radeon_device *rdev);
 int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
+/* r600 blit */
+int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
+void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
+void r600_kms_blit_copy(struct radeon_device *rdev,
+                       u64 src_gpu_addr, u64 dst_gpu_addr,
+                       int size_bytes);
+
 static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
 {
        if (reg < 0x10000)
@@ -740,6 +788,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32
 #define RREG8(reg) readb(((void __iomem *)rdev->rmmio) + (reg))
 #define WREG8(reg, v) writeb(v, ((void __iomem *)rdev->rmmio) + (reg))
 #define RREG32(reg) r100_mm_rreg(rdev, (reg))
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg))) /* debug helper: logs the stringified register expression and the value read through r100_mm_rreg(); like RREG32, it relies on a variable named `rdev` being in scope at the call site */
 #define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v))
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
@@ -763,6 +812,7 @@ static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32
                tmp_ |= ((val) & ~(mask));                      \
                WREG32_PLL(reg, tmp_);                          \
        } while (0)
+#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg))) /* seq_printf counterpart of DREG32: writes the stringified register expression and its r100_mm_rreg() value to `sqf`; takes the device explicitly instead of using an ambient `rdev` */
 
 /*
  * Indirect registers accessor
@@ -827,51 +877,6 @@ void radeon_atombios_fini(struct radeon_device *rdev);
 /*
  * RING helpers.
  */
-#define CP_PACKET0                     0x00000000
-#define                PACKET0_BASE_INDEX_SHIFT        0
-#define                PACKET0_BASE_INDEX_MASK         (0x1ffff << 0)
-#define                PACKET0_COUNT_SHIFT             16
-#define                PACKET0_COUNT_MASK              (0x3fff << 16)
-#define CP_PACKET1                     0x40000000
-#define CP_PACKET2                     0x80000000
-#define                PACKET2_PAD_SHIFT               0
-#define                PACKET2_PAD_MASK                (0x3fffffff << 0)
-#define CP_PACKET3                     0xC0000000
-#define                PACKET3_IT_OPCODE_SHIFT         8
-#define                PACKET3_IT_OPCODE_MASK          (0xff << 8)
-#define                PACKET3_COUNT_SHIFT             16
-#define                PACKET3_COUNT_MASK              (0x3fff << 16)
-/* PACKET3 op code */
-#define                PACKET3_NOP                     0x10
-#define                PACKET3_3D_DRAW_VBUF            0x28
-#define                PACKET3_3D_DRAW_IMMD            0x29
-#define                PACKET3_3D_DRAW_INDX            0x2A
-#define                PACKET3_3D_LOAD_VBPNTR          0x2F
-#define                PACKET3_INDX_BUFFER             0x33
-#define                PACKET3_3D_DRAW_VBUF_2          0x34
-#define                PACKET3_3D_DRAW_IMMD_2          0x35
-#define                PACKET3_3D_DRAW_INDX_2          0x36
-#define                PACKET3_BITBLT_MULTI            0x9B
-
-#define PACKET0(reg, n)        (CP_PACKET0 |                                   \
-                        REG_SET(PACKET0_BASE_INDEX, (reg) >> 2) |      \
-                        REG_SET(PACKET0_COUNT, (n)))
-#define PACKET2(v)     (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
-#define PACKET3(op, n) (CP_PACKET3 |                                   \
-                        REG_SET(PACKET3_IT_OPCODE, (op)) |             \
-                        REG_SET(PACKET3_COUNT, (n)))
-
-#define        PACKET_TYPE0    0
-#define        PACKET_TYPE1    1
-#define        PACKET_TYPE2    2
-#define        PACKET_TYPE3    3
-
-#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
-#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
-#define CP_PACKET0_GET_REG(h) (((h) & 0x1FFF) << 2)
-#define CP_PACKET0_GET_ONE_REG_WR(h) (((h) >> 15) & 1)
-#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
-
 static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 {
 #if DRM_DEBUG_CODE
@@ -890,6 +895,9 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
  * ASICs macro.
  */
 #define radeon_init(rdev) (rdev)->asic->init((rdev))
+#define radeon_fini(rdev) (rdev)->asic->fini((rdev))
+#define radeon_resume(rdev) (rdev)->asic->resume((rdev))
+#define radeon_suspend(rdev) (rdev)->asic->suspend((rdev))
 #define radeon_cs_parse(p) rdev->asic->cs_parse((p))
 #define radeon_errata(rdev) (rdev)->asic->errata((rdev))
 #define radeon_vram_info(rdev) (rdev)->asic->vram_info((rdev))
@@ -905,7 +913,11 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 #define radeon_cp_init(rdev,rsize) (rdev)->asic->cp_init((rdev), (rsize))
 #define radeon_cp_fini(rdev) (rdev)->asic->cp_fini((rdev))
 #define radeon_cp_disable(rdev) (rdev)->asic->cp_disable((rdev))
+#define radeon_cp_commit(rdev) (rdev)->asic->cp_commit((rdev))
 #define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev))
+#define radeon_ring_test(rdev) (rdev)->asic->ring_test((rdev))
+#define radeon_ring_ib_execute(rdev, ib) (rdev)->asic->ring_ib_execute((rdev), (ib))
+#define radeon_ib_test(rdev) (rdev)->asic->ib_test((rdev))
 #define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev))
 #define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev))
 #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc))
index c9cbd8a..e87bb91 100644 (file)
@@ -60,6 +60,7 @@ int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
 void r100_cp_fini(struct radeon_device *rdev);
 void r100_cp_disable(struct radeon_device *rdev);
+void r100_cp_commit(struct radeon_device *rdev);
 void r100_ring_start(struct radeon_device *rdev);
 int r100_irq_set(struct radeon_device *rdev);
 int r100_irq_process(struct radeon_device *rdev);
@@ -78,6 +79,9 @@ int r100_set_surface_reg(struct radeon_device *rdev, int reg,
                         uint32_t offset, uint32_t obj_size);
 int r100_clear_surface_reg(struct radeon_device *rdev, int reg);
 void r100_bandwidth_update(struct radeon_device *rdev);
+void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+int r100_ib_test(struct radeon_device *rdev);
+int r100_ring_test(struct radeon_device *rdev);
 
 static struct radeon_asic r100_asic = {
        .init = &r100_init,
@@ -95,7 +99,11 @@ static struct radeon_asic r100_asic = {
        .cp_init = &r100_cp_init,
        .cp_fini = &r100_cp_fini,
        .cp_disable = &r100_cp_disable,
+       .cp_commit = &r100_cp_commit,
        .ring_start = &r100_ring_start,
+       .ring_test = &r100_ring_test,
+       .ring_ib_execute = &r100_ring_ib_execute,
+       .ib_test = &r100_ib_test,
        .irq_set = &r100_irq_set,
        .irq_process = &r100_irq_process,
        .get_vblank_counter = &r100_get_vblank_counter,
@@ -156,7 +164,11 @@ static struct radeon_asic r300_asic = {
        .cp_init = &r100_cp_init,
        .cp_fini = &r100_cp_fini,
        .cp_disable = &r100_cp_disable,
+       .cp_commit = &r100_cp_commit,
        .ring_start = &r300_ring_start,
+       .ring_test = &r100_ring_test,
+       .ring_ib_execute = &r100_ring_ib_execute,
+       .ib_test = &r100_ib_test,
        .irq_set = &r100_irq_set,
        .irq_process = &r100_irq_process,
        .get_vblank_counter = &r100_get_vblank_counter,
@@ -197,7 +209,11 @@ static struct radeon_asic r420_asic = {
        .cp_init = &r100_cp_init,
        .cp_fini = &r100_cp_fini,
        .cp_disable = &r100_cp_disable,
+       .cp_commit = &r100_cp_commit,
        .ring_start = &r300_ring_start,
+       .ring_test = &r100_ring_test,
+       .ring_ib_execute = &r100_ring_ib_execute,
+       .ib_test = &r100_ib_test,
        .irq_set = &r100_irq_set,
        .irq_process = &r100_irq_process,
        .get_vblank_counter = &r100_get_vblank_counter,
@@ -245,7 +261,11 @@ static struct radeon_asic rs400_asic = {
        .cp_init = &r100_cp_init,
        .cp_fini = &r100_cp_fini,
        .cp_disable = &r100_cp_disable,
+       .cp_commit = &r100_cp_commit,
        .ring_start = &r300_ring_start,
+       .ring_test = &r100_ring_test,
+       .ring_ib_execute = &r100_ring_ib_execute,
+       .ib_test = &r100_ib_test,
        .irq_set = &r100_irq_set,
        .irq_process = &r100_irq_process,
        .get_vblank_counter = &r100_get_vblank_counter,
@@ -298,7 +318,11 @@ static struct radeon_asic rs600_asic = {
        .cp_init = &r100_cp_init,
        .cp_fini = &r100_cp_fini,
        .cp_disable = &r100_cp_disable,
+       .cp_commit = &r100_cp_commit,
        .ring_start = &r300_ring_start,
+       .ring_test = &r100_ring_test,
+       .ring_ib_execute = &r100_ring_ib_execute,
+       .ib_test = &r100_ib_test,
        .irq_set = &rs600_irq_set,
        .irq_process = &rs600_irq_process,
        .get_vblank_counter = &rs600_get_vblank_counter,
@@ -341,7 +365,11 @@ static struct radeon_asic rs690_asic = {
        .cp_init = &r100_cp_init,
        .cp_fini = &r100_cp_fini,
        .cp_disable = &r100_cp_disable,
+       .cp_commit = &r100_cp_commit,
        .ring_start = &r300_ring_start,
+       .ring_test = &r100_ring_test,
+       .ring_ib_execute = &r100_ring_ib_execute,
+       .ib_test = &r100_ib_test,
        .irq_set = &rs600_irq_set,
        .irq_process = &rs600_irq_process,
        .get_vblank_counter = &rs600_get_vblank_counter,
@@ -391,7 +419,11 @@ static struct radeon_asic rv515_asic = {
        .cp_init = &r100_cp_init,
        .cp_fini = &r100_cp_fini,
        .cp_disable = &r100_cp_disable,
+       .cp_commit = &r100_cp_commit,
        .ring_start = &rv515_ring_start,
+       .ring_test = &r100_ring_test,
+       .ring_ib_execute = &r100_ring_ib_execute,
+       .ib_test = &r100_ib_test,
        .irq_set = &rs600_irq_set,
        .irq_process = &rs600_irq_process,
        .get_vblank_counter = &rs600_get_vblank_counter,
@@ -434,7 +466,11 @@ static struct radeon_asic r520_asic = {
        .cp_init = &r100_cp_init,
        .cp_fini = &r100_cp_fini,
        .cp_disable = &r100_cp_disable,
+       .cp_commit = &r100_cp_commit,
        .ring_start = &rv515_ring_start,
+       .ring_test = &r100_ring_test,
+       .ring_ib_execute = &r100_ring_ib_execute,
+       .ib_test = &r100_ib_test,
        .irq_set = &rs600_irq_set,
        .irq_process = &rs600_irq_process,
        .get_vblank_counter = &rs600_get_vblank_counter,
@@ -453,9 +489,127 @@ static struct radeon_asic r520_asic = {
 };
 
 /*
- * r600,rv610,rv630,rv620,rv635,rv670,rs780,rv770,rv730,rv710
+ * r600,rv610,rv630,rv620,rv635,rv670,rs780,rs880
  */
+int r600_init(struct radeon_device *rdev);
+void r600_fini(struct radeon_device *rdev);
+int r600_suspend(struct radeon_device *rdev);
+int r600_resume(struct radeon_device *rdev);
+int r600_wb_init(struct radeon_device *rdev);
+void r600_wb_fini(struct radeon_device *rdev);
+void r600_cp_commit(struct radeon_device *rdev);
+void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
 uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
 void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+int r600_cs_parse(struct radeon_cs_parser *p);
+void r600_fence_ring_emit(struct radeon_device *rdev,
+                         struct radeon_fence *fence);
+int r600_copy_dma(struct radeon_device *rdev,
+                 uint64_t src_offset,
+                 uint64_t dst_offset,
+                 unsigned num_pages,
+                 struct radeon_fence *fence);
+int r600_irq_process(struct radeon_device *rdev);
+int r600_irq_set(struct radeon_device *rdev);
+int r600_gpu_reset(struct radeon_device *rdev);
+int r600_set_surface_reg(struct radeon_device *rdev, int reg,
+                        uint32_t tiling_flags, uint32_t pitch,
+                        uint32_t offset, uint32_t obj_size);
+int r600_clear_surface_reg(struct radeon_device *rdev, int reg);
+void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+int r600_ib_test(struct radeon_device *rdev);
+int r600_ring_test(struct radeon_device *rdev);
+int r600_copy_blit(struct radeon_device *rdev,
+                  uint64_t src_offset, uint64_t dst_offset,
+                  unsigned num_pages, struct radeon_fence *fence);
+
+static struct radeon_asic r600_asic = { /* callback table for the families named in the comment above (r600 ... rs880) */
+       .errata = NULL,
+       .init = &r600_init,
+       .fini = &r600_fini,
+       .suspend = &r600_suspend,
+       .resume = &r600_resume,
+       .cp_commit = &r600_cp_commit,
+       .vram_info = NULL, /* NOTE(review): this and the other NULL hooks must never be invoked through the radeon_*() wrapper macros for this table; presumably init/fini/suspend/resume do that work instead - confirm */
+       .gpu_reset = &r600_gpu_reset,
+       .mc_init = NULL,
+       .mc_fini = NULL,
+       .wb_init = &r600_wb_init,
+       .wb_fini = &r600_wb_fini,
+       .gart_enable = NULL,
+       .gart_disable = NULL,
+       .gart_tlb_flush = &r600_pcie_gart_tlb_flush,
+       .gart_set_page = &rs600_gart_set_page, /* reuses the rs600 implementation */
+       .cp_init = NULL,
+       .cp_fini = NULL,
+       .cp_disable = NULL,
+       .ring_start = NULL,
+       .ring_test = &r600_ring_test,
+       .ring_ib_execute = &r600_ring_ib_execute,
+       .ib_test = &r600_ib_test,
+       .irq_set = &r600_irq_set,
+       .irq_process = &r600_irq_process,
+       .fence_ring_emit = &r600_fence_ring_emit,
+       .cs_parse = &r600_cs_parse,
+       .copy_blit = &r600_copy_blit,
+       .copy_dma = &r600_copy_blit, /* NOTE(review): points at the blit copy although r600_copy_dma() is declared above - presumably deliberate, confirm */
+       .copy = NULL,
+       .set_engine_clock = &radeon_atom_set_engine_clock,
+       .set_memory_clock = &radeon_atom_set_memory_clock,
+       .set_pcie_lanes = NULL,
+       .set_clock_gating = &radeon_atom_set_clock_gating,
+       .set_surface_reg = r600_set_surface_reg, /* no '&' here, unlike the other entries; a function name decays to the same pointer */
+       .clear_surface_reg = r600_clear_surface_reg,
+       .bandwidth_update = &r520_bandwidth_update, /* reuses the r520 implementation */
+};
+
+/*
+ * rv770,rv730,rv710,rv740
+ */
+int rv770_init(struct radeon_device *rdev);
+void rv770_fini(struct radeon_device *rdev);
+int rv770_suspend(struct radeon_device *rdev);
+int rv770_resume(struct radeon_device *rdev);
+int rv770_gpu_reset(struct radeon_device *rdev);
+
+static struct radeon_asic rv770_asic = { /* callback table for the families named in the comment above (rv770, rv730, rv710, rv740) */
+       .errata = NULL,
+       .init = &rv770_init, /* init, fini, suspend, resume and gpu_reset are the only rv770-specific hooks; the rest match r600_asic */
+       .fini = &rv770_fini,
+       .suspend = &rv770_suspend,
+       .resume = &rv770_resume,
+       .cp_commit = &r600_cp_commit,
+       .vram_info = NULL, /* NOTE(review): this and the other NULL hooks must never be invoked through the radeon_*() wrapper macros for this table; presumably init/fini/suspend/resume do that work instead - confirm */
+       .gpu_reset = &rv770_gpu_reset,
+       .mc_init = NULL,
+       .mc_fini = NULL,
+       .wb_init = &r600_wb_init,
+       .wb_fini = &r600_wb_fini,
+       .gart_enable = NULL,
+       .gart_disable = NULL,
+       .gart_tlb_flush = &r600_pcie_gart_tlb_flush,
+       .gart_set_page = &rs600_gart_set_page, /* reuses the rs600 implementation */
+       .cp_init = NULL,
+       .cp_fini = NULL,
+       .cp_disable = NULL,
+       .ring_start = NULL,
+       .ring_test = &r600_ring_test,
+       .ring_ib_execute = &r600_ring_ib_execute,
+       .ib_test = &r600_ib_test,
+       .irq_set = &r600_irq_set,
+       .irq_process = &r600_irq_process,
+       .fence_ring_emit = &r600_fence_ring_emit,
+       .cs_parse = &r600_cs_parse,
+       .copy_blit = &r600_copy_blit,
+       .copy_dma = &r600_copy_blit, /* NOTE(review): points at the blit copy rather than r600_copy_dma() - presumably deliberate, confirm */
+       .copy = NULL,
+       .set_engine_clock = &radeon_atom_set_engine_clock,
+       .set_memory_clock = &radeon_atom_set_memory_clock,
+       .set_pcie_lanes = NULL,
+       .set_clock_gating = &radeon_atom_set_clock_gating,
+       .set_surface_reg = r600_set_surface_reg, /* no '&' here, unlike the other entries; a function name decays to the same pointer */
+       .clear_surface_reg = r600_clear_surface_reg,
+       .bandwidth_update = &r520_bandwidth_update, /* reuses the r520 implementation */
+};
 
 #endif
index bba9b4b..a8fb392 100644 (file)
@@ -370,10 +370,6 @@ bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev)
                                                       && record->
                                                       ucRecordType <=
                                                       ATOM_MAX_OBJECT_RECORD_NUMBER) {
-                                                       DRM_ERROR
-                                                           ("record type %d\n",
-                                                            record->
-                                                            ucRecordType);
                                                        switch (record->
                                                                ucRecordType) {
                                                        case ATOM_I2C_RECORD_TYPE:
index a37cbce..152eef1 100644 (file)
@@ -102,10 +102,12 @@ void radeon_get_clock_info(struct drm_device *dev)
                        p1pll->reference_div = 12;
                if (p2pll->reference_div < 2)
                        p2pll->reference_div = 12;
-               if (spll->reference_div < 2)
-                       spll->reference_div =
-                           RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) &
-                           RADEON_M_SPLL_REF_DIV_MASK;
+               if (rdev->family < CHIP_RS600) {
+                       if (spll->reference_div < 2)
+                               spll->reference_div =
+                                       RREG32_PLL(RADEON_M_SPLL_REF_FB_DIV) &
+                                       RADEON_M_SPLL_REF_DIV_MASK;
+               }
                if (mpll->reference_div < 2)
                        mpll->reference_div = spll->reference_div;
        } else {
index 7693f7c..f2469c5 100644 (file)
@@ -37,7 +37,7 @@
 /*
  * Clear GPU surface registers.
  */
-static void radeon_surface_init(struct radeon_device *rdev)
+void radeon_surface_init(struct radeon_device *rdev)
 {
        /* FIXME: check this out */
        if (rdev->family < CHIP_R600) {
@@ -56,7 +56,7 @@ static void radeon_surface_init(struct radeon_device *rdev)
 /*
  * GPU scratch registers helpers function.
  */
-static void radeon_scratch_init(struct radeon_device *rdev)
+void radeon_scratch_init(struct radeon_device *rdev)
 {
        int i;
 
@@ -156,16 +156,14 @@ int radeon_mc_setup(st