drm/radeon/kms: fix VRAM sizing like DDX does it.
[linux-2.6.git] / drivers / gpu / drm / radeon / r300.c
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/seq_file.h>
29 #include "drmP.h"
30 #include "drm.h"
31 #include "radeon_reg.h"
32 #include "radeon.h"
33
/* r300,r350,rv350,rv370,rv380 depends on : */
/* Helpers declared here, implemented elsewhere (r100 code). */
void r100_hdp_reset(struct radeon_device *rdev);
int r100_cp_reset(struct radeon_device *rdev);
int r100_rb2d_reset(struct radeon_device *rdev);
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
int r100_pci_gart_enable(struct radeon_device *rdev);
void r100_pci_gart_disable(struct radeon_device *rdev);
void r100_mc_setup(struct radeon_device *rdev);
void r100_mc_disable_clients(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
/* Command-stream (CS) parsing helpers shared with r100. */
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx);
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p);
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
			      struct radeon_cs_reloc **cs_reloc);
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check);
void r100_cs_dump_packet(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt);
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_object *robj);

/* This files gather functions specifics to:
 * r300,r350,rv350,rv370,rv380
 *
 * Some of these functions might be used by newer ASICs.
 */
/* Forward declarations for functions defined later in this file. */
void r300_gpu_init(struct radeon_device *rdev);
int r300_mc_wait_for_idle(struct radeon_device *rdev);
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);
68
69
70 /*
71  * rv370,rv380 PCIE GART
72  */
73 void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
74 {
75         uint32_t tmp;
76         int i;
77
78         /* Workaround HW bug do flush 2 times */
79         for (i = 0; i < 2; i++) {
80                 tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
81                 WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
82                 (void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
83                 WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
84                 mb();
85         }
86 }
87
/* Bring up the RV370/RV380 PCIE GART: allocate the page table in VRAM,
 * program the aperture window and table base, then enable translation.
 * Returns 0 on success or a negative error code.
 */
int rv370_pcie_gart_enable(struct radeon_device *rdev)
{
	uint32_t table_addr;
	uint32_t tmp;
	int r;

	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r) {
		return r;
	}
	r = rv370_debugfs_pcie_gart_info_init(rdev);
	if (r) {
		/* debugfs is best-effort; the GART works without it */
		DRM_ERROR("Failed to register debugfs file for PCIE gart !\n");
	}
	/* 4 bytes per page-table entry */
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
	r = radeon_gart_table_vram_alloc(rdev);
	if (r) {
		return r;
	}
	/* discard memory request outside of configured range */
	tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
	/* END appears to be inclusive (last page of the GTT aperture)
	 * -- NOTE(review): confirm against the register spec */
	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
	table_addr = rdev->gart.table_addr;
	WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
	/* FIXME: setup default page */
	/* discarded reads are redirected to the start of VRAM */
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
	/* Clear error */
	WREG32_PCIE(0x18, 0);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	tmp |= RADEON_PCIE_TX_GART_EN;
	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	/* flush stale translations now that the table is live */
	rv370_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
		 rdev->mc.gtt_size >> 20, table_addr);
	rdev->gart.ready = true;
	return 0;
}
133
134 void rv370_pcie_gart_disable(struct radeon_device *rdev)
135 {
136         uint32_t tmp;
137
138         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
139         tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
140         WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
141         if (rdev->gart.table.vram.robj) {
142                 radeon_object_kunmap(rdev->gart.table.vram.robj);
143                 radeon_object_unpin(rdev->gart.table.vram.robj);
144         }
145 }
146
147 int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
148 {
149         void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
150
151         if (i < 0 || i > rdev->gart.num_gpu_pages) {
152                 return -EINVAL;
153         }
154         addr = (lower_32_bits(addr) >> 8) |
155                ((upper_32_bits(addr) & 0xff) << 24) |
156                0xc;
157         /* on x86 we want this to be CPU endian, on powerpc
158          * on powerpc without HW swappers, it'll get swapped on way
159          * into VRAM - so no need for cpu_to_le32 on VRAM tables */
160         writel(addr, ((void __iomem *)ptr) + (i * 4));
161         return 0;
162 }
163
/* Select and enable the GART backend for this board: AGP boards just get
 * their internal GART switched off, PCIE boards use the RV370 PCIE GART,
 * everything else falls back to the PCI GART. Returns 0 or negative errno.
 */
int r300_gart_enable(struct radeon_device *rdev)
{
#if __OS_HAS_AGP
	if (rdev->flags & RADEON_IS_AGP) {
		/* AGP aperture is used instead; disable the internal GART
		 * matching this chip's bus type */
		if (rdev->family > CHIP_RV350) {
			rv370_pcie_gart_disable(rdev);
		} else {
			r100_pci_gart_disable(rdev);
		}
		return 0;
	}
#endif
	if (rdev->flags & RADEON_IS_PCIE) {
		/* late-bind the PCIE GART ops into the asic vtable */
		rdev->asic->gart_disable = &rv370_pcie_gart_disable;
		rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
		rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
		return rv370_pcie_gart_enable(rdev);
	}
	return r100_pci_gart_enable(rdev);
}
184
185
186 /*
187  * MC
188  */
/* One-time memory-controller init: reset the engine, make sure both GART
 * flavours are off, decide where VRAM and GTT live in the GPU address
 * space and program the MC accordingly. Returns 0 or a negative errno.
 */
int r300_mc_init(struct radeon_device *rdev)
{
	int r;

	if (r100_debugfs_rbbm_init(rdev)) {
		/* debugfs is optional; carry on without it */
		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
	}

	r300_gpu_init(rdev);
	r100_pci_gart_disable(rdev);
	if (rdev->flags & RADEON_IS_PCIE) {
		rv370_pcie_gart_disable(rdev);
	}

	/* Setup GPU memory space */
	/* 0xFFFFFFFF acts as a "not placed yet" sentinel for radeon_mc_setup */
	rdev->mc.vram_location = 0xFFFFFFFFUL;
	rdev->mc.gtt_location = 0xFFFFFFFFUL;
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			/* AGP bring-up failed: fall back to the internal GART
			 * with the module-parameter-sized GTT */
			printk(KERN_WARNING "[drm] Disabling AGP\n");
			rdev->flags &= ~RADEON_IS_AGP;
			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		} else {
			/* GTT lives in the AGP aperture */
			rdev->mc.gtt_location = rdev->mc.agp_base;
		}
	}
	r = radeon_mc_setup(rdev);
	if (r) {
		return r;
	}

	/* Program GPU memory space */
	r100_mc_disable_clients(rdev);
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	r100_mc_setup(rdev);
	return 0;
}
230
231 void r300_mc_fini(struct radeon_device *rdev)
232 {
233         if (rdev->flags & RADEON_IS_PCIE) {
234                 rv370_pcie_gart_disable(rdev);
235                 radeon_gart_table_vram_free(rdev);
236         } else {
237                 r100_pci_gart_disable(rdev);
238                 radeon_gart_table_ram_free(rdev);
239         }
240         radeon_gart_fini(rdev);
241 }
242
243
244 /*
245  * Fence emission
246  */
/* Emit a fence on the CP ring: quiesce and flush the 3D pipeline, write
 * the fence sequence number into the driver's scratch register, then fire
 * the SW interrupt so waiters are woken.
 */
void r300_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Who ever call radeon_fence_emit should call ring_lock and ask
	 * for enough space (today caller are ib schedule and buffer move) */
	/* Write SC register so SC & US assert idle */
	radeon_ring_write(rdev, PACKET0(0x43E0, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(0x43E4, 0));
	radeon_ring_write(rdev, 0);
	/* Flush 3D cache */
	radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
	radeon_ring_write(rdev, (2 << 0));
	radeon_ring_write(rdev, PACKET0(0x4F18, 0));
	radeon_ring_write(rdev, (1 << 0));
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(rdev, PACKET0(0x1720, 0));
	radeon_ring_write(rdev, (1 << 17) | (1 << 16)  | (1 << 9));
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
	radeon_ring_write(rdev, fence->seq);
	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}
271
272
273 /*
274  * Global GPU functions
275  */
276 int r300_copy_dma(struct radeon_device *rdev,
277                   uint64_t src_offset,
278                   uint64_t dst_offset,
279                   unsigned num_pages,
280                   struct radeon_fence *fence)
281 {
282         uint32_t size;
283         uint32_t cur_size;
284         int i, num_loops;
285         int r = 0;
286
287         /* radeon pitch is /64 */
288         size = num_pages << PAGE_SHIFT;
289         num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
290         r = radeon_ring_lock(rdev, num_loops * 4 + 64);
291         if (r) {
292                 DRM_ERROR("radeon: moving bo (%d).\n", r);
293                 return r;
294         }
295         /* Must wait for 2D idle & clean before DMA or hangs might happen */
296         radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0 ));
297         radeon_ring_write(rdev, (1 << 16));
298         for (i = 0; i < num_loops; i++) {
299                 cur_size = size;
300                 if (cur_size > 0x1FFFFF) {
301                         cur_size = 0x1FFFFF;
302                 }
303                 size -= cur_size;
304                 radeon_ring_write(rdev, PACKET0(0x720, 2));
305                 radeon_ring_write(rdev, src_offset);
306                 radeon_ring_write(rdev, dst_offset);
307                 radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
308                 src_offset += cur_size;
309                 dst_offset += cur_size;
310         }
311         radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
312         radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
313         if (fence) {
314                 r = radeon_fence_emit(rdev, fence);
315         }
316         radeon_ring_unlock_commit(rdev);
317         return r;
318 }
319
/* Load the initial 3D-engine state through the CP ring: sync control,
 * pipe configuration, cache flushes and default GA/multisample settings.
 * Silently returns if the ring cannot be locked.
 */
void r300_ring_start(struct radeon_device *rdev)
{
	unsigned gb_tile_config;
	int r;

	/* Sub pixel 1/12 so we can have 4K rendering according to doc */
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	/* encode the pipe count probed by r300_gpu_init() */
	switch(rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	case 1:
	default:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}

	r = radeon_ring_lock(rdev, 64);
	if (r) {
		/* nothing we can do without ring space */
		return;
	}
	/* engine synchronisation defaults */
	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(rdev,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
	radeon_ring_write(rdev, gb_tile_config);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(rdev, PACKET0(0x170C, 0));
	radeon_ring_write(rdev, 1 << 31);
	radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
	radeon_ring_write(rdev, 0);
	/* flush destination and Z caches, then wait for idle & clean */
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	/* default multisample positions */
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X0_SHIFT) |
			   (6 << R300_MS_Y0_SHIFT) |
			   (6 << R300_MS_X1_SHIFT) |
			   (6 << R300_MS_Y1_SHIFT) |
			   (6 << R300_MS_X2_SHIFT) |
			   (6 << R300_MS_Y2_SHIFT) |
			   (6 << R300_MSBD0_Y_SHIFT) |
			   (6 << R300_MSBD0_X_SHIFT)));
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X3_SHIFT) |
			   (6 << R300_MS_Y3_SHIFT) |
			   (6 << R300_MS_X4_SHIFT) |
			   (6 << R300_MS_Y4_SHIFT) |
			   (6 << R300_MS_X5_SHIFT) |
			   (6 << R300_MS_Y5_SHIFT) |
			   (6 << R300_MSBD1_SHIFT)));
	/* geometry-assembly defaults */
	radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
	radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
	radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
	radeon_ring_write(rdev,
			  R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
	radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
	radeon_ring_write(rdev,
			  R300_GEOMETRY_ROUND_NEAREST |
			  R300_COLOR_ROUND_NEAREST);
	radeon_ring_unlock_commit(rdev);
}
409
410 void r300_errata(struct radeon_device *rdev)
411 {
412         rdev->pll_errata = 0;
413
414         if (rdev->family == CHIP_R300 &&
415             (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
416                 rdev->pll_errata |= CHIP_ERRATA_R300_CG;
417         }
418 }
419
420 int r300_mc_wait_for_idle(struct radeon_device *rdev)
421 {
422         unsigned i;
423         uint32_t tmp;
424
425         for (i = 0; i < rdev->usec_timeout; i++) {
426                 /* read MC_STATUS */
427                 tmp = RREG32(0x0150);
428                 if (tmp & (1 << 4)) {
429                         return 0;
430                 }
431                 DRM_UDELAY(1);
432         }
433         return -1;
434 }
435
/* Reset the HDP, derive the graphics-backend pipe count from the chip
 * family, program the tiling/pipe configuration and put the 2D dest
 * cache into a safe mode, waiting for GUI/MC idle along the way.
 */
void r300_gpu_init(struct radeon_device *rdev)
{
	uint32_t gb_tile_config, tmp;

	r100_hdp_reset(rdev);
	/* FIXME: rv380 one pipes ? */
	if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
		/* r300,r350 */
		rdev->num_gb_pipes = 2;
	} else {
		/* rv350,rv370,rv380 */
		rdev->num_gb_pipes = 1;
	}
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	switch (rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	default:
	case 1:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}
	WREG32(R300_GB_TILE_CONFIG, gb_tile_config);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	/* set bit 31 of 0x170C -- NOTE(review): same register poked in
	 * r300_ring_start; meaning comes from the register spec */
	tmp = RREG32(0x170C);
	WREG32(0x170C, tmp | (1 << 31));

	WREG32(R300_RB2D_DSTCACHE_MODE,
	       R300_DC_AUTOFLUSH_ENABLE |
	       R300_DC_DC_DISABLE_IGNORE_PE);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes);
}
489
/* Soft-reset the geometry assembly (GA/VAP) block. The CP is marked not
 * ready for the duration and re-initialized afterwards if it had been
 * running. Returns 0 (or r100_cp_init()'s result) on success, -1 if the
 * GA never went idle within the timeout.
 */
int r300_ga_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	bool reinit_cp;
	int i;

	reinit_cp = rdev->cp.ready;
	rdev->cp.ready = false;
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* stop CP fetching, then pulse the soft-reset bits */
		WREG32(RADEON_CP_CSQ_MODE, 0);
		WREG32(RADEON_CP_CSQ_CNTL, 0);
		WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
		(void)RREG32(RADEON_RBBM_SOFT_RESET);
		udelay(200);
		WREG32(RADEON_RBBM_SOFT_RESET, 0);
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		tmp = RREG32(RADEON_RBBM_STATUS);
		/* bits 20/26 flag VAP/GA activity per the messages below */
		if (tmp & ((1 << 20) | (1 << 26))) {
			DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)", tmp);
			/* GA still busy soft reset it */
			WREG32(0x429C, 0x200);
			WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
			WREG32(0x43E0, 0);
			WREG32(0x43E4, 0);
			WREG32(0x24AC, 0);
		}
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			break;
		}
	}
	/* second loop: confirm the idle state sticks before declaring success */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			DRM_INFO("GA reset succeed (RBBM_STATUS=0x%08X)\n",
				 tmp);
			if (reinit_cp) {
				return r100_cp_init(rdev, rdev->cp.ring_size);
			}
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset GA ! (RBBM_STATUS=0x%08X)\n", tmp);
	return -1;
}
540
/* Full GPU soft reset: reset HDP, then RB2D, GA and CP in turn, but only
 * the blocks RBBM_STATUS reports as busy. Returns 0 when the GPU ends up
 * idle, -1 otherwise.
 */
int r300_gpu_reset(struct radeon_device *rdev)
{
	uint32_t status;

	/* reset order likely matter */
	status = RREG32(RADEON_RBBM_STATUS);
	/* reset HDP */
	r100_hdp_reset(rdev);
	/* reset rb2d */
	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
		r100_rb2d_reset(rdev);
	}
	/* reset GA */
	if (status & ((1 << 20) | (1 << 26))) {
		r300_ga_reset(rdev);
	}
	/* reset CP */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 16)) {
		r100_cp_reset(rdev);
	}
	/* Check if GPU is idle */
	status = RREG32(RADEON_RBBM_STATUS);
	/* bit 31 == GUI active */
	if (status & (1 << 31)) {
		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
		return -1;
	}
	DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
	return 0;
}
571
572
573 /*
574  * r300,r350,rv350,rv380 VRAM info
575  */
576 void r300_vram_info(struct radeon_device *rdev)
577 {
578         uint32_t tmp;
579
580         /* DDR for all card after R300 & IGP */
581         rdev->mc.vram_is_ddr = true;
582         tmp = RREG32(RADEON_MEM_CNTL);
583         if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
584                 rdev->mc.vram_width = 128;
585         } else {
586                 rdev->mc.vram_width = 64;
587         }
588
589         r100_vram_init_sizes(rdev);
590 }
591
592
593 /*
594  * Indirect registers accessor
595  */
596 uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
597 {
598         uint32_t r;
599
600         WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
601         (void)RREG32(RADEON_PCIE_INDEX);
602         r = RREG32(RADEON_PCIE_DATA);
603         return r;
604 }
605
606 void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
607 {
608         WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
609         (void)RREG32(RADEON_PCIE_INDEX);
610         WREG32(RADEON_PCIE_DATA, (v));
611         (void)RREG32(RADEON_PCIE_DATA);
612 }
613
614 /*
615  * PCIE Lanes
616  */
617
/* Reconfigure the PCIE link to the requested number of lanes
 * (0/1/2/4/8/12/16; anything else is treated as 16). No-op on IGP and
 * non-PCIE boards, and when the link already has the requested width.
 */
void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
{
	uint32_t link_width_cntl, mask;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* FIXME wait for idle */

	switch (lanes) {
	case 0:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
		break;
	case 1:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
		break;
	case 2:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
		break;
	case 4:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
		break;
	case 8:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
		break;
	case 12:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
		break;
	case 16:
	default:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
		break;
	}

	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

	/* already at the requested width? */
	if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
	    (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
		return;

	link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
			     RADEON_PCIE_LC_RECONFIG_NOW |
			     RADEON_PCIE_LC_RECONFIG_LATER |
			     RADEON_PCIE_LC_SHORT_RECONFIG_EN);
	link_width_cntl |= mask;
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
	/* second write triggers the reconfiguration */
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
						     RADEON_PCIE_LC_RECONFIG_NOW));

	/* wait for lane set to complete */
	/* NOTE(review): 0xffffffff reads mean the device is not responding
	 * during retrain; this poll has no timeout and could spin forever
	 * if the link never comes back -- confirm acceptable */
	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
	while (link_width_cntl == 0xffffffff)
		link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

}
676
677
678 /*
679  * Debugfs info
680  */
#if defined(CONFIG_DEBUG_FS)
/* debugfs show() callback: dump the PCIE GART control, table base and
 * aperture window registers. Always returns 0.
 */
static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_BASE);
	seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO);
	seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI);
	seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO);
	seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI);
	seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n", tmp);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR);
	seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n", tmp);
	return 0;
}

/* File list registered by rv370_debugfs_pcie_gart_info_init() below. */
static struct drm_info_list rv370_pcie_gart_info_list[] = {
	{"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
};
#endif
710
/* Register the PCIE GART debugfs file; a no-op returning 0 when the
 * kernel is built without CONFIG_DEBUG_FS.
 */
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list, 1);
#else
	return 0;
#endif
}
719
720
721 /*
722  * CS functions
723  */
/* State tracked by the CS checker while validating an r300 command
 * stream, used to verify that bound buffers are large enough for the
 * rendering the stream requests.
 */

/* Color-buffer (or, reused below, depth-buffer) binding. */
struct r300_cs_track_cb {
	struct radeon_object	*robj;		/* backing buffer object */
	unsigned		pitch;
	unsigned		cpp;		/* bytes per pixel */
	unsigned		offset;
};

/* One bound vertex-array stream. */
struct r300_cs_track_array {
	struct radeon_object	*robj;
	unsigned		esize;		/* element size */
};

/* One texture unit's binding and sizing parameters. */
struct r300_cs_track_texture {
	struct radeon_object	*robj;
	unsigned		pitch;
	unsigned		width;
	unsigned		height;
	unsigned		num_levels;	/* mipmap levels */
	unsigned		cpp;
	unsigned		tex_coord_type;	/* 0=2D, 1=3D, 2=cube (per check fn) */
	unsigned		txdepth;
	unsigned		width_11;	/* extra width bit (>=RV515) */
	unsigned		height_11;	/* extra height bit (>=RV515) */
	bool			use_pitch;	/* size from pitch instead of width */
	bool			enabled;
	bool			roundup_w;	/* round width up to power of two */
	bool			roundup_h;	/* round height up to power of two */
};

/* Aggregate CS tracking state for one parse. */
struct r300_cs_track {
	unsigned			num_cb;		/* active color buffers */
	unsigned			maxy;
	unsigned			vtx_size;
	unsigned			vap_vf_cntl;
	unsigned			immd_dwords;
	unsigned			num_arrays;
	unsigned			max_indx;
	struct r300_cs_track_array	arrays[11];
	struct r300_cs_track_cb		cb[4];
	struct r300_cs_track_cb		zb;		/* depth buffer */
	struct r300_cs_track_texture	textures[16];
	bool				z_enabled;
};
767
/* Dump one texture unit's tracked parameters to the kernel log; used
 * when a texture fails validation in r300_cs_track_texture_check().
 */
static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t)
{
	DRM_ERROR("pitch                      %d\n", t->pitch);
	DRM_ERROR("width                      %d\n", t->width);
	DRM_ERROR("height                     %d\n", t->height);
	DRM_ERROR("num levels                 %d\n", t->num_levels);
	DRM_ERROR("depth                      %d\n", t->txdepth);
	DRM_ERROR("bpp                        %d\n", t->cpp);
	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
}
780
/* Verify that every enabled texture unit has a bound object large enough
 * for its mip chain (accounting for 3D depth and cube faces).
 * Returns 0 if all textures fit, -EINVAL otherwise.
 */
static inline int r300_cs_track_texture_check(struct radeon_device *rdev,
					      struct r300_cs_track *track)
{
	struct radeon_object *robj;
	unsigned long size;
	unsigned u, i, w, h;

	for (u = 0; u < 16; u++) {
		if (!track->textures[u].enabled)
			continue;
		robj = track->textures[u].robj;
		if (robj == NULL) {
			DRM_ERROR("No texture bound to unit %u\n", u);
			return -EINVAL;
		}
		size = 0;
		/* sum the footprint of each mip level */
		for (i = 0; i <= track->textures[u].num_levels; i++) {
			if (track->textures[u].use_pitch) {
				w = track->textures[u].pitch / (1 << i);
			} else {
				w = track->textures[u].width / (1 << i);
				/* >=RV515 carries an 11th size bit separately */
				if (rdev->family >= CHIP_RV515)
					w |= track->textures[u].width_11;
				if (track->textures[u].roundup_w)
					w = roundup_pow_of_two(w);
			}
			h = track->textures[u].height / (1 << i);
			if (rdev->family >= CHIP_RV515)
				h |= track->textures[u].height_11;
			if (track->textures[u].roundup_h)
				h = roundup_pow_of_two(h);
			size += w * h;
		}
		size *= track->textures[u].cpp;
		switch (track->textures[u].tex_coord_type) {
		case 0:
			/* 2D texture: nothing extra */
			break;
		case 1:
			/* 3D texture: scale by depth */
			size *= (1 << track->textures[u].txdepth);
			break;
		case 2:
			/* cube map: six faces */
			size *= 6;
			break;
		default:
			DRM_ERROR("Invalid texture coordinate type %u for unit "
				  "%u\n", track->textures[u].tex_coord_type, u);
			return -EINVAL;
		}
		if (size > radeon_object_size(robj)) {
			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
				  "%lu\n", u, size, radeon_object_size(robj));
			r300_cs_track_texture_print(&track->textures[u]);
			return -EINVAL;
		}
	}
	return 0;
}
838
839 int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
840 {
841         unsigned i;
842         unsigned long size;
843         unsigned prim_walk;
844         unsigned nverts;
845
846         for (i = 0; i < track->num_cb; i++) {
847                 if (track->cb[i].robj == NULL) {
848                         DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
849                         return -EINVAL;
850                 }
851                 size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
852                 size += track->cb[i].offset;
853                 if (size > radeon_object_size(track->cb[i].robj)) {
854                         DRM_ERROR("[drm] Buffer too small for color buffer %d "
855                                   "(need %lu have %lu) !\n", i, size,
856                                   radeon_object_size(track->cb[i].robj));
857                         DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
858                                   i, track->cb[i].pitch, track->cb[i].cpp,
859                                   track->cb[i].offset, track->maxy);
860                         return -EINVAL;
861                 }
862         }
863         if (track->z_enabled) {
864                 if (track->zb.robj == NULL) {
865                         DRM_ERROR("[drm] No buffer for z buffer !\n");
866                         return -EINVAL;
867                 }
868                 size = track->zb.pitch * track->zb.cpp * track->maxy;
869                 size += track->zb.offset;
870                 if (size > radeon_object_size(track->zb.robj)) {
871                         DRM_ERROR("[drm] Buffer too small for z buffer "
872                                   "(need %lu have %lu) !\n", size,
873                                   radeon_object_size(track->zb.robj));
874                         return -EINVAL;
875                 }
876         }
877         prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
878         nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
879         switch (prim_walk) {
880         case 1:
881                 for (i = 0; i < track->num_arrays; i++) {
882                         size = track->arrays[i].esize * track->max_indx * 4;
883                         if (track->arrays[i].robj == NULL) {
884                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
885                                           "bound\n", prim_walk, i);
886                                 return -EINVAL;
887                         }
888                         if (size > radeon_object_size(track->arrays[i].robj)) {
889                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
890                                            "have %lu dwords\n", prim_walk, i,
891                                            size >> 2,
892                                            radeon_object_size(track->arrays[i].robj) >> 2);
893                                 DRM_ERROR("Max indices %u\n", track->max_indx);
894                                 return -EINVAL;
895                         }
896                 }
897                 break;
898         case 2:
899                 for (i = 0; i < track->num_arrays; i++) {
900                         size = track->arrays[i].esize * (nverts - 1) * 4;
901                         if (track->arrays[i].robj == NULL) {
902                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
903                                           "bound\n", prim_walk, i);
904                                 return -EINVAL;
905                         }
906                         if (size > radeon_object_size(track->arrays[i].robj)) {
907                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
908                                            "have %lu dwords\n", prim_walk, i, size >> 2,
909                                            radeon_object_size(track->arrays[i].robj) >> 2);
910                                 return -EINVAL;
911                         }
912                 }
913                 break;
914         case 3:
915                 size = track->vtx_size * nverts;
916                 if (size != track->immd_dwords) {
917                         DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
918                                   track->immd_dwords, size);
919                         DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
920                                   nverts, track->vtx_size);
921                         return -EINVAL;
922                 }
923                 break;
924         default:
925                 DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
926                           prim_walk);
927                 return -EINVAL;
928         }
929         return r300_cs_track_texture_check(rdev, track);
930 }
931
932 static inline void r300_cs_track_clear(struct r300_cs_track *track)
933 {
934         unsigned i;
935
936         track->num_cb = 4;
937         track->maxy = 4096;
938         for (i = 0; i < track->num_cb; i++) {
939                 track->cb[i].robj = NULL;
940                 track->cb[i].pitch = 8192;
941                 track->cb[i].cpp = 16;
942                 track->cb[i].offset = 0;
943         }
944         track->z_enabled = true;
945         track->zb.robj = NULL;
946         track->zb.pitch = 8192;
947         track->zb.cpp = 4;
948         track->zb.offset = 0;
949         track->vtx_size = 0x7F;
950         track->immd_dwords = 0xFFFFFFFFUL;
951         track->num_arrays = 11;
952         track->max_indx = 0x00FFFFFFUL;
953         for (i = 0; i < track->num_arrays; i++) {
954                 track->arrays[i].robj = NULL;
955                 track->arrays[i].esize = 0x7F;
956         }
957         for (i = 0; i < 16; i++) {
958                 track->textures[i].pitch = 16536;
959                 track->textures[i].width = 16536;
960                 track->textures[i].height = 16536;
961                 track->textures[i].width_11 = 1 << 11;
962                 track->textures[i].height_11 = 1 << 11;
963                 track->textures[i].num_levels = 12;
964                 track->textures[i].txdepth = 16;
965                 track->textures[i].cpp = 64;
966                 track->textures[i].tex_coord_type = 1;
967                 track->textures[i].robj = NULL;
968                 /* CS IB emission code makes sure texture unit are disabled */
969                 track->textures[i].enabled = false;
970                 track->textures[i].roundup_w = true;
971                 track->textures[i].roundup_h = true;
972         }
973 }
974
/* Register safety bitmap handed to r100_cs_parse_packet0() together
 * with r300_packet0_check().  One bit per dword register (159 * 32
 * registers covered); presumably a set bit marks a register userspace
 * may write without a dedicated check, while cleared bits are routed
 * through r300_packet0_check() — exact semantics live in
 * r100_cs_parse_packet0().
 */
static const unsigned r300_reg_safe_bm[159] = {
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
	0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
	0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
	0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
	0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
	0x00000000, 0x0000C100, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x0003FC01, 0xFFFFFFF8, 0xFE800B19,
};
1017
1018 static int r300_packet0_check(struct radeon_cs_parser *p,
1019                 struct radeon_cs_packet *pkt,
1020                 unsigned idx, unsigned reg)
1021 {
1022         struct radeon_cs_chunk *ib_chunk;
1023         struct radeon_cs_reloc *reloc;
1024         struct r300_cs_track *track;
1025         volatile uint32_t *ib;
1026         uint32_t tmp;
1027         unsigned i;
1028         int r;
1029
1030         ib = p->ib->ptr;
1031         ib_chunk = &p->chunks[p->chunk_ib_idx];
1032         track = (struct r300_cs_track*)p->track;
1033         switch(reg) {
1034         case AVIVO_D1MODE_VLINE_START_END:
1035         case RADEON_CRTC_GUI_TRIG_VLINE:
1036                 r = r100_cs_packet_parse_vline(p);
1037                 if (r) {
1038                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1039                                         idx, reg);
1040                         r100_cs_dump_packet(p, pkt);
1041                         return r;
1042                 }
1043                 break;
1044         case RADEON_DST_PITCH_OFFSET:
1045         case RADEON_SRC_PITCH_OFFSET:
1046                 r = r100_cs_packet_next_reloc(p, &reloc);
1047                 if (r) {
1048                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1049                                         idx, reg);
1050                         r100_cs_dump_packet(p, pkt);
1051                         return r;
1052                 }
1053                 tmp = ib_chunk->kdata[idx] & 0x003fffff;
1054                 tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
1055                 ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
1056                 break;
1057         case R300_RB3D_COLOROFFSET0:
1058         case R300_RB3D_COLOROFFSET1:
1059         case R300_RB3D_COLOROFFSET2:
1060         case R300_RB3D_COLOROFFSET3:
1061                 i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
1062                 r = r100_cs_packet_next_reloc(p, &reloc);
1063                 if (r) {
1064                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1065                                         idx, reg);
1066                         r100_cs_dump_packet(p, pkt);
1067                         return r;
1068                 }
1069                 track->cb[i].robj = reloc->robj;
1070                 track->cb[i].offset = ib_chunk->kdata[idx];
1071                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1072                 break;
1073         case R300_ZB_DEPTHOFFSET:
1074                 r = r100_cs_packet_next_reloc(p, &reloc);
1075                 if (r) {
1076                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1077                                         idx, reg);
1078                         r100_cs_dump_packet(p, pkt);
1079                         return r;
1080                 }
1081                 track->zb.robj = reloc->robj;
1082                 track->zb.offset = ib_chunk->kdata[idx];
1083                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1084                 break;
1085         case R300_TX_OFFSET_0:
1086         case R300_TX_OFFSET_0+4:
1087         case R300_TX_OFFSET_0+8:
1088         case R300_TX_OFFSET_0+12:
1089         case R300_TX_OFFSET_0+16:
1090         case R300_TX_OFFSET_0+20:
1091         case R300_TX_OFFSET_0+24:
1092         case R300_TX_OFFSET_0+28:
1093         case R300_TX_OFFSET_0+32:
1094         case R300_TX_OFFSET_0+36:
1095         case R300_TX_OFFSET_0+40:
1096         case R300_TX_OFFSET_0+44:
1097         case R300_TX_OFFSET_0+48:
1098         case R300_TX_OFFSET_0+52:
1099         case R300_TX_OFFSET_0+56:
1100         case R300_TX_OFFSET_0+60:
1101                 i = (reg - R300_TX_OFFSET_0) >> 2;
1102                 r = r100_cs_packet_next_reloc(p, &reloc);
1103                 if (r) {
1104                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1105                                         idx, reg);
1106                         r100_cs_dump_packet(p, pkt);
1107                         return r;
1108                 }
1109                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1110                 track->textures[i].robj = reloc->robj;
1111                 break;
1112         /* Tracked registers */
1113         case 0x2084:
1114                 /* VAP_VF_CNTL */
1115                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1116                 break;
1117         case 0x20B4:
1118                 /* VAP_VTX_SIZE */
1119                 track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
1120                 break;
1121         case 0x2134:
1122                 /* VAP_VF_MAX_VTX_INDX */
1123                 track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
1124                 break;
1125         case 0x43E4:
1126                 /* SC_SCISSOR1 */
1127                 track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
1128                 if (p->rdev->family < CHIP_RV515) {
1129                         track->maxy -= 1440;
1130                 }
1131                 break;
1132         case 0x4E00:
1133                 /* RB3D_CCTL */
1134                 track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
1135                 break;
1136         case 0x4E38:
1137         case 0x4E3C:
1138         case 0x4E40:
1139         case 0x4E44:
1140                 /* RB3D_COLORPITCH0 */
1141                 /* RB3D_COLORPITCH1 */
1142                 /* RB3D_COLORPITCH2 */
1143                 /* RB3D_COLORPITCH3 */
1144                 i = (reg - 0x4E38) >> 2;
1145                 track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
1146                 switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
1147                 case 9:
1148                 case 11:
1149                 case 12:
1150                         track->cb[i].cpp = 1;
1151                         break;
1152                 case 3:
1153                 case 4:
1154                 case 13:
1155                 case 15:
1156                         track->cb[i].cpp = 2;
1157                         break;
1158                 case 6:
1159                         track->cb[i].cpp = 4;
1160                         break;
1161                 case 10:
1162                         track->cb[i].cpp = 8;
1163                         break;
1164                 case 7:
1165                         track->cb[i].cpp = 16;
1166                         break;
1167                 default:
1168                         DRM_ERROR("Invalid color buffer format (%d) !\n",
1169                                   ((ib_chunk->kdata[idx] >> 21) & 0xF));
1170                         return -EINVAL;
1171                 }
1172                 break;
1173         case 0x4F00:
1174                 /* ZB_CNTL */
1175                 if (ib_chunk->kdata[idx] & 2) {
1176                         track->z_enabled = true;
1177                 } else {
1178                         track->z_enabled = false;
1179                 }
1180                 break;
1181         case 0x4F10:
1182                 /* ZB_FORMAT */
1183                 switch ((ib_chunk->kdata[idx] & 0xF)) {
1184                 case 0:
1185                 case 1:
1186                         track->zb.cpp = 2;
1187                         break;
1188                 case 2:
1189                         track->zb.cpp = 4;
1190                         break;
1191                 default:
1192                         DRM_ERROR("Invalid z buffer format (%d) !\n",
1193                                   (ib_chunk->kdata[idx] & 0xF));
1194                         return -EINVAL;
1195                 }
1196                 break;
1197         case 0x4F24:
1198                 /* ZB_DEPTHPITCH */
1199                 track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
1200                 break;
1201         case 0x4104:
1202                 for (i = 0; i < 16; i++) {
1203                         bool enabled;
1204
1205                         enabled = !!(ib_chunk->kdata[idx] & (1 << i));
1206                         track->textures[i].enabled = enabled;
1207                 }
1208                 break;
1209         case 0x44C0:
1210         case 0x44C4:
1211         case 0x44C8:
1212         case 0x44CC:
1213         case 0x44D0:
1214         case 0x44D4:
1215         case 0x44D8:
1216         case 0x44DC:
1217         case 0x44E0:
1218         case 0x44E4:
1219         case 0x44E8:
1220         case 0x44EC:
1221         case 0x44F0:
1222         case 0x44F4:
1223         case 0x44F8:
1224         case 0x44FC:
1225                 /* TX_FORMAT1_[0-15] */
1226                 i = (reg - 0x44C0) >> 2;
1227                 tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
1228                 track->textures[i].tex_coord_type = tmp;
1229                 switch ((ib_chunk->kdata[idx] & 0x1F)) {
1230                 case 0:
1231                 case 2:
1232                 case 5:
1233                 case 18:
1234                 case 20:
1235                 case 21:
1236                         track->textures[i].cpp = 1;
1237                         break;
1238                 case 1:
1239                 case 3:
1240                 case 6:
1241                 case 7:
1242                 case 10:
1243                 case 11:
1244                 case 19:
1245                 case 22:
1246                 case 24:
1247                         track->textures[i].cpp = 2;
1248                         break;
1249                 case 4:
1250                 case 8:
1251                 case 9:
1252                 case 12:
1253                 case 13:
1254                 case 23:
1255                 case 25:
1256                 case 27:
1257                 case 30:
1258                         track->textures[i].cpp = 4;
1259                         break;
1260                 case 14:
1261                 case 26:
1262                 case 28:
1263                         track->textures[i].cpp = 8;
1264                         break;
1265                 case 29:
1266                         track->textures[i].cpp = 16;
1267                         break;
1268                 default:
1269                         DRM_ERROR("Invalid texture format %u\n",
1270                                   (ib_chunk->kdata[idx] & 0x1F));
1271                         return -EINVAL;
1272                         break;
1273                 }
1274                 break;
1275         case 0x4400:
1276         case 0x4404:
1277         case 0x4408:
1278         case 0x440C:
1279         case 0x4410:
1280         case 0x4414:
1281         case 0x4418:
1282         case 0x441C:
1283         case 0x4420:
1284         case 0x4424:
1285         case 0x4428:
1286         case 0x442C:
1287         case 0x4430:
1288         case 0x4434:
1289         case 0x4438:
1290         case 0x443C:
1291                 /* TX_FILTER0_[0-15] */
1292                 i = (reg - 0x4400) >> 2;
1293                 tmp = ib_chunk->kdata[idx] & 0x7;;
1294                 if (tmp == 2 || tmp == 4 || tmp == 6) {
1295                         track->textures[i].roundup_w = false;
1296                 }
1297                 tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;;
1298                 if (tmp == 2 || tmp == 4 || tmp == 6) {
1299                         track->textures[i].roundup_h = false;
1300                 }
1301                 break;
1302         case 0x4500:
1303         case 0x4504:
1304         case 0x4508:
1305         case 0x450C:
1306         case 0x4510:
1307         case 0x4514:
1308         case 0x4518:
1309         case 0x451C:
1310         case 0x4520:
1311         case 0x4524:
1312         case 0x4528:
1313         case 0x452C:
1314         case 0x4530:
1315         case 0x4534:
1316         case 0x4538:
1317         case 0x453C:
1318                 /* TX_FORMAT2_[0-15] */
1319                 i = (reg - 0x4500) >> 2;
1320                 tmp = ib_chunk->kdata[idx] & 0x3FFF;
1321                 track->textures[i].pitch = tmp + 1;
1322                 if (p->rdev->family >= CHIP_RV515) {
1323                         tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
1324                         track->textures[i].width_11 = tmp;
1325                         tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
1326                         track->textures[i].height_11 = tmp;
1327                 }
1328                 break;
1329         case 0x4480:
1330         case 0x4484:
1331         case 0x4488:
1332         case 0x448C:
1333         case 0x4490:
1334         case 0x4494:
1335         case 0x4498:
1336         case 0x449C:
1337         case 0x44A0:
1338         case 0x44A4:
1339         case 0x44A8:
1340         case 0x44AC:
1341         case 0x44B0:
1342         case 0x44B4:
1343         case 0x44B8:
1344         case 0x44BC:
1345                 /* TX_FORMAT0_[0-15] */
1346                 i = (reg - 0x4480) >> 2;
1347                 tmp = ib_chunk->kdata[idx] & 0x7FF;
1348                 track->textures[i].width = tmp + 1;
1349                 tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
1350                 track->textures[i].height = tmp + 1;
1351                 tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
1352                 track->textures[i].num_levels = tmp;
1353                 tmp = ib_chunk->kdata[idx] & (1 << 31);
1354                 track->textures[i].use_pitch = !!tmp;
1355                 tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
1356                 track->textures[i].txdepth = tmp;
1357                 break;
1358         default:
1359                 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1360                        reg, idx);
1361                 return -EINVAL;
1362         }
1363         return 0;
1364 }
1365
/* Validate one type 3 (command) packet from the command stream.
 *
 * Applies relocations for vertex/index buffer pointers and, for every
 * draw opcode, records the draw parameters in p->track and runs
 * r300_cs_track_check() before the draw is allowed through.
 *
 * Returns 0 on success, a negative error code on a missing relocation,
 * an invalid draw setup, or an unsupported opcode.
 */
static int r300_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	struct r300_cs_track *track;
	volatile uint32_t *ib;
	unsigned idx;
	unsigned i, c;
	int r;

	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	track = (struct r300_cs_track*)p->track;
	switch(pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		/* First dword: number of vertex arrays; each following
		 * triplet describes two arrays (sizes dword + two pointers).
		 * NOTE(review): c comes straight from the IB (0..31) while
		 * track->arrays has only 11 entries, and (c - 1) wraps for
		 * c == 0 since c is unsigned — bounds look unchecked here;
		 * verify against the hardware maximum. */
		c = ib_chunk->kdata[idx++] & 0x1F;
		track->num_arrays = c;
		for (i = 0; i < (c - 1); i+=2, idx+=3) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 0].robj = reloc->robj;
			track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
			track->arrays[i + 0].esize &= 0x7F;
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 1].robj = reloc->robj;
			track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
			track->arrays[i + 1].esize &= 0x7F;
		}
		if (c & 1) {
			/* Odd count: one trailing array remains. */
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			track->arrays[i + 0].robj = reloc->robj;
			track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
			track->arrays[i + 0].esize &= 0x7F;
		}
		break;
	case PACKET3_INDX_BUFFER:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	/* Draw packet */
	case PACKET3_3D_DRAW_IMMD:
		/* Number of dwords is vtx_size * (num_vertices - 1)
		 * PRIM_WALK must be equal to 3 vertex data in embedded
		 * in cmd stream */
		if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = ib_chunk->kdata[idx+1];
		track->immd_dwords = pkt->count - 1;
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_IMMD_2:
		/* Number of dwords is vtx_size * (num_vertices - 1)
		 * PRIM_WALK must be equal to 3 vertex data in embedded
		 * in cmd stream */
		if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
			return -EINVAL;
		}
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		track->immd_dwords = pkt->count;
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_VBUF:
		track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_VBUF_2:
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_INDX:
		track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_3D_DRAW_INDX_2:
		track->vap_vf_cntl = ib_chunk->kdata[idx];
		r = r300_cs_track_check(p->rdev, track);
		if (r) {
			return r;
		}
		break;
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
1503
1504 int r300_cs_parse(struct radeon_cs_parser *p)
1505 {
1506         struct radeon_cs_packet pkt;
1507         struct r300_cs_track track;
1508         int r;
1509
1510         r300_cs_track_clear(&track);
1511         p->track = &track;
1512         do {
1513                 r = r100_cs_packet_parse(p, &pkt, p->idx);
1514                 if (r) {
1515                         return r;
1516                 }
1517                 p->idx += pkt.count + 2;
1518                 switch (pkt.type) {
1519                 case PACKET_TYPE0:
1520                         r = r100_cs_parse_packet0(p, &pkt,
1521                                                   p->rdev->config.r300.reg_safe_bm,
1522                                                   p->rdev->config.r300.reg_safe_bm_size,
1523                                                   &r300_packet0_check);
1524                         break;
1525                 case PACKET_TYPE2:
1526                         break;
1527                 case PACKET_TYPE3:
1528                         r = r300_packet3_check(p, &pkt);
1529                         break;
1530                 default:
1531                         DRM_ERROR("Unknown packet type %d !\n", pkt.type);
1532                         return -EINVAL;
1533                 }
1534                 if (r) {
1535                         return r;
1536                 }
1537         } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
1538         return 0;
1539 }
1540
1541 int r300_init(struct radeon_device *rdev)
1542 {
1543         rdev->config.r300.reg_safe_bm = r300_reg_safe_bm;
1544         rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm);
1545         return 0;
1546 }