drm/radeon/kms: cut down indirects in register accesses.
[linux-2.6.git] / drivers / gpu / drm / radeon / r300.c
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/seq_file.h>
29 #include "drmP.h"
30 #include "drm.h"
31 #include "radeon_reg.h"
32 #include "radeon.h"
33 #include "radeon_drm.h"
34 #include "radeon_share.h"
35
36 /* r300,r350,rv350,rv370,rv380 depends on : */
37 void r100_hdp_reset(struct radeon_device *rdev);
38 int r100_cp_reset(struct radeon_device *rdev);
39 int r100_rb2d_reset(struct radeon_device *rdev);
40 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
41 int r100_pci_gart_enable(struct radeon_device *rdev);
42 void r100_pci_gart_disable(struct radeon_device *rdev);
43 void r100_mc_setup(struct radeon_device *rdev);
44 void r100_mc_disable_clients(struct radeon_device *rdev);
45 int r100_gui_wait_for_idle(struct radeon_device *rdev);
46 int r100_cs_packet_parse(struct radeon_cs_parser *p,
47                          struct radeon_cs_packet *pkt,
48                          unsigned idx);
49 int r100_cs_packet_parse_vline(struct radeon_cs_parser *p);
50 int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
51                               struct radeon_cs_reloc **cs_reloc);
52 int r100_cs_parse_packet0(struct radeon_cs_parser *p,
53                           struct radeon_cs_packet *pkt,
54                           const unsigned *auth, unsigned n,
55                           radeon_packet0_check_t check);
56 void r100_cs_dump_packet(struct radeon_cs_parser *p,
57                          struct radeon_cs_packet *pkt);
58 int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
59                                          struct radeon_cs_packet *pkt,
60                                          struct radeon_object *robj);
61
62 /* This files gather functions specifics to:
63  * r300,r350,rv350,rv370,rv380
64  *
65  * Some of these functions might be used by newer ASICs.
66  */
67 void r300_gpu_init(struct radeon_device *rdev);
68 int r300_mc_wait_for_idle(struct radeon_device *rdev);
69 int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);
70
71
72 /*
73  * rv370,rv380 PCIE GART
74  */
75 void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
76 {
77         uint32_t tmp;
78         int i;
79
80         /* Workaround HW bug do flush 2 times */
81         for (i = 0; i < 2; i++) {
82                 tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
83                 WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
84                 (void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
85                 WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
86         }
87         mb();
88 }
89
/*
 * rv370_pcie_gart_enable - bring up the PCIE GART for rv370/rv380.
 *
 * Allocates the GART page table in VRAM, programs the GART aperture
 * (start/end/base), enables translation and flushes the TLB.
 * Returns 0 on success or a negative error code.
 */
int rv370_pcie_gart_enable(struct radeon_device *rdev)
{
	uint32_t table_addr;
	uint32_t tmp;
	int r;

	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r) {
		return r;
	}
	/* debugfs registration failure is non-fatal, just report it */
	r = rv370_debugfs_pcie_gart_info_init(rdev);
	if (r) {
		DRM_ERROR("Failed to register debugfs file for PCIE gart !\n");
	}
	/* one 32-bit page-table entry per GPU page */
	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
	r = radeon_gart_table_vram_alloc(rdev);
	if (r) {
		return r;
	}
	/* discard memory request outside of configured range */
	tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
	/* aperture end address is inclusive: last byte of the last page */
	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
	WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
	WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
	table_addr = rdev->gart.table_addr;
	WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
	/* FIXME: setup default page */
	/* discarded reads are redirected to the start of VRAM */
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
	/* Clear error */
	WREG32_PCIE(0x18, 0);
	tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
	tmp |= RADEON_PCIE_TX_GART_EN;
	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
	rv370_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
		 rdev->mc.gtt_size >> 20, table_addr);
	rdev->gart.ready = true;
	return 0;
}
135
136 void rv370_pcie_gart_disable(struct radeon_device *rdev)
137 {
138         uint32_t tmp;
139
140         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
141         tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
142         WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
143         if (rdev->gart.table.vram.robj) {
144                 radeon_object_kunmap(rdev->gart.table.vram.robj);
145                 radeon_object_unpin(rdev->gart.table.vram.robj);
146         }
147 }
148
149 int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
150 {
151         void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
152
153         if (i < 0 || i > rdev->gart.num_gpu_pages) {
154                 return -EINVAL;
155         }
156         addr = (lower_32_bits(addr) >> 8) |
157                ((upper_32_bits(addr) & 0xff) << 24) |
158                0xc;
159         /* on x86 we want this to be CPU endian, on powerpc
160          * on powerpc without HW swappers, it'll get swapped on way
161          * into VRAM - so no need for cpu_to_le32 on VRAM tables */
162         writel(addr, ((void __iomem *)ptr) + (i * 4));
163         return 0;
164 }
165
/*
 * r300_gart_enable - select and enable the appropriate GART backend.
 *
 * AGP cards use the AGP aperture instead, so any on-chip GART is
 * disabled and 0 is returned.  PCIE parts get the rv370 PCIE GART
 * (repointing the asic gart hooks at the PCIE helpers); everything
 * else falls back to the PCI GART.  Returns 0 or a negative error.
 */
int r300_gart_enable(struct radeon_device *rdev)
{
#if __OS_HAS_AGP
	if (rdev->flags & RADEON_IS_AGP) {
		/* families after RV350 have a PCIE GART, earlier ones a
		 * PCI GART - disable whichever this chip carries */
		if (rdev->family > CHIP_RV350) {
			rv370_pcie_gart_disable(rdev);
		} else {
			r100_pci_gart_disable(rdev);
		}
		return 0;
	}
#endif
	if (rdev->flags & RADEON_IS_PCIE) {
		/* route the generic gart asic hooks to the PCIE variants */
		rdev->asic->gart_disable = &rv370_pcie_gart_disable;
		rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
		rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
		return rv370_pcie_gart_enable(rdev);
	}
	return r100_pci_gart_enable(rdev);
}
186
187
188 /*
189  * MC
190  */
/*
 * r300_mc_init - initialize the memory controller and GPU memory map.
 *
 * Resets/configures the GPU pipes, disables any active GART, then
 * lays out the VRAM and GTT apertures (reusing the AGP aperture when
 * AGP initializes successfully) and programs the MC registers.
 * Returns 0 on success or a negative error code.
 */
int r300_mc_init(struct radeon_device *rdev)
{
	int r;

	/* debugfs failure is non-fatal */
	if (r100_debugfs_rbbm_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
	}

	r300_gpu_init(rdev);
	r100_pci_gart_disable(rdev);
	if (rdev->flags & RADEON_IS_PCIE) {
		rv370_pcie_gart_disable(rdev);
	}

	/* Setup GPU memory space */
	/* ~0 sentinels - presumably radeon_mc_setup() places any aperture
	 * still left at this value; TODO confirm against radeon_device.c */
	rdev->mc.vram_location = 0xFFFFFFFFUL;
	rdev->mc.gtt_location = 0xFFFFFFFFUL;
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			/* AGP bring-up failed: fall back to GART operation */
			printk(KERN_WARNING "[drm] Disabling AGP\n");
			rdev->flags &= ~RADEON_IS_AGP;
			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		} else {
			/* put the GTT on top of the AGP aperture */
			rdev->mc.gtt_location = rdev->mc.agp_base;
		}
	}
	r = radeon_mc_setup(rdev);
	if (r) {
		return r;
	}

	/* Program GPU memory space */
	r100_mc_disable_clients(rdev);
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	r100_mc_setup(rdev);
	return 0;
}
232
233 void r300_mc_fini(struct radeon_device *rdev)
234 {
235         if (rdev->flags & RADEON_IS_PCIE) {
236                 rv370_pcie_gart_disable(rdev);
237                 radeon_gart_table_vram_free(rdev);
238         } else {
239                 r100_pci_gart_disable(rdev);
240                 radeon_gart_table_ram_free(rdev);
241         }
242         radeon_gart_fini(rdev);
243 }
244
245
246 /*
247  * Fence emission
248  */
/*
 * r300_fence_ring_emit - emit a fence into the CP ring.
 *
 * Caller must hold the ring lock and have reserved enough ring space.
 * Emits idle/flush state, then writes the fence sequence number to
 * the fence scratch register and fires the software interrupt so the
 * fence can be signalled from the IRQ handler.
 */
void r300_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Who ever call radeon_fence_emit should call ring_lock and ask
	 * for enough space (today caller are ib schedule and buffer move) */
	/* Write SC register so SC & US assert idle */
	radeon_ring_write(rdev, PACKET0(0x43E0, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(0x43E4, 0));
	radeon_ring_write(rdev, 0);
	/* Flush 3D cache */
	/* NOTE(review): 0x4E4C/0x4F18 look like the dst/z cache ctlstat
	 * registers named elsewhere in this file - confirm vs r300_reg.h */
	radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
	radeon_ring_write(rdev, (2 << 0));
	radeon_ring_write(rdev, PACKET0(0x4F18, 0));
	radeon_ring_write(rdev, (1 << 0));
	/* Wait until IDLE & CLEAN */
	radeon_ring_write(rdev, PACKET0(0x1720, 0));
	radeon_ring_write(rdev, (1 << 17) | (1 << 16)  | (1 << 9));
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
	radeon_ring_write(rdev, fence->seq);
	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}
273
274
275 /*
276  * Global GPU functions
277  */
278 int r300_copy_dma(struct radeon_device *rdev,
279                   uint64_t src_offset,
280                   uint64_t dst_offset,
281                   unsigned num_pages,
282                   struct radeon_fence *fence)
283 {
284         uint32_t size;
285         uint32_t cur_size;
286         int i, num_loops;
287         int r = 0;
288
289         /* radeon pitch is /64 */
290         size = num_pages << PAGE_SHIFT;
291         num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
292         r = radeon_ring_lock(rdev, num_loops * 4 + 64);
293         if (r) {
294                 DRM_ERROR("radeon: moving bo (%d).\n", r);
295                 return r;
296         }
297         /* Must wait for 2D idle & clean before DMA or hangs might happen */
298         radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0 ));
299         radeon_ring_write(rdev, (1 << 16));
300         for (i = 0; i < num_loops; i++) {
301                 cur_size = size;
302                 if (cur_size > 0x1FFFFF) {
303                         cur_size = 0x1FFFFF;
304                 }
305                 size -= cur_size;
306                 radeon_ring_write(rdev, PACKET0(0x720, 2));
307                 radeon_ring_write(rdev, src_offset);
308                 radeon_ring_write(rdev, dst_offset);
309                 radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
310                 src_offset += cur_size;
311                 dst_offset += cur_size;
312         }
313         radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
314         radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
315         if (fence) {
316                 r = radeon_fence_emit(rdev, fence);
317         }
318         radeon_ring_unlock_commit(rdev);
319         return r;
320 }
321
/*
 * r300_ring_start - emit the initial 3D engine state to the CP ring.
 *
 * Programs GB_TILE_CONFIG from the detected pipe count, configures
 * 2D/3D sync behaviour, flushes the dst/z caches, sets multisample
 * positions and GA defaults.  Silently returns if the ring cannot
 * be locked.
 */
void r300_ring_start(struct radeon_device *rdev)
{
	unsigned gb_tile_config;
	int r;

	/* Sub pixel 1/12 so we can have 4K rendering according to doc */
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	switch(rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	case 1:
	default:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}

	r = radeon_ring_lock(rdev, 64);
	if (r) {
		return;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(rdev,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
	radeon_ring_write(rdev, gb_tile_config);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	/* NOTE(review): 0x170C also gets bit 31 set in r300_gpu_init;
	 * register name not visible here - confirm vs register docs */
	radeon_ring_write(rdev, PACKET0(0x170C, 0));
	radeon_ring_write(rdev, 1 << 31);
	radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
	radeon_ring_write(rdev, 0);
	/* flush destination and z caches, then wait for idle & clean */
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_3D_IDLECLEAN);
	radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
	/* multisample sample positions (all set to 6) */
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X0_SHIFT) |
			   (6 << R300_MS_Y0_SHIFT) |
			   (6 << R300_MS_X1_SHIFT) |
			   (6 << R300_MS_Y1_SHIFT) |
			   (6 << R300_MS_X2_SHIFT) |
			   (6 << R300_MS_Y2_SHIFT) |
			   (6 << R300_MSBD0_Y_SHIFT) |
			   (6 << R300_MSBD0_X_SHIFT)));
	radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
	radeon_ring_write(rdev,
			  ((6 << R300_MS_X3_SHIFT) |
			   (6 << R300_MS_Y3_SHIFT) |
			   (6 << R300_MS_X4_SHIFT) |
			   (6 << R300_MS_Y4_SHIFT) |
			   (6 << R300_MS_X5_SHIFT) |
			   (6 << R300_MS_Y5_SHIFT) |
			   (6 << R300_MSBD1_SHIFT)));
	radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
	radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
	radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
	radeon_ring_write(rdev,
			  R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
	radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
	radeon_ring_write(rdev,
			  R300_GEOMETRY_ROUND_NEAREST |
			  R300_COLOR_ROUND_NEAREST);
	radeon_ring_unlock_commit(rdev);
}
411
412 void r300_errata(struct radeon_device *rdev)
413 {
414         rdev->pll_errata = 0;
415
416         if (rdev->family == CHIP_R300 &&
417             (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
418                 rdev->pll_errata |= CHIP_ERRATA_R300_CG;
419         }
420 }
421
422 int r300_mc_wait_for_idle(struct radeon_device *rdev)
423 {
424         unsigned i;
425         uint32_t tmp;
426
427         for (i = 0; i < rdev->usec_timeout; i++) {
428                 /* read MC_STATUS */
429                 tmp = RREG32(0x0150);
430                 if (tmp & (1 << 4)) {
431                         return 0;
432                 }
433                 DRM_UDELAY(1);
434         }
435         return -1;
436 }
437
/*
 * r300_gpu_init - reset HDP and program the GB pipe configuration.
 *
 * Derives the number of GB pipes from the chip family, programs
 * GB_TILE_CONFIG accordingly, and waits for the GUI and MC to go
 * idle between programming steps.
 */
void r300_gpu_init(struct radeon_device *rdev)
{
	uint32_t gb_tile_config, tmp;

	r100_hdp_reset(rdev);
	/* FIXME: rv380 one pipes ? */
	if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
		/* r300,r350 */
		rdev->num_gb_pipes = 2;
	} else {
		/* rv350,rv370,rv380 */
		rdev->num_gb_pipes = 1;
	}
	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
	switch (rdev->num_gb_pipes) {
	case 2:
		gb_tile_config |= R300_PIPE_COUNT_R300;
		break;
	case 3:
		gb_tile_config |= R300_PIPE_COUNT_R420_3P;
		break;
	case 4:
		gb_tile_config |= R300_PIPE_COUNT_R420;
		break;
	default:
	case 1:
		gb_tile_config |= R300_PIPE_COUNT_RV350;
		break;
	}
	WREG32(R300_GB_TILE_CONFIG, gb_tile_config);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}

	/* NOTE(review): 0x170C also gets bit 31 set via the ring in
	 * r300_ring_start; register name not visible here - confirm */
	tmp = RREG32(0x170C);
	WREG32(0x170C, tmp | (1 << 31));

	WREG32(R300_RB2D_DSTCACHE_MODE,
	       R300_DC_AUTOFLUSH_ENABLE |
	       R300_DC_DC_DISABLE_IGNORE_PE);

	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	if (r300_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes);
}
491
/*
 * r300_ga_reset - soft reset the GA/VAP (and CP queue) block.
 *
 * Marks the CP not ready, then repeatedly pulses RBBM_SOFT_RESET
 * until the VAP and CP busy bits (20 and 26) of RBBM_STATUS clear.
 * If the CP was running beforehand it is re-initialized on success.
 * Returns 0 (or r100_cp_init's result) on success, -1 on timeout.
 */
int r300_ga_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	bool reinit_cp;
	int i;

	/* remember whether the CP must be brought back up afterwards */
	reinit_cp = rdev->cp.ready;
	rdev->cp.ready = false;
	for (i = 0; i < rdev->usec_timeout; i++) {
		WREG32(RADEON_CP_CSQ_MODE, 0);
		WREG32(RADEON_CP_CSQ_CNTL, 0);
		WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
		/* read back to post the write before the delay */
		(void)RREG32(RADEON_RBBM_SOFT_RESET);
		udelay(200);
		WREG32(RADEON_RBBM_SOFT_RESET, 0);
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (tmp & ((1 << 20) | (1 << 26))) {
			DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)", tmp);
			/* GA still busy soft reset it */
			/* NOTE(review): 0x43E0/0x43E4 appear to be the SC
			 * registers also used in r300_fence_ring_emit;
			 * 0x429C/0x24AC are unnamed here - confirm vs docs */
			WREG32(0x429C, 0x200);
			WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
			WREG32(0x43E0, 0);
			WREG32(0x43E4, 0);
			WREG32(0x24AC, 0);
		}
		/* Wait to prevent race in RBBM_STATUS */
		mdelay(1);
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			break;
		}
	}
	/* final poll: confirm the busy bits stay clear */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & ((1 << 20) | (1 << 26)))) {
			DRM_INFO("GA reset succeed (RBBM_STATUS=0x%08X)\n",
				 tmp);
			if (reinit_cp) {
				return r100_cp_init(rdev, rdev->cp.ring_size);
			}
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset GA ! (RBBM_STATUS=0x%08X)\n", tmp);
	return -1;
}
542
543 int r300_gpu_reset(struct radeon_device *rdev)
544 {
545         uint32_t status;
546
547         /* reset order likely matter */
548         status = RREG32(RADEON_RBBM_STATUS);
549         /* reset HDP */
550         r100_hdp_reset(rdev);
551         /* reset rb2d */
552         if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
553                 r100_rb2d_reset(rdev);
554         }
555         /* reset GA */
556         if (status & ((1 << 20) | (1 << 26))) {
557                 r300_ga_reset(rdev);
558         }
559         /* reset CP */
560         status = RREG32(RADEON_RBBM_STATUS);
561         if (status & (1 << 16)) {
562                 r100_cp_reset(rdev);
563         }
564         /* Check if GPU is idle */
565         status = RREG32(RADEON_RBBM_STATUS);
566         if (status & (1 << 31)) {
567                 DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
568                 return -1;
569         }
570         DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
571         return 0;
572 }
573
574
575 /*
576  * r300,r350,rv350,rv380 VRAM info
577  */
578 void r300_vram_info(struct radeon_device *rdev)
579 {
580         uint32_t tmp;
581
582         /* DDR for all card after R300 & IGP */
583         rdev->mc.vram_is_ddr = true;
584         tmp = RREG32(RADEON_MEM_CNTL);
585         if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
586                 rdev->mc.vram_width = 128;
587         } else {
588                 rdev->mc.vram_width = 64;
589         }
590
591         r100_vram_init_sizes(rdev);
592 }
593
594
595 /*
596  * PCIE Lanes
597  */
598
/*
 * rv370_set_pcie_lanes - reconfigure the PCIE link width.
 *
 * @lanes: requested lane count (0/1/2/4/8/12/16; anything else
 *         falls back to x16).
 *
 * No-op on IGP and non-PCIE parts.  If the link already reports the
 * requested width nothing is written; otherwise the new width is
 * programmed, RECONFIG_NOW is pulsed, and we poll until the register
 * reads back something other than all-ones.
 */
void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
{
	uint32_t link_width_cntl, mask;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* FIXME wait for idle */

	switch (lanes) {
	case 0:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
		break;
	case 1:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
		break;
	case 2:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
		break;
	case 4:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
		break;
	case 8:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
		break;
	case 12:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
		break;
	case 16:
	default:
		mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
		break;
	}

	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

	/* already at the requested width - nothing to do */
	if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
	    (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
		return;

	link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
			     RADEON_PCIE_LC_RECONFIG_NOW |
			     RADEON_PCIE_LC_RECONFIG_LATER |
			     RADEON_PCIE_LC_SHORT_RECONFIG_EN);
	link_width_cntl |= mask;
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
	WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
						     RADEON_PCIE_LC_RECONFIG_NOW));

	/* wait for lane set to complete */
	/* NOTE(review): unbounded poll - this spins forever if the register
	 * keeps reading 0xffffffff (e.g. device dropped off the bus);
	 * consider adding a timeout */
	link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
	while (link_width_cntl == 0xffffffff)
		link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

}
657
658
659 /*
660  * Debugfs info
661  */
662 #if defined(CONFIG_DEBUG_FS)
663 static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
664 {
665         struct drm_info_node *node = (struct drm_info_node *) m->private;
666         struct drm_device *dev = node->minor->dev;
667         struct radeon_device *rdev = dev->dev_private;
668         uint32_t tmp;
669
670         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
671         seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n", tmp);
672         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_BASE);
673         seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n", tmp);
674         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO);
675         seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n", tmp);
676         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI);
677         seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n", tmp);
678         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO);
679         seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n", tmp);
680         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI);
681         seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n", tmp);
682         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR);
683         seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n", tmp);
684         return 0;
685 }
686
/* debugfs entry table exposing the PCIE GART register dump above */
static struct drm_info_list rv370_pcie_gart_info_list[] = {
	{"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
};
690 #endif
691
/*
 * rv370_debugfs_pcie_gart_info_init - register the PCIE GART debugfs file.
 *
 * Returns the result of radeon_debugfs_add_files(), or 0 (no-op) when
 * debugfs support is not compiled in.
 */
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list, 1);
#else
	return 0;
#endif
}
700
701
702 /*
703  * CS functions
704  */
/* Per color/depth buffer state tracked while validating a CS */
struct r300_cs_track_cb {
	struct radeon_object	*robj;		/* bound buffer object */
	unsigned		pitch;		/* pitch in pixels (multiplied by cpp when sizing) */
	unsigned		cpp;		/* bytes per pixel */
	unsigned		offset;		/* byte offset of the buffer start */
};
711
/* Vertex array binding tracked while validating a CS */
struct r300_cs_track_array {
	struct radeon_object	*robj;		/* buffer object backing the array */
	unsigned		esize;		/* element size - presumably dwords; TODO confirm at use site */
};
716
/* Per texture-unit state consumed by r300_cs_track_texture_check() */
struct r300_cs_track_texture {
	struct radeon_object	*robj;		/* bound texture buffer object */
	unsigned		pitch;		/* row pitch, used instead of width when use_pitch is set */
	unsigned		width;
	unsigned		height;
	unsigned		num_levels;	/* last mipmap level index (checked inclusively) */
	unsigned		cpp;		/* bytes per texel */
	unsigned		tex_coord_type;	/* 0 = 2D, 1 = 3D (x depth), 2 = cube map (x6) */
	unsigned		txdepth;	/* log2 depth for 3D textures */
	unsigned		width_11;	/* extra width bit OR'd in on rv515+ - TODO confirm semantics */
	unsigned		height_11;	/* extra height bit OR'd in on rv515+ - TODO confirm semantics */
	bool			use_pitch;	/* size levels from pitch instead of width */
	bool			enabled;	/* unit active; disabled units are skipped */
	bool			roundup_w;	/* round per-level width up to a power of two */
	bool			roundup_h;	/* round per-level height up to a power of two */
};
733
/* Aggregate state accumulated while parsing/validating one command stream */
struct r300_cs_track {
	unsigned			num_cb;		/* number of active color buffers */
	unsigned			maxy;		/* max y extent, used when sizing CB/ZB */
	unsigned			vtx_size;
	unsigned			vap_vf_cntl;
	unsigned			immd_dwords;
	unsigned			num_arrays;
	unsigned			max_indx;
	struct r300_cs_track_array	arrays[11];
	struct r300_cs_track_cb		cb[4];		/* color buffers */
	struct r300_cs_track_cb		zb;		/* depth buffer */
	struct r300_cs_track_texture	textures[16];	/* one per texture unit */
	bool				z_enabled;
};
748
/* Dump the tracked state of one texture unit; used when CS validation
 * rejects a texture so the error report shows what was bound. */
static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t)
{
	DRM_ERROR("pitch                      %d\n", t->pitch);
	DRM_ERROR("width                      %d\n", t->width);
	DRM_ERROR("height                     %d\n", t->height);
	DRM_ERROR("num levels                 %d\n", t->num_levels);
	DRM_ERROR("depth                      %d\n", t->txdepth);
	DRM_ERROR("bpp                        %d\n", t->cpp);
	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
}
761
/*
 * r300_cs_track_texture_check - validate bound textures against their BOs.
 *
 * For every enabled texture unit, walks the full mipmap chain,
 * accumulates the required byte size (scaling for 3D depth or the six
 * cube-map faces) and checks it fits within the bound radeon_object.
 * Returns 0 when all textures fit, -EINVAL otherwise.
 */
static inline int r300_cs_track_texture_check(struct radeon_device *rdev,
					      struct r300_cs_track *track)
{
	struct radeon_object *robj;
	unsigned long size;
	unsigned u, i, w, h;

	for (u = 0; u < 16; u++) {
		if (!track->textures[u].enabled)
			continue;
		robj = track->textures[u].robj;
		if (robj == NULL) {
			DRM_ERROR("No texture bound to unit %u\n", u);
			return -EINVAL;
		}
		size = 0;
		/* sum texels over the whole mip chain; num_levels is the
		 * last level index, hence the inclusive bound */
		for (i = 0; i <= track->textures[u].num_levels; i++) {
			if (track->textures[u].use_pitch) {
				w = track->textures[u].pitch / (1 << i);
			} else {
				w = track->textures[u].width / (1 << i);
				if (rdev->family >= CHIP_RV515)
					/* extra size bit on rv515+ - see struct field note */
					w |= track->textures[u].width_11;
				if (track->textures[u].roundup_w)
					w = roundup_pow_of_two(w);
			}
			h = track->textures[u].height / (1 << i);
			if (rdev->family >= CHIP_RV515)
				h |= track->textures[u].height_11;
			if (track->textures[u].roundup_h)
				h = roundup_pow_of_two(h);
			size += w * h;
		}
		size *= track->textures[u].cpp;
		switch (track->textures[u].tex_coord_type) {
		case 0:
			/* 2D texture - nothing extra */
			break;
		case 1:
			/* 3D texture - scale by depth */
			size *= (1 << track->textures[u].txdepth);
			break;
		case 2:
			/* cube map - six faces */
			size *= 6;
			break;
		default:
			DRM_ERROR("Invalid texture coordinate type %u for unit "
				  "%u\n", track->textures[u].tex_coord_type, u);
			return -EINVAL;
		}
		if (size > radeon_object_size(robj)) {
			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
				  "%lu\n", u, size, radeon_object_size(robj));
			r300_cs_track_texture_print(&track->textures[u]);
			return -EINVAL;
		}
	}
	return 0;
}
819
820 int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
821 {
822         unsigned i;
823         unsigned long size;
824         unsigned prim_walk;
825         unsigned nverts;
826
827         for (i = 0; i < track->num_cb; i++) {
828                 if (track->cb[i].robj == NULL) {
829                         DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
830                         return -EINVAL;
831                 }
832                 size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
833                 size += track->cb[i].offset;
834                 if (size > radeon_object_size(track->cb[i].robj)) {
835                         DRM_ERROR("[drm] Buffer too small for color buffer %d "
836                                   "(need %lu have %lu) !\n", i, size,
837                                   radeon_object_size(track->cb[i].robj));
838                         DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
839                                   i, track->cb[i].pitch, track->cb[i].cpp,
840                                   track->cb[i].offset, track->maxy);
841                         return -EINVAL;
842                 }
843         }
844         if (track->z_enabled) {
845                 if (track->zb.robj == NULL) {
846                         DRM_ERROR("[drm] No buffer for z buffer !\n");
847                         return -EINVAL;
848                 }
849                 size = track->zb.pitch * track->zb.cpp * track->maxy;
850                 size += track->zb.offset;
851                 if (size > radeon_object_size(track->zb.robj)) {
852                         DRM_ERROR("[drm] Buffer too small for z buffer "
853                                   "(need %lu have %lu) !\n", size,
854                                   radeon_object_size(track->zb.robj));
855                         return -EINVAL;
856                 }
857         }
858         prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
859         nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
860         switch (prim_walk) {
861         case 1:
862                 for (i = 0; i < track->num_arrays; i++) {
863                         size = track->arrays[i].esize * track->max_indx * 4;
864                         if (track->arrays[i].robj == NULL) {
865                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
866                                           "bound\n", prim_walk, i);
867                                 return -EINVAL;
868                         }
869                         if (size > radeon_object_size(track->arrays[i].robj)) {
870                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
871                                            "have %lu dwords\n", prim_walk, i,
872                                            size >> 2,
873                                            radeon_object_size(track->arrays[i].robj) >> 2);
874                                 DRM_ERROR("Max indices %u\n", track->max_indx);
875                                 return -EINVAL;
876                         }
877                 }
878                 break;
879         case 2:
880                 for (i = 0; i < track->num_arrays; i++) {
881                         size = track->arrays[i].esize * (nverts - 1) * 4;
882                         if (track->arrays[i].robj == NULL) {
883                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
884                                           "bound\n", prim_walk, i);
885                                 return -EINVAL;
886                         }
887                         if (size > radeon_object_size(track->arrays[i].robj)) {
888                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
889                                            "have %lu dwords\n", prim_walk, i, size >> 2,
890                                            radeon_object_size(track->arrays[i].robj) >> 2);
891                                 return -EINVAL;
892                         }
893                 }
894                 break;
895         case 3:
896                 size = track->vtx_size * nverts;
897                 if (size != track->immd_dwords) {
898                         DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
899                                   track->immd_dwords, size);
900                         DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
901                                   nverts, track->vtx_size);
902                         return -EINVAL;
903                 }
904                 break;
905         default:
906                 DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
907                           prim_walk);
908                 return -EINVAL;
909         }
910         return r300_cs_track_texture_check(rdev, track);
911 }
912
913 static inline void r300_cs_track_clear(struct r300_cs_track *track)
914 {
915         unsigned i;
916
917         track->num_cb = 4;
918         track->maxy = 4096;
919         for (i = 0; i < track->num_cb; i++) {
920                 track->cb[i].robj = NULL;
921                 track->cb[i].pitch = 8192;
922                 track->cb[i].cpp = 16;
923                 track->cb[i].offset = 0;
924         }
925         track->z_enabled = true;
926         track->zb.robj = NULL;
927         track->zb.pitch = 8192;
928         track->zb.cpp = 4;
929         track->zb.offset = 0;
930         track->vtx_size = 0x7F;
931         track->immd_dwords = 0xFFFFFFFFUL;
932         track->num_arrays = 11;
933         track->max_indx = 0x00FFFFFFUL;
934         for (i = 0; i < track->num_arrays; i++) {
935                 track->arrays[i].robj = NULL;
936                 track->arrays[i].esize = 0x7F;
937         }
938         for (i = 0; i < 16; i++) {
939                 track->textures[i].pitch = 16536;
940                 track->textures[i].width = 16536;
941                 track->textures[i].height = 16536;
942                 track->textures[i].width_11 = 1 << 11;
943                 track->textures[i].height_11 = 1 << 11;
944                 track->textures[i].num_levels = 12;
945                 track->textures[i].txdepth = 16;
946                 track->textures[i].cpp = 64;
947                 track->textures[i].tex_coord_type = 1;
948                 track->textures[i].robj = NULL;
949                 /* CS IB emission code makes sure texture unit are disabled */
950                 track->textures[i].enabled = false;
951                 track->textures[i].roundup_w = true;
952                 track->textures[i].roundup_h = true;
953         }
954 }
955
/* Bitmap of registers a userspace CS is allowed to write without a reloc
 * check: one bit per register dword, a set bit marks the register safe.
 * NOTE(review): presumably indexed by (reg offset >> 2) relative to some
 * base, matching how r300_packet0_check maps registers -- confirm against
 * the CS checker that consumes this table. */
static const unsigned r300_reg_safe_bm[159] = {
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
        0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
        0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
        0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
        0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
        0x00000000, 0x0000C100, 0x00000000, 0x00000000,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
        0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
        0x0003FC01, 0xFFFFFFF8, 0xFE800B19,
};
998
999 static int r300_packet0_check(struct radeon_cs_parser *p,
1000                 struct radeon_cs_packet *pkt,
1001                 unsigned idx, unsigned reg)
1002 {
1003         struct radeon_cs_chunk *ib_chunk;
1004         struct radeon_cs_reloc *reloc;
1005         struct r300_cs_track *track;
1006         volatile uint32_t *ib;
1007         uint32_t tmp, tile_flags = 0;
1008         unsigned i;
1009         int r;
1010
1011         ib = p->ib->ptr;
1012         ib_chunk = &p->chunks[p->chunk_ib_idx];
1013         track = (struct r300_cs_track*)p->track;
1014         switch(reg) {
1015         case AVIVO_D1MODE_VLINE_START_END:
1016         case RADEON_CRTC_GUI_TRIG_VLINE:
1017                 r = r100_cs_packet_parse_vline(p);
1018                 if (r) {
1019                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1020                                         idx, reg);
1021                         r100_cs_dump_packet(p, pkt);
1022                         return r;
1023                 }
1024                 break;
1025         case RADEON_DST_PITCH_OFFSET:
1026         case RADEON_SRC_PITCH_OFFSET:
1027                 r = r100_cs_packet_next_reloc(p, &reloc);
1028                 if (r) {
1029                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1030                                         idx, reg);
1031                         r100_cs_dump_packet(p, pkt);
1032                         return r;
1033                 }
1034                 tmp = ib_chunk->kdata[idx] & 0x003fffff;
1035                 tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
1036
1037                 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1038                         tile_flags |= RADEON_DST_TILE_MACRO;
1039                 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
1040                         if (reg == RADEON_SRC_PITCH_OFFSET) {
1041                                 DRM_ERROR("Cannot src blit from microtiled surface\n");
1042                                 r100_cs_dump_packet(p, pkt);
1043                                 return -EINVAL;
1044                         }
1045                         tile_flags |= RADEON_DST_TILE_MICRO;
1046                 }
1047                 tmp |= tile_flags;
1048                 ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
1049                 break;
1050         case R300_RB3D_COLOROFFSET0:
1051         case R300_RB3D_COLOROFFSET1:
1052         case R300_RB3D_COLOROFFSET2:
1053         case R300_RB3D_COLOROFFSET3:
1054                 i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
1055                 r = r100_cs_packet_next_reloc(p, &reloc);
1056                 if (r) {
1057                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1058                                         idx, reg);
1059                         r100_cs_dump_packet(p, pkt);
1060                         return r;
1061                 }
1062                 track->cb[i].robj = reloc->robj;
1063                 track->cb[i].offset = ib_chunk->kdata[idx];
1064                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1065                 break;
1066         case R300_ZB_DEPTHOFFSET:
1067                 r = r100_cs_packet_next_reloc(p, &reloc);
1068                 if (r) {
1069                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1070                                         idx, reg);
1071                         r100_cs_dump_packet(p, pkt);
1072                         return r;
1073                 }
1074                 track->zb.robj = reloc->robj;
1075                 track->zb.offset = ib_chunk->kdata[idx];
1076                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1077                 break;
1078         case R300_TX_OFFSET_0:
1079         case R300_TX_OFFSET_0+4:
1080         case R300_TX_OFFSET_0+8:
1081         case R300_TX_OFFSET_0+12:
1082         case R300_TX_OFFSET_0+16:
1083         case R300_TX_OFFSET_0+20:
1084         case R300_TX_OFFSET_0+24:
1085         case R300_TX_OFFSET_0+28:
1086         case R300_TX_OFFSET_0+32:
1087         case R300_TX_OFFSET_0+36:
1088         case R300_TX_OFFSET_0+40:
1089         case R300_TX_OFFSET_0+44:
1090         case R300_TX_OFFSET_0+48:
1091         case R300_TX_OFFSET_0+52:
1092         case R300_TX_OFFSET_0+56:
1093         case R300_TX_OFFSET_0+60:
1094                 i = (reg - R300_TX_OFFSET_0) >> 2;
1095                 r = r100_cs_packet_next_reloc(p, &reloc);
1096                 if (r) {
1097                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1098                                         idx, reg);
1099                         r100_cs_dump_packet(p, pkt);
1100                         return r;
1101                 }
1102                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1103                 track->textures[i].robj = reloc->robj;
1104                 break;
1105         /* Tracked registers */
1106         case 0x2084:
1107                 /* VAP_VF_CNTL */
1108                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1109                 break;
1110         case 0x20B4:
1111                 /* VAP_VTX_SIZE */
1112                 track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
1113                 break;
1114         case 0x2134:
1115                 /* VAP_VF_MAX_VTX_INDX */
1116                 track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
1117                 break;
1118         case 0x43E4:
1119                 /* SC_SCISSOR1 */
1120                 track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
1121                 if (p->rdev->family < CHIP_RV515) {
1122                         track->maxy -= 1440;
1123                 }
1124                 break;
1125         case 0x4E00:
1126                 /* RB3D_CCTL */
1127                 track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
1128                 break;
1129         case 0x4E38:
1130         case 0x4E3C:
1131         case 0x4E40:
1132         case 0x4E44:
1133                 /* RB3D_COLORPITCH0 */
1134                 /* RB3D_COLORPITCH1 */
1135                 /* RB3D_COLORPITCH2 */
1136                 /* RB3D_COLORPITCH3 */
1137                 r = r100_cs_packet_next_reloc(p, &reloc);
1138                 if (r) {
1139                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1140                                   idx, reg);
1141                         r100_cs_dump_packet(p, pkt);
1142                         return r;
1143                 }
1144
1145                 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1146                         tile_flags |= R300_COLOR_TILE_ENABLE;
1147                 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1148                         tile_flags |= R300_COLOR_MICROTILE_ENABLE;
1149
1150                 tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
1151                 tmp |= tile_flags;
1152                 ib[idx] = tmp;
1153
1154                 i = (reg - 0x4E38) >> 2;
1155                 track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
1156                 switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
1157                 case 9:
1158                 case 11:
1159                 case 12:
1160                         track->cb[i].cpp = 1;
1161                         break;
1162                 case 3:
1163                 case 4:
1164                 case 13:
1165                 case 15:
1166                         track->cb[i].cpp = 2;
1167                         break;
1168                 case 6:
1169                         track->cb[i].cpp = 4;
1170                         break;
1171                 case 10:
1172                         track->cb[i].cpp = 8;
1173                         break;
1174                 case 7:
1175                         track->cb[i].cpp = 16;
1176                         break;
1177                 default:
1178                         DRM_ERROR("Invalid color buffer format (%d) !\n",
1179                                   ((ib_chunk->kdata[idx] >> 21) & 0xF));
1180                         return -EINVAL;
1181                 }
1182                 break;
1183         case 0x4F00:
1184                 /* ZB_CNTL */
1185                 if (ib_chunk->kdata[idx] & 2) {
1186                         track->z_enabled = true;
1187                 } else {
1188                         track->z_enabled = false;
1189                 }
1190                 break;
1191         case 0x4F10:
1192                 /* ZB_FORMAT */
1193                 switch ((ib_chunk->kdata[idx] & 0xF)) {
1194                 case 0:
1195                 case 1:
1196                         track->zb.cpp = 2;
1197                         break;
1198                 case 2:
1199                         track->zb.cpp = 4;
1200                         break;
1201                 default:
1202                         DRM_ERROR("Invalid z buffer format (%d) !\n",
1203                                   (ib_chunk->kdata[idx] & 0xF));
1204                         return -EINVAL;
1205                 }
1206                 break;
1207         case 0x4F24:
1208                 /* ZB_DEPTHPITCH */
1209                 r = r100_cs_packet_next_reloc(p, &reloc);
1210                 if (r) {
1211                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1212                                   idx, reg);
1213                         r100_cs_dump_packet(p, pkt);
1214                         return r;
1215                 }
1216
1217                 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1218                         tile_flags |= R300_DEPTHMACROTILE_ENABLE;
1219                 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1220                         tile_flags |= R300_DEPTHMICROTILE_TILED;;
1221
1222                 tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
1223                 tmp |= tile_flags;
1224                 ib[idx] = tmp;
1225
1226                 track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
1227                 break;
1228         case 0x4104:
1229                 for (i = 0; i < 16; i++) {
1230                         bool enabled;
1231
1232                         enabled = !!(ib_chunk->kdata[idx] & (1 << i));
1233                         track->textures[i].enabled = enabled;
1234                 }
1235                 break;
1236         case 0x44C0:
1237         case 0x44C4:
1238         case 0x44C8:
1239         case 0x44CC:
1240         case 0x44D0:
1241         case 0x44D4:
1242         case 0x44D8:
1243         case 0x44DC:
1244         case 0x44E0:
1245         case 0x44E4:
1246         case 0x44E8:
1247         case 0x44EC:
1248         case 0x44F0:
1249         case 0x44F4:
1250         case 0x44F8:
1251         case 0x44FC:
1252                 /* TX_FORMAT1_[0-15] */
1253                 i = (reg - 0x44C0) >> 2;
1254                 tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
1255                 track->textures[i].tex_coord_type = tmp;
1256                 switch ((ib_chunk->kdata[idx] & 0x1F)) {
1257                 case 0:
1258                 case 2:
1259                 case 5:
1260                 case 18:
1261                 case 20:
1262                 case 21:
1263                         track->textures[i].cpp = 1;
1264                         break;
1265                 case 1:
1266                 case 3:
1267                 case 6:
1268                 case 7:
1269                 case 10:
1270                 case 11:
1271                 case 19:
1272                 case 22:
1273                 case 24:
1274                         track->textures[i].cpp = 2;
1275                         break;
1276                 case 4:
1277                 case 8:
1278                 case 9:
1279                 case 12:
1280                 case 13:
1281                 case 23:
1282                 case 25:
1283                 case 27:
1284                 case 30:
1285                         track->textures[i].cpp = 4;
1286                         break;
1287                 case 14:
1288                 case 26:
1289                 case 28:
1290                         track->textures[i].cpp = 8;
1291                         break;
1292                 case 29:
1293                         track->textures[i].cpp = 16;
1294                         break;
1295                 default:
1296                         DRM_ERROR("Invalid texture format %u\n",
1297                                   (ib_chunk->kdata[idx] & 0x1F));
1298                         return -EINVAL;
1299                         break;
1300                 }
1301                 break;
1302         case 0x4400:
1303         case 0x4404:
1304         case 0x4408:
1305         case 0x440C:
1306         case 0x4410:
1307         case 0x4414:
1308         case 0x4418:
1309         case 0x441C:
1310         case 0x4420:
1311         case 0x4424:
1312         case 0x4428:
1313         case 0x442C:
1314         case 0x4430:
1315         case 0x4434:
1316         case 0x4438:
1317         case 0x443C:
1318                 /* TX_FILTER0_[0-15] */
1319                 i = (reg - 0x4400) >> 2;
1320                 tmp = ib_chunk->kdata[idx] & 0x7;;
1321                 if (tmp == 2 || tmp == 4 || tmp == 6) {
1322                         track->textures[i].roundup_w = false;
1323                 }
1324                 tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;;
1325                 if (tmp == 2 || tmp == 4 || tmp == 6) {
1326                         track->textures[i].roundup_h = false;
1327                 }
1328                 break;
1329         case 0x4500:
1330         case 0x4504:
1331         case 0x4508:
1332         case 0x450C:
1333         case 0x4510:
1334         case 0x4514:
1335         case 0x4518:
1336         case 0x451C:
1337         case 0x4520:
1338         case 0x4524:
1339         case 0x4528:
1340         case 0x452C:
1341         case 0x4530:
1342         case 0x4534:
1343         case 0x4538:
1344         case 0x453C:
1345                 /* TX_FORMAT2_[0-15] */
1346                 i = (reg - 0x4500) >> 2;
1347                 tmp = ib_chunk->kdata[idx] & 0x3FFF;
1348                 track->textures[i].pitch = tmp + 1;
1349                 if (p->rdev->family >= CHIP_RV515) {
1350                         tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
1351                         track->textures[i].width_11 = tmp;
1352                         tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
1353                         track->textures[i].height_11 = tmp;
1354                 }
1355                 break;
1356         case 0x4480:
1357         case 0x4484:
1358         case 0x4488:
1359         case 0x448C:
1360         case 0x4490:
1361         case 0x4494:
1362         case 0x4498:
1363         case 0x449C:
1364         case 0x44A0:
1365         case 0x44A4:
1366         case 0x44A8:
1367         case 0x44AC:
1368         case 0x44B0:
1369         case 0x44B4:
1370         case 0x44B8:
1371         case 0x44BC:
1372                 /* TX_FORMAT0_[0-15] */
1373                 i = (reg - 0x4480) >> 2;
1374                 tmp = ib_chunk->kdata[idx] & 0x7FF;
1375                 track->textures[i].width = tmp + 1;
1376                 tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
1377                 track->textures[i].height = tmp + 1;
1378                 tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
1379                 track->textures[i].num_levels = tmp;
1380                 tmp = ib_chunk->kdata[idx] & (1 << 31);
1381                 track->textures[i].use_pitch = !!tmp;
1382                 tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
1383                 track->textures[i].txdepth = tmp;
1384                 break;
1385         case R300_ZB_ZPASS_ADDR:
1386                 r = r100_cs_packet_next_reloc(p, &reloc);
1387                 if (r) {
1388                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1389                                         idx, reg);
1390                         r100_cs_dump_packet(p, pkt);
1391                         return r;
1392                 }
1393                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1394                 break;
1395         case 0x4be8:
1396                 /* valid register only on RV530 */
1397                 if (p->rdev->family == CHIP_RV530)
1398                         break;
1399                 /* fallthrough do not move */
1400         default:
1401                 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1402                        reg, idx);
1403                 return -EINVAL;
1404         }
1405         return 0;
1406 }
1407
1408 static int r300_packet3_check(struct radeon_cs_parser *p,
1409                               struct radeon_cs_packet *pkt)
1410 {
1411         struct radeon_cs_chunk *ib_chunk;
1412         struct radeon_cs_reloc *reloc;
1413         struct r300_cs_track *track;
1414         volatile uint32_t *ib;
1415         unsigned idx;
1416         unsigned i, c;
1417         int r;
1418
1419         ib = p->ib->ptr;
1420         ib_chunk = &p->chunks[p->chunk_ib_idx];
1421         idx = pkt->idx + 1;
1422         track = (struct r300_cs_track*)p->track;
1423         switch(pkt->opcode) {
1424         case PACKET3_3D_LOAD_VBPNTR:
1425                 c = ib_chunk->kdata[idx++] & 0x1F;
1426                 track->num_arrays = c;
1427                 for (i = 0; i < (c - 1); i+=2, idx+=3) {
1428                         r = r100_cs_packet_next_reloc(p, &reloc);
1429                         if (r) {
1430                                 DRM_ERROR("No reloc for packet3 %d\n",
1431                                           pkt->opcode);
1432                                 r100_cs_dump_packet(p, pkt);
1433                                 return r;
1434                         }
1435                         ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1436                         track->arrays[i + 0].robj = reloc->robj;
1437                         track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
1438                         track->arrays[i + 0].esize &= 0x7F;
1439                         r = r100_cs_packet_next_reloc(p, &reloc);
1440                         if (r) {
1441                                 DRM_ERROR("No reloc for packet3 %d\n",
1442                                           pkt->opcode);
1443                                 r100_cs_dump_packet(p, pkt);
1444                                 return r;
1445                         }
1446                         ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
1447                         track->arrays[i + 1].robj = reloc->robj;
1448                         track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
1449                         track->arrays[i + 1].esize &= 0x7F;
1450                 }
1451                 if (c & 1) {
1452                         r = r100_cs_packet_next_reloc(p, &reloc);
1453                         if (r) {
1454                                 DRM_ERROR("No reloc for packet3 %d\n",
1455                                           pkt->opcode);
1456                                 r100_cs_dump_packet(p, pkt);
1457                                 return r;
1458                         }
1459                         ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1460                         track->arrays[i + 0].robj = reloc->robj;
1461                         track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
1462                         track->arrays[i + 0].esize &= 0x7F;
1463                 }
1464                 break;
1465         case PACKET3_INDX_BUFFER:
1466                 r = r100_cs_packet_next_reloc(p, &reloc);
1467                 if (r) {
1468                         DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1469                         r100_cs_dump_packet(p, pkt);
1470                         return r;
1471                 }
1472                 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1473                 r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1474                 if (r) {
1475                         return r;
1476                 }
1477                 break;
1478         /* Draw packet */
1479         case PACKET3_3D_DRAW_IMMD:
1480                 /* Number of dwords is vtx_size * (num_vertices - 1)
1481                  * PRIM_WALK must be equal to 3 vertex data in embedded
1482                  * in cmd stream */
1483                 if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
1484                         DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1485                         return -EINVAL;
1486                 }
1487                 track->vap_vf_cntl = ib_chunk->kdata[idx+1];
1488                 track->immd_dwords = pkt->count - 1;
1489                 r = r300_cs_track_check(p->rdev, track);
1490                 if (r) {
1491                         return r;
1492                 }
1493                 break;
1494         case PACKET3_3D_DRAW_IMMD_2:
1495                 /* Number of dwords is vtx_size * (num_vertices - 1)
1496                  * PRIM_WALK must be equal to 3 vertex data in embedded
1497                  * in cmd stream */
1498                 if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
1499                         DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1500                         return -EINVAL;
1501                 }
1502                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1503                 track->immd_dwords = pkt->count;
1504                 r = r300_cs_track_check(p->rdev, track);
1505                 if (r) {
1506                         return r;
1507                 }
1508                 break;
1509         case PACKET3_3D_DRAW_VBUF:
1510                 track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
1511                 r = r300_cs_track_check(p->rdev, track);
1512                 if (r) {
1513                         return r;
1514                 }
1515                 break;
1516         case PACKET3_3D_DRAW_VBUF_2:
1517                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1518                 r = r300_cs_track_check(p->rdev, track);
1519                 if (r) {
1520                         return r;
1521                 }
1522                 break;
1523         case PACKET3_3D_DRAW_INDX:
1524                 track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
1525                 r = r300_cs_track_check(p->rdev, track);
1526                 if (r) {
1527                         return r;
1528                 }
1529                 break;
1530         case PACKET3_3D_DRAW_INDX_2:
1531                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1532                 r = r300_cs_track_check(p->rdev, track);
1533                 if (r) {
1534                         return r;
1535                 }
1536                 break;
1537         case PACKET3_NOP:
1538                 break;
1539         default:
1540                 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
1541                 return -EINVAL;
1542         }
1543         return 0;
1544 }
1545
1546 int r300_cs_parse(struct radeon_cs_parser *p)
1547 {
1548         struct radeon_cs_packet pkt;
1549         struct r300_cs_track track;
1550         int r;
1551
1552         r300_cs_track_clear(&track);
1553         p->track = &track;
1554         do {
1555                 r = r100_cs_packet_parse(p, &pkt, p->idx);
1556                 if (r) {
1557                         return r;
1558                 }
1559                 p->idx += pkt.count + 2;
1560                 switch (pkt.type) {
1561                 case PACKET_TYPE0:
1562                         r = r100_cs_parse_packet0(p, &pkt,
1563                                                   p->rdev->config.r300.reg_safe_bm,
1564                                                   p->rdev->config.r300.reg_safe_bm_size,
1565                                                   &r300_packet0_check);
1566                         break;
1567                 case PACKET_TYPE2:
1568                         break;
1569                 case PACKET_TYPE3:
1570                         r = r300_packet3_check(p, &pkt);
1571                         break;
1572                 default:
1573                         DRM_ERROR("Unknown packet type %d !\n", pkt.type);
1574                         return -EINVAL;
1575                 }
1576                 if (r) {
1577                         return r;
1578                 }
1579         } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
1580         return 0;
1581 }
1582
1583 int r300_init(struct radeon_device *rdev)
1584 {
1585         rdev->config.r300.reg_safe_bm = r300_reg_safe_bm;
1586         rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm);
1587         return 0;
1588 }