drm/radeon/kms: don't swap PCIEGART PTEs in VRAM.
drivers/gpu/drm/radeon/r300.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_reg.h"
#include "radeon.h"

/* r300,r350,rv350,rv370,rv380 depend on: */
void r100_hdp_reset(struct radeon_device *rdev);
int r100_cp_reset(struct radeon_device *rdev);
int r100_rb2d_reset(struct radeon_device *rdev);
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
int r100_pci_gart_enable(struct radeon_device *rdev);
void r100_pci_gart_disable(struct radeon_device *rdev);
void r100_mc_setup(struct radeon_device *rdev);
void r100_mc_disable_clients(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_cs_packet_parse(struct radeon_cs_parser *p,
                         struct radeon_cs_packet *pkt,
                         unsigned idx);
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p);
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
                              struct radeon_cs_reloc **cs_reloc);
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
                          struct radeon_cs_packet *pkt,
                          const unsigned *auth, unsigned n,
                          radeon_packet0_check_t check);
void r100_cs_dump_packet(struct radeon_cs_parser *p,
                         struct radeon_cs_packet *pkt);
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
                                         struct radeon_cs_packet *pkt,
                                         struct radeon_object *robj);

/* This file gathers functions specific to:
 * r300,r350,rv350,rv370,rv380
 *
 * Some of these functions might be used by newer ASICs.
 */
void r300_gpu_init(struct radeon_device *rdev);
int r300_mc_wait_for_idle(struct radeon_device *rdev);
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);


/*
 * rv370,rv380 PCIE GART
 */
void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
        uint32_t tmp;
        int i;

        /* Workaround for HW bug: do the flush twice */
        for (i = 0; i < 2; i++) {
                tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
                WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
                (void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
                WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
                mb();
        }
}

int rv370_pcie_gart_enable(struct radeon_device *rdev)
{
        uint32_t table_addr;
        uint32_t tmp;
        int r;

        /* Initialize common gart structure */
        r = radeon_gart_init(rdev);
        if (r) {
                return r;
        }
        r = rv370_debugfs_pcie_gart_info_init(rdev);
        if (r) {
                DRM_ERROR("Failed to register debugfs file for PCIE GART!\n");
        }
        rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
        r = radeon_gart_table_vram_alloc(rdev);
        if (r) {
                return r;
        }
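        /* Program the GART aperture. The page table itself lives in VRAM
         * (allocated just above); TX_GART_START/END describe the GTT range
         * in the GPU address space, END being inclusive, hence the last
         * page at gtt_location + gtt_size - 4096. This description is an
         * editorial note inferred from the register writes below. */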
        /* discard memory requests outside of the configured range */
        tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
        WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
        tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
        WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
        WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
        WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
        table_addr = rdev->gart.table_addr;
        WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
        /* FIXME: setup default page */
        WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
        WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
        /* Clear error */
        WREG32_PCIE(0x18, 0);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
        tmp |= RADEON_PCIE_TX_GART_EN;
        tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
        rv370_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
                 rdev->mc.gtt_size >> 20, table_addr);
        rdev->gart.ready = true;
        return 0;
}

void rv370_pcie_gart_disable(struct radeon_device *rdev)
{
        uint32_t tmp;

        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
        tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
        WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
        if (rdev->gart.table.vram.robj) {
                radeon_object_kunmap(rdev->gart.table.vram.robj);
                radeon_object_unpin(rdev->gart.table.vram.robj);
        }
}

int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
        void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;

        if (i < 0 || i >= rdev->gart.num_gpu_pages) {
                return -EINVAL;
        }
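        /* Pack the 40-bit system address into a 32-bit PTE: bits [23:0]
         * hold addr[31:8] and bits [31:24] hold addr[39:32]. Pages are
         * 4KB aligned, so the low PTE bits are free for flags; that 0xc
         * marks the entry valid/read-write is an assumption based on this
         * usage, not taken from documentation. */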
        addr = (lower_32_bits(addr) >> 8) |
               ((upper_32_bits(addr) & 0xff) << 24) |
               0xc;
        /* On x86 we want this to be CPU endian; on powerpc without HW
         * swappers it'll get swapped on the way into VRAM, so there is
         * no need for cpu_to_le32 on VRAM tables. */
        writel(addr, ((void __iomem *)ptr) + (i * 4));
        return 0;
}

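/* GART method selection: AGP boards use the AGP aperture, so both
 * on-chip GARTs are turned off; PCIE chips get the rv370 PCIE GART
 * callbacks patched into the asic table before enabling; everything
 * else falls back to the r100 PCI GART. */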
int r300_gart_enable(struct radeon_device *rdev)
{
#if __OS_HAS_AGP
        if (rdev->flags & RADEON_IS_AGP) {
                if (rdev->family > CHIP_RV350) {
                        rv370_pcie_gart_disable(rdev);
                } else {
                        r100_pci_gart_disable(rdev);
                }
                return 0;
        }
#endif
        if (rdev->flags & RADEON_IS_PCIE) {
                rdev->asic->gart_disable = &rv370_pcie_gart_disable;
                rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
                rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
                return rv370_pcie_gart_enable(rdev);
        }
        return r100_pci_gart_enable(rdev);
}


/*
 * MC
 */
int r300_mc_init(struct radeon_device *rdev)
{
        int r;

        if (r100_debugfs_rbbm_init(rdev)) {
                DRM_ERROR("Failed to register debugfs file for RBBM!\n");
        }

        r300_gpu_init(rdev);
        r100_pci_gart_disable(rdev);
        if (rdev->flags & RADEON_IS_PCIE) {
                rv370_pcie_gart_disable(rdev);
        }

        /* Setup GPU memory space */
        rdev->mc.vram_location = 0xFFFFFFFFUL;
        rdev->mc.gtt_location = 0xFFFFFFFFUL;
        if (rdev->flags & RADEON_IS_AGP) {
                r = radeon_agp_init(rdev);
                if (r) {
                        printk(KERN_WARNING "[drm] Disabling AGP\n");
                        rdev->flags &= ~RADEON_IS_AGP;
                        rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
                } else {
                        rdev->mc.gtt_location = rdev->mc.agp_base;
                }
        }
        r = radeon_mc_setup(rdev);
        if (r) {
                return r;
        }

        /* Program GPU memory space */
        r100_mc_disable_clients(rdev);
        if (r300_mc_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait for MC idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        r100_mc_setup(rdev);
        return 0;
}

void r300_mc_fini(struct radeon_device *rdev)
{
        if (rdev->flags & RADEON_IS_PCIE) {
                rv370_pcie_gart_disable(rdev);
                radeon_gart_table_vram_free(rdev);
        } else {
                r100_pci_gart_disable(rdev);
                radeon_gart_table_ram_free(rdev);
        }
        radeon_gart_fini(rdev);
}


/*
 * Fence emission
 */
void r300_fence_ring_emit(struct radeon_device *rdev,
                          struct radeon_fence *fence)
{
        /* Whoever calls radeon_fence_emit should call ring_lock and ask
         * for enough space (today callers are ib schedule and buffer move) */
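        /* The raw offsets below are believed to map, per the register
         * headers and the CS checker later in this file, to: 0x43E0/0x43E4
         * SC_SCISSOR0/1, 0x4E4C RB3D_DSTCACHE_CTLSTAT, 0x4F18
         * ZB_ZCACHE_CTLSTAT and 0x1720 WAIT_UNTIL. Editorial note, not
         * from the original source. */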
        /* Write SC register so SC & US assert idle */
        radeon_ring_write(rdev, PACKET0(0x43E0, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(0x43E4, 0));
        radeon_ring_write(rdev, 0);
        /* Flush 3D cache */
        radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
        radeon_ring_write(rdev, (2 << 0));
        radeon_ring_write(rdev, PACKET0(0x4F18, 0));
        radeon_ring_write(rdev, (1 << 0));
        /* Wait until IDLE & CLEAN */
        radeon_ring_write(rdev, PACKET0(0x1720, 0));
        radeon_ring_write(rdev, (1 << 17) | (1 << 16) | (1 << 9));
        /* Emit fence sequence & fire IRQ */
        radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
        radeon_ring_write(rdev, fence->seq);
        radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
        radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}


/*
 * Global GPU functions
 */
int r300_copy_dma(struct radeon_device *rdev,
                  uint64_t src_offset,
                  uint64_t dst_offset,
                  unsigned num_pages,
                  struct radeon_fence *fence)
{
        uint32_t size;
        uint32_t cur_size;
        int i, num_loops;
        int r = 0;

        /* radeon pitch is /64 */
        size = num_pages << PAGE_SHIFT;
        num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
        r = radeon_ring_lock(rdev, num_loops * 4 + 64);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                return r;
        }
        /* Must wait for 2D idle & clean before DMA or hangs might happen */
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev, (1 << 16));
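        /* The blit is split into chunks because the size field of the DMA
         * packet is limited to 0x1FFFFF bytes; bits 30 and 31 of the last
         * dword are flag bits (assumed from this usage, not documented
         * here). */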
        for (i = 0; i < num_loops; i++) {
                cur_size = size;
                if (cur_size > 0x1FFFFF) {
                        cur_size = 0x1FFFFF;
                }
                size -= cur_size;
                radeon_ring_write(rdev, PACKET0(0x720, 2));
                radeon_ring_write(rdev, src_offset);
                radeon_ring_write(rdev, dst_offset);
                radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
                src_offset += cur_size;
                dst_offset += cur_size;
        }
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
        if (fence) {
                r = radeon_fence_emit(rdev, fence);
        }
        radeon_ring_unlock_commit(rdev);
        return r;
}

void r300_ring_start(struct radeon_device *rdev)
{
        unsigned gb_tile_config;
        int r;

        /* Sub-pixel precision 1/12 so we can have 4K rendering according to the docs */
        gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
        switch (rdev->num_gb_pipes) {
        case 2:
                gb_tile_config |= R300_PIPE_COUNT_R300;
                break;
        case 3:
                gb_tile_config |= R300_PIPE_COUNT_R420_3P;
                break;
        case 4:
                gb_tile_config |= R300_PIPE_COUNT_R420;
                break;
        case 1:
        default:
                gb_tile_config |= R300_PIPE_COUNT_RV350;
                break;
        }

        r = radeon_ring_lock(rdev, 64);
        if (r) {
                return;
        }
        radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
        radeon_ring_write(rdev,
                          RADEON_ISYNC_ANY2D_IDLE3D |
                          RADEON_ISYNC_ANY3D_IDLE2D |
                          RADEON_ISYNC_WAIT_IDLEGUI |
                          RADEON_ISYNC_CPSCRATCH_IDLEGUI);
        radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
        radeon_ring_write(rdev, gb_tile_config);
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev,
                          RADEON_WAIT_2D_IDLECLEAN |
                          RADEON_WAIT_3D_IDLECLEAN);
        radeon_ring_write(rdev, PACKET0(0x170C, 0));
        radeon_ring_write(rdev, 1 << 31);
        radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
        radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(rdev,
                          RADEON_WAIT_2D_IDLECLEAN |
                          RADEON_WAIT_3D_IDLECLEAN);
        radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
        radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
        radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
        radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
        radeon_ring_write(rdev,
                          ((6 << R300_MS_X0_SHIFT) |
                           (6 << R300_MS_Y0_SHIFT) |
                           (6 << R300_MS_X1_SHIFT) |
                           (6 << R300_MS_Y1_SHIFT) |
                           (6 << R300_MS_X2_SHIFT) |
                           (6 << R300_MS_Y2_SHIFT) |
                           (6 << R300_MSBD0_Y_SHIFT) |
                           (6 << R300_MSBD0_X_SHIFT)));
        radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
        radeon_ring_write(rdev,
                          ((6 << R300_MS_X3_SHIFT) |
                           (6 << R300_MS_Y3_SHIFT) |
                           (6 << R300_MS_X4_SHIFT) |
                           (6 << R300_MS_Y4_SHIFT) |
                           (6 << R300_MS_X5_SHIFT) |
                           (6 << R300_MS_Y5_SHIFT) |
                           (6 << R300_MSBD1_SHIFT)));
        radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
        radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
        radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
        radeon_ring_write(rdev,
                          R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
        radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
        radeon_ring_write(rdev,
                          R300_GEOMETRY_ROUND_NEAREST |
                          R300_COLOR_ROUND_NEAREST);
        radeon_ring_unlock_commit(rdev);
}

void r300_errata(struct radeon_device *rdev)
{
        rdev->pll_errata = 0;

        if (rdev->family == CHIP_R300 &&
            (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
                rdev->pll_errata |= CHIP_ERRATA_R300_CG;
        }
}

int r300_mc_wait_for_idle(struct radeon_device *rdev)
{
        unsigned i;
        uint32_t tmp;

        for (i = 0; i < rdev->usec_timeout; i++) {
                /* read MC_STATUS */
                tmp = RREG32(0x0150);
                if (tmp & (1 << 4)) {
                        return 0;
                }
                DRM_UDELAY(1);
        }
        return -1;
}

void r300_gpu_init(struct radeon_device *rdev)
{
        uint32_t gb_tile_config, tmp;

        r100_hdp_reset(rdev);
        /* FIXME: rv380: one pipe? */
        if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
                /* r300,r350 */
                rdev->num_gb_pipes = 2;
        } else {
                /* rv350,rv370,rv380 */
                rdev->num_gb_pipes = 1;
        }
        gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
        switch (rdev->num_gb_pipes) {
        case 2:
                gb_tile_config |= R300_PIPE_COUNT_R300;
                break;
        case 3:
                gb_tile_config |= R300_PIPE_COUNT_R420_3P;
                break;
        case 4:
                gb_tile_config |= R300_PIPE_COUNT_R420;
                break;
        default:
        case 1:
                gb_tile_config |= R300_PIPE_COUNT_RV350;
                break;
        }
        WREG32(R300_GB_TILE_CONFIG, gb_tile_config);

        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait for GUI idle while "
                       "programming pipes. Bad things might happen.\n");
        }

        tmp = RREG32(0x170C);
        WREG32(0x170C, tmp | (1 << 31));

        WREG32(R300_RB2D_DSTCACHE_MODE,
               R300_DC_AUTOFLUSH_ENABLE |
               R300_DC_DC_DISABLE_IGNORE_PE);

        if (r100_gui_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait for GUI idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        if (r300_mc_wait_for_idle(rdev)) {
                printk(KERN_WARNING "Failed to wait for MC idle while "
                       "programming pipes. Bad things might happen.\n");
        }
        DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes);
}

int r300_ga_reset(struct radeon_device *rdev)
{
        uint32_t tmp;
        bool reinit_cp;
        int i;

        reinit_cp = rdev->cp.ready;
        rdev->cp.ready = false;
        for (i = 0; i < rdev->usec_timeout; i++) {
                WREG32(RADEON_CP_CSQ_MODE, 0);
                WREG32(RADEON_CP_CSQ_CNTL, 0);
                WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
                (void)RREG32(RADEON_RBBM_SOFT_RESET);
                udelay(200);
                WREG32(RADEON_RBBM_SOFT_RESET, 0);
                /* Wait to prevent race in RBBM_STATUS */
                mdelay(1);
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (tmp & ((1 << 20) | (1 << 26))) {
                        DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)", tmp);
                        /* GA still busy, soft reset it */
                        WREG32(0x429C, 0x200);
                        WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
                        WREG32(0x43E0, 0);
                        WREG32(0x43E4, 0);
                        WREG32(0x24AC, 0);
                }
                /* Wait to prevent race in RBBM_STATUS */
                mdelay(1);
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (!(tmp & ((1 << 20) | (1 << 26)))) {
                        break;
                }
        }
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(RADEON_RBBM_STATUS);
                if (!(tmp & ((1 << 20) | (1 << 26)))) {
                        DRM_INFO("GA reset succeeded (RBBM_STATUS=0x%08X)\n",
                                 tmp);
                        if (reinit_cp) {
                                return r100_cp_init(rdev, rdev->cp.ring_size);
                        }
                        return 0;
                }
                DRM_UDELAY(1);
        }
        tmp = RREG32(RADEON_RBBM_STATUS);
        DRM_ERROR("Failed to reset GA! (RBBM_STATUS=0x%08X)\n", tmp);
        return -1;
}

int r300_gpu_reset(struct radeon_device *rdev)
{
        uint32_t status;

        /* reset order likely matters */
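        /* RBBM_STATUS bit meanings assumed from the checks below: bits
         * 17/18/27 ~ 2D engine busy, bits 20/26 ~ VAP/GA busy, bit 16 ~
         * CP busy, bit 31 ~ GUI active. */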
        status = RREG32(RADEON_RBBM_STATUS);
        /* reset HDP */
        r100_hdp_reset(rdev);
        /* reset rb2d */
        if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
                r100_rb2d_reset(rdev);
        }
        /* reset GA */
        if (status & ((1 << 20) | (1 << 26))) {
                r300_ga_reset(rdev);
        }
        /* reset CP */
        status = RREG32(RADEON_RBBM_STATUS);
        if (status & (1 << 16)) {
                r100_cp_reset(rdev);
        }
        /* Check if GPU is idle */
        status = RREG32(RADEON_RBBM_STATUS);
        if (status & (1 << 31)) {
                DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
                return -1;
        }
        DRM_INFO("GPU reset succeeded (RBBM_STATUS=0x%08X)\n", status);
        return 0;
}


/*
 * r300,r350,rv350,rv380 VRAM info
 */
void r300_vram_info(struct radeon_device *rdev)
{
        uint32_t tmp;

        /* DDR for all cards after R300 & IGP */
        rdev->mc.vram_is_ddr = true;
        tmp = RREG32(RADEON_MEM_CNTL);
        if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
                rdev->mc.vram_width = 128;
        } else {
                rdev->mc.vram_width = 64;
        }
        rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);

        rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
        rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
}


/*
 * Indirect registers accessor
 */
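/* PCIE registers are reached through an index/data pair: write the
 * register number to PCIE_INDEX, then access PCIE_DATA. The dummy reads
 * below flush the posted writes before the data access. */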
uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
{
        uint32_t r;

        WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
        (void)RREG32(RADEON_PCIE_INDEX);
        r = RREG32(RADEON_PCIE_DATA);
        return r;
}

void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
        WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
        (void)RREG32(RADEON_PCIE_INDEX);
        WREG32(RADEON_PCIE_DATA, (v));
        (void)RREG32(RADEON_PCIE_DATA);
}

/*
 * PCIE Lanes
 */

void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
{
        uint32_t link_width_cntl, mask;

        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* FIXME wait for idle */

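        /* Map the requested lane count to the LC_LINK_WIDTH encoding and
         * latch it with RECONFIG_NOW. The 0xffffffff poll at the end
         * appears to detect the register being unreadable while the link
         * retrains (assumption; the procedure itself is from the code). */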
        switch (lanes) {
        case 0:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
                break;
        case 1:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
                break;
        case 2:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
                break;
        case 4:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
                break;
        case 8:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
                break;
        case 12:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
                break;
        case 16:
        default:
                mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
                break;
        }

        link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

        if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
            (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
                return;

        link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
                             RADEON_PCIE_LC_RECONFIG_NOW |
                             RADEON_PCIE_LC_RECONFIG_LATER |
                             RADEON_PCIE_LC_SHORT_RECONFIG_EN);
        link_width_cntl |= mask;
        WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
        WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
                                                     RADEON_PCIE_LC_RECONFIG_NOW));

        /* wait for lane set to complete */
        link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
        while (link_width_cntl == 0xffffffff)
                link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);

}


/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
{
        struct drm_info_node *node = (struct drm_info_node *) m->private;
        struct drm_device *dev = node->minor->dev;
        struct radeon_device *rdev = dev->dev_private;
        uint32_t tmp;

        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
        seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_BASE);
        seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO);
        seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI);
        seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO);
        seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI);
        seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n", tmp);
        tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR);
        seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n", tmp);
        return 0;
}

static struct drm_info_list rv370_pcie_gart_info_list[] = {
        {"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
};
#endif

int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
        return radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list, 1);
#else
        return 0;
#endif
}


/*
 * CS functions
 */
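/* The CS checker mirrors the GPU state a command stream programs
 * (color/z buffers, vertex arrays, textures) in r300_cs_track so that
 * every draw can be validated against the bound buffer sizes before the
 * IB is handed to the GPU. */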
struct r300_cs_track_cb {
        struct radeon_object    *robj;
        unsigned                pitch;
        unsigned                cpp;
        unsigned                offset;
};

struct r300_cs_track_array {
        struct radeon_object    *robj;
        unsigned                esize;
};

struct r300_cs_track_texture {
        struct radeon_object    *robj;
        unsigned                pitch;
        unsigned                width;
        unsigned                height;
        unsigned                num_levels;
        unsigned                cpp;
        unsigned                tex_coord_type;
        unsigned                txdepth;
        unsigned                width_11;
        unsigned                height_11;
        bool                    use_pitch;
        bool                    enabled;
        bool                    roundup_w;
        bool                    roundup_h;
};

struct r300_cs_track {
        unsigned                        num_cb;
        unsigned                        maxy;
        unsigned                        vtx_size;
        unsigned                        vap_vf_cntl;
        unsigned                        immd_dwords;
        unsigned                        num_arrays;
        unsigned                        max_indx;
        struct r300_cs_track_array      arrays[11];
        struct r300_cs_track_cb         cb[4];
        struct r300_cs_track_cb         zb;
        struct r300_cs_track_texture    textures[16];
        bool                            z_enabled;
};

static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t)
{
        DRM_ERROR("pitch                      %d\n", t->pitch);
        DRM_ERROR("width                      %d\n", t->width);
        DRM_ERROR("height                     %d\n", t->height);
        DRM_ERROR("num levels                 %d\n", t->num_levels);
        DRM_ERROR("depth                      %d\n", t->txdepth);
        DRM_ERROR("bpp                        %d\n", t->cpp);
        DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
        DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
        DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
}

static inline int r300_cs_track_texture_check(struct radeon_device *rdev,
                                              struct r300_cs_track *track)
{
        struct radeon_object *robj;
        unsigned long size;
        unsigned u, i, w, h;

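        /* For each enabled texture unit, accumulate width*height over all
         * mip levels, scale by bytes per texel and by the coordinate type
         * (3D textures by depth, cube maps by 6 faces), and compare the
         * total against the size of the bound object. */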
        for (u = 0; u < 16; u++) {
                if (!track->textures[u].enabled)
                        continue;
                robj = track->textures[u].robj;
                if (robj == NULL) {
                        DRM_ERROR("No texture bound to unit %u\n", u);
                        return -EINVAL;
                }
                size = 0;
                for (i = 0; i <= track->textures[u].num_levels; i++) {
                        if (track->textures[u].use_pitch) {
                                w = track->textures[u].pitch / (1 << i);
                        } else {
                                w = track->textures[u].width / (1 << i);
                                if (rdev->family >= CHIP_RV515)
                                        w |= track->textures[u].width_11;
                                if (track->textures[u].roundup_w)
                                        w = roundup_pow_of_two(w);
                        }
                        h = track->textures[u].height / (1 << i);
                        if (rdev->family >= CHIP_RV515)
                                h |= track->textures[u].height_11;
                        if (track->textures[u].roundup_h)
                                h = roundup_pow_of_two(h);
                        size += w * h;
                }
                size *= track->textures[u].cpp;
                switch (track->textures[u].tex_coord_type) {
                case 0:
                        break;
                case 1:
                        size *= (1 << track->textures[u].txdepth);
                        break;
                case 2:
                        size *= 6;
                        break;
                default:
                        DRM_ERROR("Invalid texture coordinate type %u for unit "
                                  "%u\n", track->textures[u].tex_coord_type, u);
                        return -EINVAL;
                }
                if (size > radeon_object_size(robj)) {
                        DRM_ERROR("Texture of unit %u needs %lu bytes but is "
                                  "%lu\n", u, size, radeon_object_size(robj));
                        r300_cs_track_texture_print(&track->textures[u]);
                        return -EINVAL;
                }
        }
        return 0;
}

int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
{
        unsigned i;
        unsigned long size;
        unsigned prim_walk;
        unsigned nverts;

        for (i = 0; i < track->num_cb; i++) {
                if (track->cb[i].robj == NULL) {
                        DRM_ERROR("[drm] No buffer for color buffer %d!\n", i);
                        return -EINVAL;
                }
                size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
                size += track->cb[i].offset;
                if (size > radeon_object_size(track->cb[i].robj)) {
                        DRM_ERROR("[drm] Buffer too small for color buffer %d "
                                  "(need %lu have %lu)!\n", i, size,
                                  radeon_object_size(track->cb[i].robj));
                        DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
                                  i, track->cb[i].pitch, track->cb[i].cpp,
                                  track->cb[i].offset, track->maxy);
                        return -EINVAL;
                }
        }
        if (track->z_enabled) {
                if (track->zb.robj == NULL) {
                        DRM_ERROR("[drm] No buffer for z buffer!\n");
                        return -EINVAL;
                }
                size = track->zb.pitch * track->zb.cpp * track->maxy;
                size += track->zb.offset;
                if (size > radeon_object_size(track->zb.robj)) {
                        DRM_ERROR("[drm] Buffer too small for z buffer "
                                  "(need %lu have %lu)!\n", size,
                                  radeon_object_size(track->zb.robj));
                        return -EINVAL;
                }
        }
        prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
        nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
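        /* prim_walk selects the vertex fetch mode (values assumed from
         * the cases below): 1 = indexed, arrays sized by the max index;
         * 2 = sequential arrays sized by the vertex count; 3 = immediate
         * mode with vertex data embedded in the IB. */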
        switch (prim_walk) {
        case 1:
                for (i = 0; i < track->num_arrays; i++) {
                        size = track->arrays[i].esize * track->max_indx * 4;
                        if (track->arrays[i].robj == NULL) {
                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
                                          "bound\n", prim_walk, i);
                                return -EINVAL;
                        }
                        if (size > radeon_object_size(track->arrays[i].robj)) {
                                DRM_ERROR("(PW %u) Vertex array %u needs %lu dwords "
                                           "but has %lu dwords\n", prim_walk, i,
                                           size >> 2,
                                           radeon_object_size(track->arrays[i].robj) >> 2);
                                DRM_ERROR("Max indices %u\n", track->max_indx);
                                return -EINVAL;
                        }
                }
                break;
        case 2:
                for (i = 0; i < track->num_arrays; i++) {
                        size = track->arrays[i].esize * (nverts - 1) * 4;
                        if (track->arrays[i].robj == NULL) {
                                DRM_ERROR("(PW %u) Vertex array %u no buffer "
                                          "bound\n", prim_walk, i);
                                return -EINVAL;
                        }
                        if (size > radeon_object_size(track->arrays[i].robj)) {
                                DRM_ERROR("(PW %u) Vertex array %u needs %lu dwords "
                                           "but has %lu dwords\n", prim_walk, i, size >> 2,
                                           radeon_object_size(track->arrays[i].robj) >> 2);
                                return -EINVAL;
                        }
                }
                break;
        case 3:
                size = track->vtx_size * nverts;
                if (size != track->immd_dwords) {
                        DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n",
                                  track->immd_dwords, size);
                        DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
                                  nverts, track->vtx_size);
                        return -EINVAL;
                }
                break;
        default:
                DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
                          prim_walk);
                return -EINVAL;
        }
        return r300_cs_track_texture_check(rdev, track);
}

static inline void r300_cs_track_clear(struct r300_cs_track *track)
{
        unsigned i;

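        /* Reset the tracker to worst-case defaults so that any state the
         * CS does not program explicitly is judged against the largest
         * possible footprint. */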
        track->num_cb = 4;
        track->maxy = 4096;
        for (i = 0; i < track->num_cb; i++) {
                track->cb[i].robj = NULL;
                track->cb[i].pitch = 8192;
                track->cb[i].cpp = 16;
                track->cb[i].offset = 0;
        }
        track->z_enabled = true;
        track->zb.robj = NULL;
        track->zb.pitch = 8192;
        track->zb.cpp = 4;
        track->zb.offset = 0;
        track->vtx_size = 0x7F;
        track->immd_dwords = 0xFFFFFFFFUL;
        track->num_arrays = 11;
        track->max_indx = 0x00FFFFFFUL;
        for (i = 0; i < track->num_arrays; i++) {
                track->arrays[i].robj = NULL;
                track->arrays[i].esize = 0x7F;
        }
        for (i = 0; i < 16; i++) {
                track->textures[i].pitch = 16536;
                track->textures[i].width = 16536;
                track->textures[i].height = 16536;
                track->textures[i].width_11 = 1 << 11;
                track->textures[i].height_11 = 1 << 11;
                track->textures[i].num_levels = 12;
                track->textures[i].txdepth = 16;
                track->textures[i].cpp = 64;
                track->textures[i].tex_coord_type = 1;
                track->textures[i].robj = NULL;
                /* CS IB emission code makes sure texture units are disabled */
                track->textures[i].enabled = false;
                track->textures[i].roundup_w = true;
                track->textures[i].roundup_h = true;
        }
}

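/* One bit per register dword (offset >> 2). The bitmap is handed to
 * r100_cs_parse_packet0 together with r300_packet0_check; registers
 * whose writes need relocation or tracking are funnelled through the
 * check callback (the exact bit polarity is defined in r100.c). */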
static const unsigned r300_reg_safe_bm[159] = {
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
        0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
        0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
        0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
        0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
        0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
        0x00000000, 0x0000C100, 0x00000000, 0x00000000,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
        0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
        0x0003FC01, 0xFFFFFFF8, 0xFE800B19,
};

static int r300_packet0_check(struct radeon_cs_parser *p,
                struct radeon_cs_packet *pkt,
                unsigned idx, unsigned reg)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_cs_reloc *reloc;
        struct r300_cs_track *track;
        volatile uint32_t *ib;
        uint32_t tmp;
        unsigned i;
        int r;

        ib = p->ib->ptr;
        ib_chunk = &p->chunks[p->chunk_ib_idx];
        track = (struct r300_cs_track *)p->track;
        switch (reg) {
        case AVIVO_D1MODE_VLINE_START_END:
        case RADEON_CRTC_GUI_TRIG_VLINE:
                r = r100_cs_packet_parse_vline(p);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                break;
        case RADEON_DST_PITCH_OFFSET:
        case RADEON_SRC_PITCH_OFFSET:
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
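                /* The packet carries a 1KB-granular offset in bits [21:0];
                 * add the relocated buffer base (also >> 10) and keep the
                 * pitch bits [31:22] untouched. */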
                tmp = ib_chunk->kdata[idx] & 0x003fffff;
                tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
                ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
                break;
        case R300_RB3D_COLOROFFSET0:
        case R300_RB3D_COLOROFFSET1:
        case R300_RB3D_COLOROFFSET2:
        case R300_RB3D_COLOROFFSET3:
                i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                track->cb[i].robj = reloc->robj;
                track->cb[i].offset = ib_chunk->kdata[idx];
                ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
                break;
        case R300_ZB_DEPTHOFFSET:
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                track->zb.robj = reloc->robj;
                track->zb.offset = ib_chunk->kdata[idx];
                ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
                break;
        case R300_TX_OFFSET_0:
        case R300_TX_OFFSET_0+4:
        case R300_TX_OFFSET_0+8:
        case R300_TX_OFFSET_0+12:
        case R300_TX_OFFSET_0+16:
        case R300_TX_OFFSET_0+20:
        case R300_TX_OFFSET_0+24:
        case R300_TX_OFFSET_0+28:
        case R300_TX_OFFSET_0+32:
        case R300_TX_OFFSET_0+36:
        case R300_TX_OFFSET_0+40:
        case R300_TX_OFFSET_0+44:
        case R300_TX_OFFSET_0+48:
        case R300_TX_OFFSET_0+52:
        case R300_TX_OFFSET_0+56:
        case R300_TX_OFFSET_0+60:
                i = (reg - R300_TX_OFFSET_0) >> 2;
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                                        idx, reg);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
                track->textures[i].robj = reloc->robj;
                break;
        /* Tracked registers */
        case 0x2084:
                /* VAP_VF_CNTL */
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                break;
        case 0x20B4:
                /* VAP_VTX_SIZE */
                track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
                break;
        case 0x2134:
                /* VAP_VF_MAX_VTX_INDX */
                track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
                break;
        case 0x43E4:
                /* SC_SCISSOR1 */
                track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
                if (p->rdev->family < CHIP_RV515) {
                        track->maxy -= 1440;
                }
                break;
        case 0x4E00:
                /* RB3D_CCTL */
                track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
                break;
        case 0x4E38:
        case 0x4E3C:
        case 0x4E40:
        case 0x4E44:
                /* RB3D_COLORPITCH0 */
                /* RB3D_COLORPITCH1 */
                /* RB3D_COLORPITCH2 */
                /* RB3D_COLORPITCH3 */
                i = (reg - 0x4E38) >> 2;
                track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
                switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
                case 9:
                case 11:
                case 12:
                        track->cb[i].cpp = 1;
                        break;
                case 3:
                case 4:
                case 13:
                case 15:
                        track->cb[i].cpp = 2;
                        break;
                case 6:
                        track->cb[i].cpp = 4;
                        break;
                case 10:
                        track->cb[i].cpp = 8;
                        break;
                case 7:
                        track->cb[i].cpp = 16;
                        break;
                default:
                        DRM_ERROR("Invalid color buffer format (%d)!\n",
                                  ((ib_chunk->kdata[idx] >> 21) & 0xF));
                        return -EINVAL;
                }
                break;
        case 0x4F00:
                /* ZB_CNTL */
                if (ib_chunk->kdata[idx] & 2) {
                        track->z_enabled = true;
                } else {
                        track->z_enabled = false;
                }
                break;
        case 0x4F10:
                /* ZB_FORMAT */
                switch ((ib_chunk->kdata[idx] & 0xF)) {
                case 0:
                case 1:
                        track->zb.cpp = 2;
                        break;
                case 2:
                        track->zb.cpp = 4;
                        break;
                default:
                        DRM_ERROR("Invalid z buffer format (%d)!\n",
                                  (ib_chunk->kdata[idx] & 0xF));
                        return -EINVAL;
                }
                break;
        case 0x4F24:
                /* ZB_DEPTHPITCH */
                track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
                break;
        case 0x4104:
                for (i = 0; i < 16; i++) {
                        bool enabled;

                        enabled = !!(ib_chunk->kdata[idx] & (1 << i));
                        track->textures[i].enabled = enabled;
                }
                break;
        case 0x44C0:
        case 0x44C4:
        case 0x44C8:
        case 0x44CC:
        case 0x44D0:
        case 0x44D4:
        case 0x44D8:
        case 0x44DC:
        case 0x44E0:
        case 0x44E4:
        case 0x44E8:
        case 0x44EC:
        case 0x44F0:
        case 0x44F4:
        case 0x44F8:
        case 0x44FC:
                /* TX_FORMAT1_[0-15] */
                i = (reg - 0x44C0) >> 2;
                tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
                track->textures[i].tex_coord_type = tmp;
                switch ((ib_chunk->kdata[idx] & 0x1F)) {
                case 0:
                case 2:
                case 5:
                case 18:
                case 20:
                case 21:
                        track->textures[i].cpp = 1;
                        break;
                case 1:
                case 3:
                case 6:
                case 7:
                case 10:
                case 11:
                case 19:
                case 22:
                case 24:
                        track->textures[i].cpp = 2;
                        break;
                case 4:
                case 8:
                case 9:
                case 12:
                case 13:
                case 23:
                case 25:
                case 27:
                case 30:
                        track->textures[i].cpp = 4;
                        break;
                case 14:
                case 26:
                case 28:
                        track->textures[i].cpp = 8;
                        break;
                case 29:
                        track->textures[i].cpp = 16;
                        break;
                default:
                        DRM_ERROR("Invalid texture format %u\n",
                                  (ib_chunk->kdata[idx] & 0x1F));
                        return -EINVAL;
                }
                break;
        case 0x4400:
        case 0x4404:
        case 0x4408:
        case 0x440C:
        case 0x4410:
        case 0x4414:
        case 0x4418:
        case 0x441C:
        case 0x4420:
        case 0x4424:
        case 0x4428:
        case 0x442C:
        case 0x4430:
        case 0x4434:
        case 0x4438:
        case 0x443C:
                /* TX_FILTER0_[0-15] */
                i = (reg - 0x4400) >> 2;
                tmp = ib_chunk->kdata[idx] & 0x7;
                if (tmp == 2 || tmp == 4 || tmp == 6) {
                        track->textures[i].roundup_w = false;
                }
                tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;
                if (tmp == 2 || tmp == 4 || tmp == 6) {
                        track->textures[i].roundup_h = false;
                }
                break;
        case 0x4500:
        case 0x4504:
        case 0x4508:
        case 0x450C:
        case 0x4510:
        case 0x4514:
        case 0x4518:
        case 0x451C:
        case 0x4520:
        case 0x4524:
        case 0x4528:
        case 0x452C:
        case 0x4530:
        case 0x4534:
        case 0x4538:
        case 0x453C:
                /* TX_FORMAT2_[0-15] */
                i = (reg - 0x4500) >> 2;
                tmp = ib_chunk->kdata[idx] & 0x3FFF;
                track->textures[i].pitch = tmp + 1;
                if (p->rdev->family >= CHIP_RV515) {
                        tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
                        track->textures[i].width_11 = tmp;
                        tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
                        track->textures[i].height_11 = tmp;
                }
                break;
        case 0x4480:
        case 0x4484:
        case 0x4488:
        case 0x448C:
        case 0x4490:
        case 0x4494:
        case 0x4498:
        case 0x449C:
        case 0x44A0:
        case 0x44A4:
        case 0x44A8:
        case 0x44AC:
        case 0x44B0:
        case 0x44B4:
        case 0x44B8:
        case 0x44BC:
                /* TX_FORMAT0_[0-15] */
                i = (reg - 0x4480) >> 2;
                tmp = ib_chunk->kdata[idx] & 0x7FF;
                track->textures[i].width = tmp + 1;
                tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
                track->textures[i].height = tmp + 1;
                tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
                track->textures[i].num_levels = tmp;
                tmp = ib_chunk->kdata[idx] & (1 << 31);
                track->textures[i].use_pitch = !!tmp;
                tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
                track->textures[i].txdepth = tmp;
                break;
        default:
                printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
                       reg, idx);
                return -EINVAL;
        }
        return 0;
}

1367
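/* Check a type-3 packet: patch buffer addresses with their relocation
 * offsets and, for draw packets, validate the accumulated state with
 * r300_cs_track_check().
 */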
static int r300_packet3_check(struct radeon_cs_parser *p,
                              struct radeon_cs_packet *pkt)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_cs_reloc *reloc;
        struct r300_cs_track *track;
        volatile uint32_t *ib;
        unsigned idx;
        unsigned i, c;
        int r;

        ib = p->ib->ptr;
        ib_chunk = &p->chunks[p->chunk_ib_idx];
        idx = pkt->idx + 1;
        track = (struct r300_cs_track *)p->track;
        switch (pkt->opcode) {
        case PACKET3_3D_LOAD_VBPNTR:
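                /* First dword: number of vertex arrays (bits 4:0). The
                 * arrays are then described in pairs of three dwords: one
                 * dword packing the two element sizes (bits 14:8 and
                 * 30:24) followed by one address dword per array. An odd
                 * trailing array uses a final size/address pair of its own.
                 */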
                c = ib_chunk->kdata[idx++] & 0x1F;
                track->num_arrays = c;
                /* i + 1 < c avoids unsigned wraparound when c == 0 */
                for (i = 0; i + 1 < c; i += 2, idx += 3) {
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                        track->arrays[i + 0].robj = reloc->robj;
                        track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
                        track->arrays[i + 0].esize &= 0x7F;
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
                        track->arrays[i + 1].robj = reloc->robj;
                        track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
                        track->arrays[i + 1].esize &= 0x7F;
                }
                if (c & 1) {
                        r = r100_cs_packet_next_reloc(p, &reloc);
                        if (r) {
                                DRM_ERROR("No reloc for packet3 %d\n",
                                          pkt->opcode);
                                r100_cs_dump_packet(p, pkt);
                                return r;
                        }
                        ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                        track->arrays[i + 0].robj = reloc->robj;
                        track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
                        track->arrays[i + 0].esize &= 0x7F;
                }
                break;
        case PACKET3_INDX_BUFFER:
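                /* Patch the index buffer address with its relocation
                 * offset, then check the index buffer against the backing
                 * buffer object.
                 */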
                r = r100_cs_packet_next_reloc(p, &reloc);
                if (r) {
                        DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
                        r100_cs_dump_packet(p, pkt);
                        return r;
                }
                ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
                r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
                if (r) {
                        return r;
                }
                break;
        /* Draw packets */
        case PACKET3_3D_DRAW_IMMD:
                /* Number of dwords is vtx_size * (num_vertices - 1).
                 * PRIM_WALK must be equal to 3: vertex data is embedded
                 * in the command stream. */
                if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
                        return -EINVAL;
                }
                track->vap_vf_cntl = ib_chunk->kdata[idx+1];
                track->immd_dwords = pkt->count - 1;
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_IMMD_2:
                /* Number of dwords is vtx_size * (num_vertices - 1).
                 * PRIM_WALK must be equal to 3: vertex data is embedded
                 * in the command stream. */
                if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
                        DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
                        return -EINVAL;
                }
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                track->immd_dwords = pkt->count;
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
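        /* For the VBUF and INDX variants the vertex data lives in the
         * arrays set up by 3D_LOAD_VBPNTR; only the VF control word is
         * recorded here before re-running the track checker.
         */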
        case PACKET3_3D_DRAW_VBUF:
                track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_VBUF_2:
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_INDX:
                track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_3D_DRAW_INDX_2:
                track->vap_vf_cntl = ib_chunk->kdata[idx];
                r = r300_cs_track_check(p->rdev, track);
                if (r) {
                        return r;
                }
                break;
        case PACKET3_NOP:
                break;
        default:
                DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
                return -EINVAL;
        }
        return 0;
}

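/* Parse a command stream for r300-family chips: walk the indirect buffer
 * packet by packet, validating type-0 register writes against the safe
 * register bitmap and type-3 packets with r300_packet3_check().
 */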
int r300_cs_parse(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet pkt;
        struct r300_cs_track track;
        int r;

        r300_cs_track_clear(&track);
        p->track = &track;
        do {
                r = r100_cs_packet_parse(p, &pkt, p->idx);
                if (r) {
                        return r;
                }
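                /* Skip the packet header dword and its pkt.count + 1
                 * payload dwords.
                 */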
                p->idx += pkt.count + 2;
                switch (pkt.type) {
                case PACKET_TYPE0:
                        r = r100_cs_parse_packet0(p, &pkt,
                                                  p->rdev->config.r300.reg_safe_bm,
                                                  p->rdev->config.r300.reg_safe_bm_size,
                                                  &r300_packet0_check);
                        break;
                case PACKET_TYPE2:
                        break;
                case PACKET_TYPE3:
                        r = r300_packet3_check(p, &pkt);
                        break;
                default:
                        DRM_ERROR("Unknown packet type %d!\n", pkt.type);
                        return -EINVAL;
                }
                if (r) {
                        return r;
                }
        } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
        return 0;
}

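/* Hook up the generated r300 register bitmap consulted by the type-0
 * packet checker in r300_cs_parse().
 */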
int r300_init(struct radeon_device *rdev)
{
        rdev->config.r300.reg_safe_bm = r300_reg_safe_bm;
        rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm);
        return 0;
}