drm/radeon/kms: add initial colortiling support.
[linux-2.6.git] / drivers / gpu / drm / radeon / r300.c
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/seq_file.h>
29 #include "drmP.h"
30 #include "drm.h"
31 #include "radeon_reg.h"
32 #include "radeon.h"
33 #include "radeon_drm.h"
34
35 /* r300,r350,rv350,rv370,rv380 depends on : */
36 void r100_hdp_reset(struct radeon_device *rdev);
37 int r100_cp_reset(struct radeon_device *rdev);
38 int r100_rb2d_reset(struct radeon_device *rdev);
39 int r100_cp_init(struct radeon_device *rdev, unsigned ring_size);
40 int r100_pci_gart_enable(struct radeon_device *rdev);
41 void r100_pci_gart_disable(struct radeon_device *rdev);
42 void r100_mc_setup(struct radeon_device *rdev);
43 void r100_mc_disable_clients(struct radeon_device *rdev);
44 int r100_gui_wait_for_idle(struct radeon_device *rdev);
45 int r100_cs_packet_parse(struct radeon_cs_parser *p,
46                          struct radeon_cs_packet *pkt,
47                          unsigned idx);
48 int r100_cs_packet_parse_vline(struct radeon_cs_parser *p);
49 int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
50                               struct radeon_cs_reloc **cs_reloc);
51 int r100_cs_parse_packet0(struct radeon_cs_parser *p,
52                           struct radeon_cs_packet *pkt,
53                           const unsigned *auth, unsigned n,
54                           radeon_packet0_check_t check);
55 void r100_cs_dump_packet(struct radeon_cs_parser *p,
56                          struct radeon_cs_packet *pkt);
57 int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
58                                          struct radeon_cs_packet *pkt,
59                                          struct radeon_object *robj);
60
61 /* This file gathers functions specific to:
62  * r300,r350,rv350,rv370,rv380
63  *
64  * Some of these functions might be used by newer ASICs.
65  */
66 void r300_gpu_init(struct radeon_device *rdev);
67 int r300_mc_wait_for_idle(struct radeon_device *rdev);
68 int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);
69
70
71 /*
72  * rv370,rv380 PCIE GART
73  */
74 void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
75 {
76         uint32_t tmp;
77         int i;
78
79         /* Workaround HW bug do flush 2 times */
80         for (i = 0; i < 2; i++) {
81                 tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
82                 WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp | RADEON_PCIE_TX_GART_INVALIDATE_TLB);
83                 (void)RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
84                 WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
85                 mb();
86         }
87 }
88
89 int rv370_pcie_gart_enable(struct radeon_device *rdev)
90 {
91         uint32_t table_addr;
92         uint32_t tmp;
93         int r;
94
95         /* Initialize common gart structure */
96         r = radeon_gart_init(rdev);
97         if (r) {
98                 return r;
99         }
100         r = rv370_debugfs_pcie_gart_info_init(rdev);
101         if (r) {
102                 DRM_ERROR("Failed to register debugfs file for PCIE gart !\n");
103         }
104         rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
105         r = radeon_gart_table_vram_alloc(rdev);
106         if (r) {
107                 return r;
108         }
109         /* discard memory request outside of configured range */
110         tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
111         WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
112         WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
113         tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 4096;
114         WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
115         WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
116         WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
117         table_addr = rdev->gart.table_addr;
118         WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
119         /* FIXME: setup default page */
120         WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
121         WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
122         /* Clear error */
123         WREG32_PCIE(0x18, 0);
124         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
125         tmp |= RADEON_PCIE_TX_GART_EN;
126         tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
127         WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
128         rv370_pcie_gart_tlb_flush(rdev);
129         DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
130                  rdev->mc.gtt_size >> 20, table_addr);
131         rdev->gart.ready = true;
132         return 0;
133 }
134
135 void rv370_pcie_gart_disable(struct radeon_device *rdev)
136 {
137         uint32_t tmp;
138
139         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
140         tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
141         WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp & ~RADEON_PCIE_TX_GART_EN);
142         if (rdev->gart.table.vram.robj) {
143                 radeon_object_kunmap(rdev->gart.table.vram.robj);
144                 radeon_object_unpin(rdev->gart.table.vram.robj);
145         }
146 }
147
148 int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
149 {
150         void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
151
152         if (i < 0 || i > rdev->gart.num_gpu_pages) {
153                 return -EINVAL;
154         }
155         addr = (lower_32_bits(addr) >> 8) |
156                ((upper_32_bits(addr) & 0xff) << 24) |
157                0xc;
158         /* on x86 we want this to be CPU endian, on powerpc
159          * on powerpc without HW swappers, it'll get swapped on way
160          * into VRAM - so no need for cpu_to_le32 on VRAM tables */
161         writel(addr, ((void __iomem *)ptr) + (i * 4));
162         return 0;
163 }
164
165 int r300_gart_enable(struct radeon_device *rdev)
166 {
167 #if __OS_HAS_AGP
168         if (rdev->flags & RADEON_IS_AGP) {
169                 if (rdev->family > CHIP_RV350) {
170                         rv370_pcie_gart_disable(rdev);
171                 } else {
172                         r100_pci_gart_disable(rdev);
173                 }
174                 return 0;
175         }
176 #endif
177         if (rdev->flags & RADEON_IS_PCIE) {
178                 rdev->asic->gart_disable = &rv370_pcie_gart_disable;
179                 rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
180                 rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
181                 return rv370_pcie_gart_enable(rdev);
182         }
183         return r100_pci_gart_enable(rdev);
184 }
185
186
187 /*
188  * MC
189  */
190 int r300_mc_init(struct radeon_device *rdev)
191 {
192         int r;
193
194         if (r100_debugfs_rbbm_init(rdev)) {
195                 DRM_ERROR("Failed to register debugfs file for RBBM !\n");
196         }
197
198         r300_gpu_init(rdev);
199         r100_pci_gart_disable(rdev);
200         if (rdev->flags & RADEON_IS_PCIE) {
201                 rv370_pcie_gart_disable(rdev);
202         }
203
204         /* Setup GPU memory space */
205         rdev->mc.vram_location = 0xFFFFFFFFUL;
206         rdev->mc.gtt_location = 0xFFFFFFFFUL;
207         if (rdev->flags & RADEON_IS_AGP) {
208                 r = radeon_agp_init(rdev);
209                 if (r) {
210                         printk(KERN_WARNING "[drm] Disabling AGP\n");
211                         rdev->flags &= ~RADEON_IS_AGP;
212                         rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
213                 } else {
214                         rdev->mc.gtt_location = rdev->mc.agp_base;
215                 }
216         }
217         r = radeon_mc_setup(rdev);
218         if (r) {
219                 return r;
220         }
221
222         /* Program GPU memory space */
223         r100_mc_disable_clients(rdev);
224         if (r300_mc_wait_for_idle(rdev)) {
225                 printk(KERN_WARNING "Failed to wait MC idle while "
226                        "programming pipes. Bad things might happen.\n");
227         }
228         r100_mc_setup(rdev);
229         return 0;
230 }
231
232 void r300_mc_fini(struct radeon_device *rdev)
233 {
234         if (rdev->flags & RADEON_IS_PCIE) {
235                 rv370_pcie_gart_disable(rdev);
236                 radeon_gart_table_vram_free(rdev);
237         } else {
238                 r100_pci_gart_disable(rdev);
239                 radeon_gart_table_ram_free(rdev);
240         }
241         radeon_gart_fini(rdev);
242 }
243
244
245 /*
246  * Fence emission
247  */
/*
 * Emit a fence on the command ring.
 *
 * Writes 14 dwords (seven single-register PACKET0 writes): two SC register
 * writes, a 3D cache flush, a wait-until, then fence->seq into the fence
 * driver's scratch register and RADEON_SW_INT_FIRE into
 * RADEON_GEN_INT_STATUS.  The function neither locks the ring nor reserves
 * space; that is the caller's job (see the comment below).
 */
248 void r300_fence_ring_emit(struct radeon_device *rdev,
249                           struct radeon_fence *fence)
250 {
251         /* Whoever calls radeon_fence_emit must hold the ring lock and have
252          * asked for enough space (today's callers: ib schedule, buffer move) */
253         /* Write SC register so SC & US assert idle */
254         radeon_ring_write(rdev, PACKET0(0x43E0, 0));
255         radeon_ring_write(rdev, 0);
256         radeon_ring_write(rdev, PACKET0(0x43E4, 0));
257         radeon_ring_write(rdev, 0);
258         /* Flush 3D cache */
259         radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
260         radeon_ring_write(rdev, (2 << 0));
261         radeon_ring_write(rdev, PACKET0(0x4F18, 0));
262         radeon_ring_write(rdev, (1 << 0));
263         /* Wait until IDLE & CLEAN */
264         radeon_ring_write(rdev, PACKET0(0x1720, 0));
265         radeon_ring_write(rdev, (1 << 17) | (1 << 16)  | (1 << 9));
266         /* Emit fence sequence & fire IRQ */
267         radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
268         radeon_ring_write(rdev, fence->seq);
269         radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
270         radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
271 }
272
273
274 /*
275  * Global GPU functions
276  */
277 int r300_copy_dma(struct radeon_device *rdev,
278                   uint64_t src_offset,
279                   uint64_t dst_offset,
280                   unsigned num_pages,
281                   struct radeon_fence *fence)
282 {
283         uint32_t size;
284         uint32_t cur_size;
285         int i, num_loops;
286         int r = 0;
287
288         /* radeon pitch is /64 */
289         size = num_pages << PAGE_SHIFT;
290         num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
291         r = radeon_ring_lock(rdev, num_loops * 4 + 64);
292         if (r) {
293                 DRM_ERROR("radeon: moving bo (%d).\n", r);
294                 return r;
295         }
296         /* Must wait for 2D idle & clean before DMA or hangs might happen */
297         radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0 ));
298         radeon_ring_write(rdev, (1 << 16));
299         for (i = 0; i < num_loops; i++) {
300                 cur_size = size;
301                 if (cur_size > 0x1FFFFF) {
302                         cur_size = 0x1FFFFF;
303                 }
304                 size -= cur_size;
305                 radeon_ring_write(rdev, PACKET0(0x720, 2));
306                 radeon_ring_write(rdev, src_offset);
307                 radeon_ring_write(rdev, dst_offset);
308                 radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
309                 src_offset += cur_size;
310                 dst_offset += cur_size;
311         }
312         radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
313         radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
314         if (fence) {
315                 r = radeon_fence_emit(rdev, fence);
316         }
317         radeon_ring_unlock_commit(rdev);
318         return r;
319 }
320
/*
 * Queue the initial 3D engine setup on the command ring.
 *
 * Builds the GB_TILE_CONFIG value from rdev->num_gb_pipes (same mapping as
 * r300_gpu_init()), reserves 64 dwords on the ring and emits a fixed
 * sequence of PACKET0 register writes: ISYNC control, tile config, cache
 * flushes separated by WAIT_UNTIL packets, multisample positions and the
 * GA enhance / poly mode / round mode registers.  The ring is then unlocked
 * and committed.
 *
 * If the ring cannot be locked the function returns without emitting
 * anything and without reporting the failure.
 */
321 void r300_ring_start(struct radeon_device *rdev)
322 {
323         unsigned gb_tile_config;
324         int r;
325
326         /* Sub pixel 1/12 so we can have 4K rendering according to doc */
327         gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
        /* Pick the pipe-count field; unknown counts fall back to RV350 */
328         switch(rdev->num_gb_pipes) {
329         case 2:
330                 gb_tile_config |= R300_PIPE_COUNT_R300;
331                 break;
332         case 3:
333                 gb_tile_config |= R300_PIPE_COUNT_R420_3P;
334                 break;
335         case 4:
336                 gb_tile_config |= R300_PIPE_COUNT_R420;
337                 break;
338         case 1:
339         default:
340                 gb_tile_config |= R300_PIPE_COUNT_RV350;
341                 break;
342         }
343
344         r = radeon_ring_lock(rdev, 64);
345         if (r) {
                /* No ring space: give up silently, nothing is emitted */
346                 return;
347         }
348         radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
349         radeon_ring_write(rdev,
350                           RADEON_ISYNC_ANY2D_IDLE3D |
351                           RADEON_ISYNC_ANY3D_IDLE2D |
352                           RADEON_ISYNC_WAIT_IDLEGUI |
353                           RADEON_ISYNC_CPSCRATCH_IDLEGUI);
354         radeon_ring_write(rdev, PACKET0(R300_GB_TILE_CONFIG, 0));
355         radeon_ring_write(rdev, gb_tile_config);
356         radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
357         radeon_ring_write(rdev,
358                           RADEON_WAIT_2D_IDLECLEAN |
359                           RADEON_WAIT_3D_IDLECLEAN);
        /* Same bit 31 of register 0x170C that r300_gpu_init() sets via MMIO */
360         radeon_ring_write(rdev, PACKET0(0x170C, 0));
361         radeon_ring_write(rdev, 1 << 31);
362         radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
363         radeon_ring_write(rdev, 0);
364         radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
365         radeon_ring_write(rdev, 0);
        /* Flush and free the destination and Z caches, then wait for idle */
366         radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
367         radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
368         radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
369         radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
370         radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
371         radeon_ring_write(rdev,
372                           RADEON_WAIT_2D_IDLECLEAN |
373                           RADEON_WAIT_3D_IDLECLEAN);
374         radeon_ring_write(rdev, PACKET0(R300_GB_AA_CONFIG, 0));
375         radeon_ring_write(rdev, 0);
        /* Second flush/free of both caches after the AA config write */
376         radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
377         radeon_ring_write(rdev, R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
378         radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
379         radeon_ring_write(rdev, R300_ZC_FLUSH | R300_ZC_FREE);
        /* Every multisample position field is set to the value 6 */
380         radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS0, 0));
381         radeon_ring_write(rdev,
382                           ((6 << R300_MS_X0_SHIFT) |
383                            (6 << R300_MS_Y0_SHIFT) |
384                            (6 << R300_MS_X1_SHIFT) |
385                            (6 << R300_MS_Y1_SHIFT) |
386                            (6 << R300_MS_X2_SHIFT) |
387                            (6 << R300_MS_Y2_SHIFT) |
388                            (6 << R300_MSBD0_Y_SHIFT) |
389                            (6 << R300_MSBD0_X_SHIFT)));
390         radeon_ring_write(rdev, PACKET0(R300_GB_MSPOS1, 0));
391         radeon_ring_write(rdev,
392                           ((6 << R300_MS_X3_SHIFT) |
393                            (6 << R300_MS_Y3_SHIFT) |
394                            (6 << R300_MS_X4_SHIFT) |
395                            (6 << R300_MS_Y4_SHIFT) |
396                            (6 << R300_MS_X5_SHIFT) |
397                            (6 << R300_MS_Y5_SHIFT) |
398                            (6 << R300_MSBD1_SHIFT)));
399         radeon_ring_write(rdev, PACKET0(R300_GA_ENHANCE, 0));
400         radeon_ring_write(rdev, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL);
401         radeon_ring_write(rdev, PACKET0(R300_GA_POLY_MODE, 0));
402         radeon_ring_write(rdev,
403                           R300_FRONT_PTYPE_TRIANGE | R300_BACK_PTYPE_TRIANGE);
404         radeon_ring_write(rdev, PACKET0(R300_GA_ROUND_MODE, 0));
405         radeon_ring_write(rdev,
406                           R300_GEOMETRY_ROUND_NEAREST |
407                           R300_COLOR_ROUND_NEAREST);
408         radeon_ring_unlock_commit(rdev);
409 }
410
411 void r300_errata(struct radeon_device *rdev)
412 {
413         rdev->pll_errata = 0;
414
415         if (rdev->family == CHIP_R300 &&
416             (RREG32(RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) == RADEON_CFG_ATI_REV_A11) {
417                 rdev->pll_errata |= CHIP_ERRATA_R300_CG;
418         }
419 }
420
421 int r300_mc_wait_for_idle(struct radeon_device *rdev)
422 {
423         unsigned i;
424         uint32_t tmp;
425
426         for (i = 0; i < rdev->usec_timeout; i++) {
427                 /* read MC_STATUS */
428                 tmp = RREG32(0x0150);
429                 if (tmp & (1 << 4)) {
430                         return 0;
431                 }
432                 DRM_UDELAY(1);
433         }
434         return -1;
435 }
436
437 void r300_gpu_init(struct radeon_device *rdev)
438 {
439         uint32_t gb_tile_config, tmp;
440
441         r100_hdp_reset(rdev);
442         /* FIXME: rv380 one pipes ? */
443         if ((rdev->family == CHIP_R300) || (rdev->family == CHIP_R350)) {
444                 /* r300,r350 */
445                 rdev->num_gb_pipes = 2;
446         } else {
447                 /* rv350,rv370,rv380 */
448                 rdev->num_gb_pipes = 1;
449         }
450         gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
451         switch (rdev->num_gb_pipes) {
452         case 2:
453                 gb_tile_config |= R300_PIPE_COUNT_R300;
454                 break;
455         case 3:
456                 gb_tile_config |= R300_PIPE_COUNT_R420_3P;
457                 break;
458         case 4:
459                 gb_tile_config |= R300_PIPE_COUNT_R420;
460                 break;
461         default:
462         case 1:
463                 gb_tile_config |= R300_PIPE_COUNT_RV350;
464                 break;
465         }
466         WREG32(R300_GB_TILE_CONFIG, gb_tile_config);
467
468         if (r100_gui_wait_for_idle(rdev)) {
469                 printk(KERN_WARNING "Failed to wait GUI idle while "
470                        "programming pipes. Bad things might happen.\n");
471         }
472
473         tmp = RREG32(0x170C);
474         WREG32(0x170C, tmp | (1 << 31));
475
476         WREG32(R300_RB2D_DSTCACHE_MODE,
477                R300_DC_AUTOFLUSH_ENABLE |
478                R300_DC_DC_DISABLE_IGNORE_PE);
479
480         if (r100_gui_wait_for_idle(rdev)) {
481                 printk(KERN_WARNING "Failed to wait GUI idle while "
482                        "programming pipes. Bad things might happen.\n");
483         }
484         if (r300_mc_wait_for_idle(rdev)) {
485                 printk(KERN_WARNING "Failed to wait MC idle while "
486                        "programming pipes. Bad things might happen.\n");
487         }
488         DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes);
489 }
490
491 int r300_ga_reset(struct radeon_device *rdev)
492 {
493         uint32_t tmp;
494         bool reinit_cp;
495         int i;
496
497         reinit_cp = rdev->cp.ready;
498         rdev->cp.ready = false;
499         for (i = 0; i < rdev->usec_timeout; i++) {
500                 WREG32(RADEON_CP_CSQ_MODE, 0);
501                 WREG32(RADEON_CP_CSQ_CNTL, 0);
502                 WREG32(RADEON_RBBM_SOFT_RESET, 0x32005);
503                 (void)RREG32(RADEON_RBBM_SOFT_RESET);
504                 udelay(200);
505                 WREG32(RADEON_RBBM_SOFT_RESET, 0);
506                 /* Wait to prevent race in RBBM_STATUS */
507                 mdelay(1);
508                 tmp = RREG32(RADEON_RBBM_STATUS);
509                 if (tmp & ((1 << 20) | (1 << 26))) {
510                         DRM_ERROR("VAP & CP still busy (RBBM_STATUS=0x%08X)", tmp);
511                         /* GA still busy soft reset it */
512                         WREG32(0x429C, 0x200);
513                         WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
514                         WREG32(0x43E0, 0);
515                         WREG32(0x43E4, 0);
516                         WREG32(0x24AC, 0);
517                 }
518                 /* Wait to prevent race in RBBM_STATUS */
519                 mdelay(1);
520                 tmp = RREG32(RADEON_RBBM_STATUS);
521                 if (!(tmp & ((1 << 20) | (1 << 26)))) {
522                         break;
523                 }
524         }
525         for (i = 0; i < rdev->usec_timeout; i++) {
526                 tmp = RREG32(RADEON_RBBM_STATUS);
527                 if (!(tmp & ((1 << 20) | (1 << 26)))) {
528                         DRM_INFO("GA reset succeed (RBBM_STATUS=0x%08X)\n",
529                                  tmp);
530                         if (reinit_cp) {
531                                 return r100_cp_init(rdev, rdev->cp.ring_size);
532                         }
533                         return 0;
534                 }
535                 DRM_UDELAY(1);
536         }
537         tmp = RREG32(RADEON_RBBM_STATUS);
538         DRM_ERROR("Failed to reset GA ! (RBBM_STATUS=0x%08X)\n", tmp);
539         return -1;
540 }
541
542 int r300_gpu_reset(struct radeon_device *rdev)
543 {
544         uint32_t status;
545
546         /* reset order likely matter */
547         status = RREG32(RADEON_RBBM_STATUS);
548         /* reset HDP */
549         r100_hdp_reset(rdev);
550         /* reset rb2d */
551         if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
552                 r100_rb2d_reset(rdev);
553         }
554         /* reset GA */
555         if (status & ((1 << 20) | (1 << 26))) {
556                 r300_ga_reset(rdev);
557         }
558         /* reset CP */
559         status = RREG32(RADEON_RBBM_STATUS);
560         if (status & (1 << 16)) {
561                 r100_cp_reset(rdev);
562         }
563         /* Check if GPU is idle */
564         status = RREG32(RADEON_RBBM_STATUS);
565         if (status & (1 << 31)) {
566                 DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
567                 return -1;
568         }
569         DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
570         return 0;
571 }
572
573
574 /*
575  * r300,r350,rv350,rv380 VRAM info
576  */
577 void r300_vram_info(struct radeon_device *rdev)
578 {
579         uint32_t tmp;
580
581         /* DDR for all card after R300 & IGP */
582         rdev->mc.vram_is_ddr = true;
583         tmp = RREG32(RADEON_MEM_CNTL);
584         if (tmp & R300_MEM_NUM_CHANNELS_MASK) {
585                 rdev->mc.vram_width = 128;
586         } else {
587                 rdev->mc.vram_width = 64;
588         }
589
590         r100_vram_init_sizes(rdev);
591 }
592
593
594 /*
595  * Indirect registers accessor
596  */
597 uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
598 {
599         uint32_t r;
600
601         WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
602         (void)RREG32(RADEON_PCIE_INDEX);
603         r = RREG32(RADEON_PCIE_DATA);
604         return r;
605 }
606
607 void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
608 {
609         WREG8(RADEON_PCIE_INDEX, ((reg) & 0xff));
610         (void)RREG32(RADEON_PCIE_INDEX);
611         WREG32(RADEON_PCIE_DATA, (v));
612         (void)RREG32(RADEON_PCIE_DATA);
613 }
614
615 /*
616  * PCIE Lanes
617  */
618
619 void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
620 {
621         uint32_t link_width_cntl, mask;
622
623         if (rdev->flags & RADEON_IS_IGP)
624                 return;
625
626         if (!(rdev->flags & RADEON_IS_PCIE))
627                 return;
628
629         /* FIXME wait for idle */
630
631         switch (lanes) {
632         case 0:
633                 mask = RADEON_PCIE_LC_LINK_WIDTH_X0;
634                 break;
635         case 1:
636                 mask = RADEON_PCIE_LC_LINK_WIDTH_X1;
637                 break;
638         case 2:
639                 mask = RADEON_PCIE_LC_LINK_WIDTH_X2;
640                 break;
641         case 4:
642                 mask = RADEON_PCIE_LC_LINK_WIDTH_X4;
643                 break;
644         case 8:
645                 mask = RADEON_PCIE_LC_LINK_WIDTH_X8;
646                 break;
647         case 12:
648                 mask = RADEON_PCIE_LC_LINK_WIDTH_X12;
649                 break;
650         case 16:
651         default:
652                 mask = RADEON_PCIE_LC_LINK_WIDTH_X16;
653                 break;
654         }
655
656         link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
657
658         if ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) ==
659             (mask << RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT))
660                 return;
661
662         link_width_cntl &= ~(RADEON_PCIE_LC_LINK_WIDTH_MASK |
663                              RADEON_PCIE_LC_RECONFIG_NOW |
664                              RADEON_PCIE_LC_RECONFIG_LATER |
665                              RADEON_PCIE_LC_SHORT_RECONFIG_EN);
666         link_width_cntl |= mask;
667         WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
668         WREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL, (link_width_cntl |
669                                                      RADEON_PCIE_LC_RECONFIG_NOW));
670
671         /* wait for lane set to complete */
672         link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
673         while (link_width_cntl == 0xffffffff)
674                 link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
675
676 }
677
678
679 /*
680  * Debugfs info
681  */
682 #if defined(CONFIG_DEBUG_FS)
683 static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
684 {
685         struct drm_info_node *node = (struct drm_info_node *) m->private;
686         struct drm_device *dev = node->minor->dev;
687         struct radeon_device *rdev = dev->dev_private;
688         uint32_t tmp;
689
690         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_CNTL);
691         seq_printf(m, "PCIE_TX_GART_CNTL 0x%08x\n", tmp);
692         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_BASE);
693         seq_printf(m, "PCIE_TX_GART_BASE 0x%08x\n", tmp);
694         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_LO);
695         seq_printf(m, "PCIE_TX_GART_START_LO 0x%08x\n", tmp);
696         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_START_HI);
697         seq_printf(m, "PCIE_TX_GART_START_HI 0x%08x\n", tmp);
698         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_LO);
699         seq_printf(m, "PCIE_TX_GART_END_LO 0x%08x\n", tmp);
700         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_END_HI);
701         seq_printf(m, "PCIE_TX_GART_END_HI 0x%08x\n", tmp);
702         tmp = RREG32_PCIE(RADEON_PCIE_TX_GART_ERROR);
703         seq_printf(m, "PCIE_TX_GART_ERROR 0x%08x\n", tmp);
704         return 0;
705 }
706
/*
 * debugfs file table handed to radeon_debugfs_add_files(): a single entry
 * named "rv370_pcie_gart_info" served by rv370_debugfs_pcie_gart_info().
 */
707 static struct drm_info_list rv370_pcie_gart_info_list[] = {
708         {"rv370_pcie_gart_info", rv370_debugfs_pcie_gart_info, 0, NULL},
709 };
710 #endif
711
/*
 * Register the PCIE GART debugfs file.
 *
 * Returns the result of radeon_debugfs_add_files() when CONFIG_DEBUG_FS is
 * defined, and 0 without doing anything otherwise.
 */
int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev)
{
	int ret = 0;

#if defined(CONFIG_DEBUG_FS)
	ret = radeon_debugfs_add_files(rdev, rv370_pcie_gart_info_list, 1);
#endif
	return ret;
}
720
721
722 /*
723  * CS functions
724  */
/*
 * Tracked state of one buffer referenced by a command stream.
 * struct r300_cs_track keeps four of these in cb[] and one more as zb.
 *
 * robj   - buffer object backing the buffer
 * pitch  - NOTE(review): presumably the row pitch; units not visible here
 * cpp    - NOTE(review): presumably bytes per pixel; confirm in the checker
 * offset - NOTE(review): presumably the start offset inside robj; confirm
 */
725 struct r300_cs_track_cb {
726         struct radeon_object    *robj;
727         unsigned                pitch;
728         unsigned                cpp;
729         unsigned                offset;
730 };
731
/*
 * Tracked state of one array; struct r300_cs_track holds eleven of these.
 *
 * robj  - buffer object backing the array
 * esize - NOTE(review): presumably the per-element size; its unit is not
 *         visible in this part of the file
 */
732 struct r300_cs_track_array {
733         struct radeon_object    *robj;
734         unsigned                esize;
735 };
736
/*
 * Tracked state of one texture unit, consumed by
 * r300_cs_track_texture_check() to compute how many bytes the texture
 * needs.
 *
 * robj           - buffer object bound to the unit (NULL is rejected when
 *                  the unit is enabled)
 * pitch          - level-0 width used instead of 'width' when use_pitch is
 *                  set
 * width, height  - level-0 dimensions; each mip level divides them by
 *                  1 << level
 * num_levels     - highest mip level index; levels 0..num_levels are summed
 * cpp            - multiplier applied to the summed w * h of all levels
 * tex_coord_type - 0: no extra factor, 1: size times 1 << txdepth,
 *                  2: size times 6; other values are rejected
 * txdepth        - shift count used for tex_coord_type 1
 * width_11,
 * height_11      - extra bits ORed into w / h on CHIP_RV515 and newer
 * use_pitch      - take the width from 'pitch' (no width_11, no round-up)
 * enabled        - unit takes part in the check
 * roundup_w,
 * roundup_h      - round w / h up to a power of two
 */
737 struct r300_cs_track_texture {
738         struct radeon_object    *robj;
739         unsigned                pitch;
740         unsigned                width;
741         unsigned                height;
742         unsigned                num_levels;
743         unsigned                cpp;
744         unsigned                tex_coord_type;
745         unsigned                txdepth;
746         unsigned                width_11;
747         unsigned                height_11;
748         bool                    use_pitch;
749         bool                    enabled;
750         bool                    roundup_w;
751         bool                    roundup_h;
752 };
753
/*
 * Aggregate state tracked while checking a command stream.
 *
 * textures[] holds the 16 texture units walked by
 * r300_cs_track_texture_check().  cb[] and zb use struct r300_cs_track_cb,
 * arrays[] uses struct r300_cs_track_array.
 *
 * NOTE(review): how the scalar fields (num_cb, maxy, vtx_size, vap_vf_cntl,
 * immd_dwords, num_arrays, max_indx, z_enabled) are used is not visible in
 * this part of the file; see r300_cs_track_check() and the packet checkers.
 */
754 struct r300_cs_track {
755         unsigned                        num_cb;
756         unsigned                        maxy;
757         unsigned                        vtx_size;
758         unsigned                        vap_vf_cntl;
759         unsigned                        immd_dwords;
760         unsigned                        num_arrays;
761         unsigned                        max_indx;
762         struct r300_cs_track_array      arrays[11];
763         struct r300_cs_track_cb         cb[4];
764         struct r300_cs_track_cb         zb;
765         struct r300_cs_track_texture    textures[16];
766         bool                            z_enabled;
767 };
768
769 static inline void r300_cs_track_texture_print(struct r300_cs_track_texture *t)
770 {
771         DRM_ERROR("pitch                      %d\n", t->pitch);
772         DRM_ERROR("width                      %d\n", t->width);
773         DRM_ERROR("height                     %d\n", t->height);
774         DRM_ERROR("num levels                 %d\n", t->num_levels);
775         DRM_ERROR("depth                      %d\n", t->txdepth);
776         DRM_ERROR("bpp                        %d\n", t->cpp);
777         DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
778         DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
779         DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
780 }
781
782 static inline int r300_cs_track_texture_check(struct radeon_device *rdev,
783                                               struct r300_cs_track *track)
784 {
785         struct radeon_object *robj;
786         unsigned long size;
787         unsigned u, i, w, h;
788
789         for (u = 0; u < 16; u++) {
790                 if (!track->textures[u].enabled)
791                         continue;
792                 robj = track->textures[u].robj;
793                 if (robj == NULL) {
794                         DRM_ERROR("No texture bound to unit %u\n", u);
795                         return -EINVAL;
796                 }
797                 size = 0;
798                 for (i = 0; i <= track->textures[u].num_levels; i++) {
799                         if (track->textures[u].use_pitch) {
800                                 w = track->textures[u].pitch / (1 << i);
801                         } else {
802                                 w = track->textures[u].width / (1 << i);
803                                 if (rdev->family >= CHIP_RV515)
804                                         w |= track->textures[u].width_11;
805                                 if (track->textures[u].roundup_w)
806                                         w = roundup_pow_of_two(w);
807                         }
808                         h = track->textures[u].height / (1 << i);
809                         if (rdev->family >= CHIP_RV515)
810                                 h |= track->textures[u].height_11;
811                         if (track->textures[u].roundup_h)
812                                 h = roundup_pow_of_two(h);
813                         size += w * h;
814                 }
815                 size *= track->textures[u].cpp;
816                 switch (track->textures[u].tex_coord_type) {
817                 case 0:
818                         break;
819                 case 1:
820                         size *= (1 << track->textures[u].txdepth);
821                         break;
822                 case 2:
823                         size *= 6;
824                         break;
825                 default:
826                         DRM_ERROR("Invalid texture coordinate type %u for unit "
827                                   "%u\n", track->textures[u].tex_coord_type, u);
828                         return -EINVAL;
829                 }
830                 if (size > radeon_object_size(robj)) {
831                         DRM_ERROR("Texture of unit %u needs %lu bytes but is "
832                                   "%lu\n", u, size, radeon_object_size(robj));
833                         r300_cs_track_texture_print(&track->textures[u]);
834                         return -EINVAL;
835                 }
836         }
837         return 0;
838 }
839
840 int r300_cs_track_check(struct radeon_device *rdev, struct r300_cs_track *track)
841 {
842         unsigned i;
843         unsigned long size;
844         unsigned prim_walk;
845         unsigned nverts;
846
847         for (i = 0; i < track->num_cb; i++) {
848                 if (track->cb[i].robj == NULL) {
849                         DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
850                         return -EINVAL;
851                 }
852                 size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
853                 size += track->cb[i].offset;
854                 if (size > radeon_object_size(track->cb[i].robj)) {
855                         DRM_ERROR("[drm] Buffer too small for color buffer %d "
856                                   "(need %lu have %lu) !\n", i, size,
857                                   radeon_object_size(track->cb[i].robj));
858                         DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
859                                   i, track->cb[i].pitch, track->cb[i].cpp,
860                                   track->cb[i].offset, track->maxy);
861                         return -EINVAL;
862                 }
863         }
864         if (track->z_enabled) {
865                 if (track->zb.robj == NULL) {
866                         DRM_ERROR("[drm] No buffer for z buffer !\n");
867                         return -EINVAL;
868                 }
869                 size = track->zb.pitch * track->zb.cpp * track->maxy;
870                 size += track->zb.offset;
871                 if (size > radeon_object_size(track->zb.robj)) {
872                         DRM_ERROR("[drm] Buffer too small for z buffer "
873                                   "(need %lu have %lu) !\n", size,
874                                   radeon_object_size(track->zb.robj));
875                         return -EINVAL;
876                 }
877         }
878         prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
879         nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
880         switch (prim_walk) {
881         case 1:
882                 for (i = 0; i < track->num_arrays; i++) {
883                         size = track->arrays[i].esize * track->max_indx * 4;
884                         if (track->arrays[i].robj == NULL) {
885                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
886                                           "bound\n", prim_walk, i);
887                                 return -EINVAL;
888                         }
889                         if (size > radeon_object_size(track->arrays[i].robj)) {
890                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
891                                            "have %lu dwords\n", prim_walk, i,
892                                            size >> 2,
893                                            radeon_object_size(track->arrays[i].robj) >> 2);
894                                 DRM_ERROR("Max indices %u\n", track->max_indx);
895                                 return -EINVAL;
896                         }
897                 }
898                 break;
899         case 2:
900                 for (i = 0; i < track->num_arrays; i++) {
901                         size = track->arrays[i].esize * (nverts - 1) * 4;
902                         if (track->arrays[i].robj == NULL) {
903                                 DRM_ERROR("(PW %u) Vertex array %u no buffer "
904                                           "bound\n", prim_walk, i);
905                                 return -EINVAL;
906                         }
907                         if (size > radeon_object_size(track->arrays[i].robj)) {
908                                 DRM_ERROR("(PW %u) Vertex array %u need %lu dwords "
909                                            "have %lu dwords\n", prim_walk, i, size >> 2,
910                                            radeon_object_size(track->arrays[i].robj) >> 2);
911                                 return -EINVAL;
912                         }
913                 }
914                 break;
915         case 3:
916                 size = track->vtx_size * nverts;
917                 if (size != track->immd_dwords) {
918                         DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
919                                   track->immd_dwords, size);
920                         DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
921                                   nverts, track->vtx_size);
922                         return -EINVAL;
923                 }
924                 break;
925         default:
926                 DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
927                           prim_walk);
928                 return -EINVAL;
929         }
930         return r300_cs_track_texture_check(rdev, track);
931 }
932
933 static inline void r300_cs_track_clear(struct r300_cs_track *track)
934 {
935         unsigned i;
936
937         track->num_cb = 4;
938         track->maxy = 4096;
939         for (i = 0; i < track->num_cb; i++) {
940                 track->cb[i].robj = NULL;
941                 track->cb[i].pitch = 8192;
942                 track->cb[i].cpp = 16;
943                 track->cb[i].offset = 0;
944         }
945         track->z_enabled = true;
946         track->zb.robj = NULL;
947         track->zb.pitch = 8192;
948         track->zb.cpp = 4;
949         track->zb.offset = 0;
950         track->vtx_size = 0x7F;
951         track->immd_dwords = 0xFFFFFFFFUL;
952         track->num_arrays = 11;
953         track->max_indx = 0x00FFFFFFUL;
954         for (i = 0; i < track->num_arrays; i++) {
955                 track->arrays[i].robj = NULL;
956                 track->arrays[i].esize = 0x7F;
957         }
958         for (i = 0; i < 16; i++) {
959                 track->textures[i].pitch = 16536;
960                 track->textures[i].width = 16536;
961                 track->textures[i].height = 16536;
962                 track->textures[i].width_11 = 1 << 11;
963                 track->textures[i].height_11 = 1 << 11;
964                 track->textures[i].num_levels = 12;
965                 track->textures[i].txdepth = 16;
966                 track->textures[i].cpp = 64;
967                 track->textures[i].tex_coord_type = 1;
968                 track->textures[i].robj = NULL;
969                 /* CS IB emission code makes sure texture unit are disabled */
970                 track->textures[i].enabled = false;
971                 track->textures[i].roundup_w = true;
972                 track->textures[i].roundup_h = true;
973         }
974 }
975
/*
 * Register bitmap for the r300 family: 159 32-bit words, i.e. 5088 bits.
 *
 * NOTE(review): the consumer of this table is not visible in this part of
 * the file.  Presumably each bit corresponds to one dword-aligned register
 * and tells the packet0 checker whether a write may pass unchecked or must
 * be routed through r300_packet0_check() - confirm the bit polarity and
 * indexing against the code that reads the table before editing any entry.
 */
static const unsigned r300_reg_safe_bm[159] = {
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0x17FF1FFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFF30FFBF,
	0xFFFFFFF8, 0xC3E6FFFF, 0xFFFFF6DF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF03F,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFEFCE, 0xF00EBFFF, 0x007C0000,
	0xF0000078, 0xFF000009, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFF7FF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
	0xFFFFFC78, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
	0x38FF8F50, 0xFFF88082, 0xF000000C, 0xFAE009FF,
	0x0000FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
	0x00000000, 0x0000C100, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFF80FFFF,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x0003FC01, 0xFFFFFFF8, 0xFE800B19,
};
1018
1019 static int r300_packet0_check(struct radeon_cs_parser *p,
1020                 struct radeon_cs_packet *pkt,
1021                 unsigned idx, unsigned reg)
1022 {
1023         struct radeon_cs_chunk *ib_chunk;
1024         struct radeon_cs_reloc *reloc;
1025         struct r300_cs_track *track;
1026         volatile uint32_t *ib;
1027         uint32_t tmp, tile_flags = 0;
1028         unsigned i;
1029         int r;
1030
1031         ib = p->ib->ptr;
1032         ib_chunk = &p->chunks[p->chunk_ib_idx];
1033         track = (struct r300_cs_track*)p->track;
1034         switch(reg) {
1035         case AVIVO_D1MODE_VLINE_START_END:
1036         case RADEON_CRTC_GUI_TRIG_VLINE:
1037                 r = r100_cs_packet_parse_vline(p);
1038                 if (r) {
1039                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1040                                         idx, reg);
1041                         r100_cs_dump_packet(p, pkt);
1042                         return r;
1043                 }
1044                 break;
1045         case RADEON_DST_PITCH_OFFSET:
1046         case RADEON_SRC_PITCH_OFFSET:
1047                 r = r100_cs_packet_next_reloc(p, &reloc);
1048                 if (r) {
1049                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1050                                         idx, reg);
1051                         r100_cs_dump_packet(p, pkt);
1052                         return r;
1053                 }
1054                 tmp = ib_chunk->kdata[idx] & 0x003fffff;
1055                 tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
1056
1057                 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1058                         tile_flags |= RADEON_DST_TILE_MACRO;
1059                 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
1060                         if (reg == RADEON_SRC_PITCH_OFFSET) {
1061                                 DRM_ERROR("Cannot src blit from microtiled surface\n");
1062                                 r100_cs_dump_packet(p, pkt);
1063                                 return -EINVAL;
1064                         }
1065                         tile_flags |= RADEON_DST_TILE_MICRO;
1066                 }
1067                 tmp |= tile_flags;
1068                 ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
1069                 break;
1070         case R300_RB3D_COLOROFFSET0:
1071         case R300_RB3D_COLOROFFSET1:
1072         case R300_RB3D_COLOROFFSET2:
1073         case R300_RB3D_COLOROFFSET3:
1074                 i = (reg - R300_RB3D_COLOROFFSET0) >> 2;
1075                 r = r100_cs_packet_next_reloc(p, &reloc);
1076                 if (r) {
1077                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1078                                         idx, reg);
1079                         r100_cs_dump_packet(p, pkt);
1080                         return r;
1081                 }
1082                 track->cb[i].robj = reloc->robj;
1083                 track->cb[i].offset = ib_chunk->kdata[idx];
1084                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1085                 break;
1086         case R300_ZB_DEPTHOFFSET:
1087                 r = r100_cs_packet_next_reloc(p, &reloc);
1088                 if (r) {
1089                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1090                                         idx, reg);
1091                         r100_cs_dump_packet(p, pkt);
1092                         return r;
1093                 }
1094                 track->zb.robj = reloc->robj;
1095                 track->zb.offset = ib_chunk->kdata[idx];
1096                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1097                 break;
1098         case R300_TX_OFFSET_0:
1099         case R300_TX_OFFSET_0+4:
1100         case R300_TX_OFFSET_0+8:
1101         case R300_TX_OFFSET_0+12:
1102         case R300_TX_OFFSET_0+16:
1103         case R300_TX_OFFSET_0+20:
1104         case R300_TX_OFFSET_0+24:
1105         case R300_TX_OFFSET_0+28:
1106         case R300_TX_OFFSET_0+32:
1107         case R300_TX_OFFSET_0+36:
1108         case R300_TX_OFFSET_0+40:
1109         case R300_TX_OFFSET_0+44:
1110         case R300_TX_OFFSET_0+48:
1111         case R300_TX_OFFSET_0+52:
1112         case R300_TX_OFFSET_0+56:
1113         case R300_TX_OFFSET_0+60:
1114                 i = (reg - R300_TX_OFFSET_0) >> 2;
1115                 r = r100_cs_packet_next_reloc(p, &reloc);
1116                 if (r) {
1117                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1118                                         idx, reg);
1119                         r100_cs_dump_packet(p, pkt);
1120                         return r;
1121                 }
1122                 ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
1123                 track->textures[i].robj = reloc->robj;
1124                 break;
1125         /* Tracked registers */
1126         case 0x2084:
1127                 /* VAP_VF_CNTL */
1128                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1129                 break;
1130         case 0x20B4:
1131                 /* VAP_VTX_SIZE */
1132                 track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
1133                 break;
1134         case 0x2134:
1135                 /* VAP_VF_MAX_VTX_INDX */
1136                 track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
1137                 break;
1138         case 0x43E4:
1139                 /* SC_SCISSOR1 */
1140                 track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
1141                 if (p->rdev->family < CHIP_RV515) {
1142                         track->maxy -= 1440;
1143                 }
1144                 break;
1145         case 0x4E00:
1146                 /* RB3D_CCTL */
1147                 track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
1148                 break;
1149         case 0x4E38:
1150         case 0x4E3C:
1151         case 0x4E40:
1152         case 0x4E44:
1153                 /* RB3D_COLORPITCH0 */
1154                 /* RB3D_COLORPITCH1 */
1155                 /* RB3D_COLORPITCH2 */
1156                 /* RB3D_COLORPITCH3 */
1157                 r = r100_cs_packet_next_reloc(p, &reloc);
1158                 if (r) {
1159                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1160                                   idx, reg);
1161                         r100_cs_dump_packet(p, pkt);
1162                         return r;
1163                 }
1164
1165                 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1166                         tile_flags |= R300_COLOR_TILE_ENABLE;
1167                 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1168                         tile_flags |= R300_COLOR_MICROTILE_ENABLE;
1169
1170                 tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
1171                 tmp |= tile_flags;
1172                 ib[idx] = tmp;
1173
1174                 i = (reg - 0x4E38) >> 2;
1175                 track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
1176                 switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
1177                 case 9:
1178                 case 11:
1179                 case 12:
1180                         track->cb[i].cpp = 1;
1181                         break;
1182                 case 3:
1183                 case 4:
1184                 case 13:
1185                 case 15:
1186                         track->cb[i].cpp = 2;
1187                         break;
1188                 case 6:
1189                         track->cb[i].cpp = 4;
1190                         break;
1191                 case 10:
1192                         track->cb[i].cpp = 8;
1193                         break;
1194                 case 7:
1195                         track->cb[i].cpp = 16;
1196                         break;
1197                 default:
1198                         DRM_ERROR("Invalid color buffer format (%d) !\n",
1199                                   ((ib_chunk->kdata[idx] >> 21) & 0xF));
1200                         return -EINVAL;
1201                 }
1202                 break;
1203         case 0x4F00:
1204                 /* ZB_CNTL */
1205                 if (ib_chunk->kdata[idx] & 2) {
1206                         track->z_enabled = true;
1207                 } else {
1208                         track->z_enabled = false;
1209                 }
1210                 break;
1211         case 0x4F10:
1212                 /* ZB_FORMAT */
1213                 switch ((ib_chunk->kdata[idx] & 0xF)) {
1214                 case 0:
1215                 case 1:
1216                         track->zb.cpp = 2;
1217                         break;
1218                 case 2:
1219                         track->zb.cpp = 4;
1220                         break;
1221                 default:
1222                         DRM_ERROR("Invalid z buffer format (%d) !\n",
1223                                   (ib_chunk->kdata[idx] & 0xF));
1224                         return -EINVAL;
1225                 }
1226                 break;
1227         case 0x4F24:
1228                 /* ZB_DEPTHPITCH */
1229                 r = r100_cs_packet_next_reloc(p, &reloc);
1230                 if (r) {
1231                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1232                                   idx, reg);
1233                         r100_cs_dump_packet(p, pkt);
1234                         return r;
1235                 }
1236
1237                 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1238                         tile_flags |= R300_DEPTHMACROTILE_ENABLE;
1239                 if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1240                         tile_flags |= R300_DEPTHMICROTILE_TILED;;
1241
1242                 tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
1243                 tmp |= tile_flags;
1244                 ib[idx] = tmp;
1245
1246                 track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
1247                 break;
1248         case 0x4104:
1249                 for (i = 0; i < 16; i++) {
1250                         bool enabled;
1251
1252                         enabled = !!(ib_chunk->kdata[idx] & (1 << i));
1253                         track->textures[i].enabled = enabled;
1254                 }
1255                 break;
1256         case 0x44C0:
1257         case 0x44C4:
1258         case 0x44C8:
1259         case 0x44CC:
1260         case 0x44D0:
1261         case 0x44D4:
1262         case 0x44D8:
1263         case 0x44DC:
1264         case 0x44E0:
1265         case 0x44E4:
1266         case 0x44E8:
1267         case 0x44EC:
1268         case 0x44F0:
1269         case 0x44F4:
1270         case 0x44F8:
1271         case 0x44FC:
1272                 /* TX_FORMAT1_[0-15] */
1273                 i = (reg - 0x44C0) >> 2;
1274                 tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
1275                 track->textures[i].tex_coord_type = tmp;
1276                 switch ((ib_chunk->kdata[idx] & 0x1F)) {
1277                 case 0:
1278                 case 2:
1279                 case 5:
1280                 case 18:
1281                 case 20:
1282                 case 21:
1283                         track->textures[i].cpp = 1;
1284                         break;
1285                 case 1:
1286                 case 3:
1287                 case 6:
1288                 case 7:
1289                 case 10:
1290                 case 11:
1291                 case 19:
1292                 case 22:
1293                 case 24:
1294                         track->textures[i].cpp = 2;
1295                         break;
1296                 case 4:
1297                 case 8:
1298                 case 9:
1299                 case 12:
1300                 case 13:
1301                 case 23:
1302                 case 25:
1303                 case 27:
1304                 case 30:
1305                         track->textures[i].cpp = 4;
1306                         break;
1307                 case 14:
1308                 case 26:
1309                 case 28:
1310                         track->textures[i].cpp = 8;
1311                         break;
1312                 case 29:
1313                         track->textures[i].cpp = 16;
1314                         break;
1315                 default:
1316                         DRM_ERROR("Invalid texture format %u\n",
1317                                   (ib_chunk->kdata[idx] & 0x1F));
1318                         return -EINVAL;
1319                         break;
1320                 }
1321                 break;
1322         case 0x4400:
1323         case 0x4404:
1324         case 0x4408:
1325         case 0x440C:
1326         case 0x4410:
1327         case 0x4414:
1328         case 0x4418:
1329         case 0x441C:
1330         case 0x4420:
1331         case 0x4424:
1332         case 0x4428:
1333         case 0x442C:
1334         case 0x4430:
1335         case 0x4434:
1336         case 0x4438:
1337         case 0x443C:
1338                 /* TX_FILTER0_[0-15] */
1339                 i = (reg - 0x4400) >> 2;
1340                 tmp = ib_chunk->kdata[idx] & 0x7;;
1341                 if (tmp == 2 || tmp == 4 || tmp == 6) {
1342                         track->textures[i].roundup_w = false;
1343                 }
1344                 tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;;
1345                 if (tmp == 2 || tmp == 4 || tmp == 6) {
1346                         track->textures[i].roundup_h = false;
1347                 }
1348                 break;
1349         case 0x4500:
1350         case 0x4504:
1351         case 0x4508:
1352         case 0x450C:
1353         case 0x4510:
1354         case 0x4514:
1355         case 0x4518:
1356         case 0x451C:
1357         case 0x4520:
1358         case 0x4524:
1359         case 0x4528:
1360         case 0x452C:
1361         case 0x4530:
1362         case 0x4534:
1363         case 0x4538:
1364         case 0x453C:
1365                 /* TX_FORMAT2_[0-15] */
1366                 i = (reg - 0x4500) >> 2;
1367                 tmp = ib_chunk->kdata[idx] & 0x3FFF;
1368                 track->textures[i].pitch = tmp + 1;
1369                 if (p->rdev->family >= CHIP_RV515) {
1370                         tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
1371                         track->textures[i].width_11 = tmp;
1372                         tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
1373                         track->textures[i].height_11 = tmp;
1374                 }
1375                 break;
1376         case 0x4480:
1377         case 0x4484:
1378         case 0x4488:
1379         case 0x448C:
1380         case 0x4490:
1381         case 0x4494:
1382         case 0x4498:
1383         case 0x449C:
1384         case 0x44A0:
1385         case 0x44A4:
1386         case 0x44A8:
1387         case 0x44AC:
1388         case 0x44B0:
1389         case 0x44B4:
1390         case 0x44B8:
1391         case 0x44BC:
1392                 /* TX_FORMAT0_[0-15] */
1393                 i = (reg - 0x4480) >> 2;
1394                 tmp = ib_chunk->kdata[idx] & 0x7FF;
1395                 track->textures[i].width = tmp + 1;
1396                 tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
1397                 track->textures[i].height = tmp + 1;
1398                 tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
1399                 track->textures[i].num_levels = tmp;
1400                 tmp = ib_chunk->kdata[idx] & (1 << 31);
1401                 track->textures[i].use_pitch = !!tmp;
1402                 tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
1403                 track->textures[i].txdepth = tmp;
1404                 break;
1405         default:
1406                 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1407                        reg, idx);
1408                 return -EINVAL;
1409         }
1410         return 0;
1411 }
1412
1413 static int r300_packet3_check(struct radeon_cs_parser *p,
1414                               struct radeon_cs_packet *pkt)
1415 {
1416         struct radeon_cs_chunk *ib_chunk;
1417         struct radeon_cs_reloc *reloc;
1418         struct r300_cs_track *track;
1419         volatile uint32_t *ib;
1420         unsigned idx;
1421         unsigned i, c;
1422         int r;
1423
1424         ib = p->ib->ptr;
1425         ib_chunk = &p->chunks[p->chunk_ib_idx];
1426         idx = pkt->idx + 1;
1427         track = (struct r300_cs_track*)p->track;
1428         switch(pkt->opcode) {
1429         case PACKET3_3D_LOAD_VBPNTR:
1430                 c = ib_chunk->kdata[idx++] & 0x1F;
1431                 track->num_arrays = c;
1432                 for (i = 0; i < (c - 1); i+=2, idx+=3) {
1433                         r = r100_cs_packet_next_reloc(p, &reloc);
1434                         if (r) {
1435                                 DRM_ERROR("No reloc for packet3 %d\n",
1436                                           pkt->opcode);
1437                                 r100_cs_dump_packet(p, pkt);
1438                                 return r;
1439                         }
1440                         ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1441                         track->arrays[i + 0].robj = reloc->robj;
1442                         track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
1443                         track->arrays[i + 0].esize &= 0x7F;
1444                         r = r100_cs_packet_next_reloc(p, &reloc);
1445                         if (r) {
1446                                 DRM_ERROR("No reloc for packet3 %d\n",
1447                                           pkt->opcode);
1448                                 r100_cs_dump_packet(p, pkt);
1449                                 return r;
1450                         }
1451                         ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
1452                         track->arrays[i + 1].robj = reloc->robj;
1453                         track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
1454                         track->arrays[i + 1].esize &= 0x7F;
1455                 }
1456                 if (c & 1) {
1457                         r = r100_cs_packet_next_reloc(p, &reloc);
1458                         if (r) {
1459                                 DRM_ERROR("No reloc for packet3 %d\n",
1460                                           pkt->opcode);
1461                                 r100_cs_dump_packet(p, pkt);
1462                                 return r;
1463                         }
1464                         ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1465                         track->arrays[i + 0].robj = reloc->robj;
1466                         track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
1467                         track->arrays[i + 0].esize &= 0x7F;
1468                 }
1469                 break;
1470         case PACKET3_INDX_BUFFER:
1471                 r = r100_cs_packet_next_reloc(p, &reloc);
1472                 if (r) {
1473                         DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
1474                         r100_cs_dump_packet(p, pkt);
1475                         return r;
1476                 }
1477                 ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
1478                 r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
1479                 if (r) {
1480                         return r;
1481                 }
1482                 break;
1483         /* Draw packet */
1484         case PACKET3_3D_DRAW_IMMD:
1485                 /* Number of dwords is vtx_size * (num_vertices - 1)
1486                  * PRIM_WALK must be equal to 3 vertex data in embedded
1487                  * in cmd stream */
1488                 if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
1489                         DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1490                         return -EINVAL;
1491                 }
1492                 track->vap_vf_cntl = ib_chunk->kdata[idx+1];
1493                 track->immd_dwords = pkt->count - 1;
1494                 r = r300_cs_track_check(p->rdev, track);
1495                 if (r) {
1496                         return r;
1497                 }
1498                 break;
1499         case PACKET3_3D_DRAW_IMMD_2:
1500                 /* Number of dwords is vtx_size * (num_vertices - 1)
1501                  * PRIM_WALK must be equal to 3 vertex data in embedded
1502                  * in cmd stream */
1503                 if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
1504                         DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
1505                         return -EINVAL;
1506                 }
1507                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1508                 track->immd_dwords = pkt->count;
1509                 r = r300_cs_track_check(p->rdev, track);
1510                 if (r) {
1511                         return r;
1512                 }
1513                 break;
1514         case PACKET3_3D_DRAW_VBUF:
1515                 track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
1516                 r = r300_cs_track_check(p->rdev, track);
1517                 if (r) {
1518                         return r;
1519                 }
1520                 break;
1521         case PACKET3_3D_DRAW_VBUF_2:
1522                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1523                 r = r300_cs_track_check(p->rdev, track);
1524                 if (r) {
1525                         return r;
1526                 }
1527                 break;
1528         case PACKET3_3D_DRAW_INDX:
1529                 track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
1530                 r = r300_cs_track_check(p->rdev, track);
1531                 if (r) {
1532                         return r;
1533                 }
1534                 break;
1535         case PACKET3_3D_DRAW_INDX_2:
1536                 track->vap_vf_cntl = ib_chunk->kdata[idx];
1537                 r = r300_cs_track_check(p->rdev, track);
1538                 if (r) {
1539                         return r;
1540                 }
1541                 break;
1542         case PACKET3_NOP:
1543                 break;
1544         default:
1545                 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
1546                 return -EINVAL;
1547         }
1548         return 0;
1549 }
1550
1551 int r300_cs_parse(struct radeon_cs_parser *p)
1552 {
1553         struct radeon_cs_packet pkt;
1554         struct r300_cs_track track;
1555         int r;
1556
1557         r300_cs_track_clear(&track);
1558         p->track = &track;
1559         do {
1560                 r = r100_cs_packet_parse(p, &pkt, p->idx);
1561                 if (r) {
1562                         return r;
1563                 }
1564                 p->idx += pkt.count + 2;
1565                 switch (pkt.type) {
1566                 case PACKET_TYPE0:
1567                         r = r100_cs_parse_packet0(p, &pkt,
1568                                                   p->rdev->config.r300.reg_safe_bm,
1569                                                   p->rdev->config.r300.reg_safe_bm_size,
1570                                                   &r300_packet0_check);
1571                         break;
1572                 case PACKET_TYPE2:
1573                         break;
1574                 case PACKET_TYPE3:
1575                         r = r300_packet3_check(p, &pkt);
1576                         break;
1577                 default:
1578                         DRM_ERROR("Unknown packet type %d !\n", pkt.type);
1579                         return -EINVAL;
1580                 }
1581                 if (r) {
1582                         return r;
1583                 }
1584         } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
1585         return 0;
1586 }
1587
1588 int r300_init(struct radeon_device *rdev)
1589 {
1590         rdev->config.r300.reg_safe_bm = r300_reg_safe_bm;
1591         rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r300_reg_safe_bm);
1592         return 0;
1593 }