drm/radeon: overhaul texture checking. (v3)
[linux-3.10.git] / drivers / gpu / drm / radeon / r600_cs.c
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/kernel.h>
29 #include "drmP.h"
30 #include "radeon.h"
31 #include "r600d.h"
32 #include "r600_reg_safe.h"
33
/* Relocation lookup is indirected through a function pointer so the same
 * parser code can serve both the kernel memory-manager path (mm) and the
 * legacy userspace path (nomm); it defaults to the mm variant. */
static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
					struct radeon_cs_reloc **cs_reloc);
static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
					struct radeon_cs_reloc **cs_reloc);
typedef int (*next_reloc_t)(struct radeon_cs_parser*, struct radeon_cs_reloc**);
static next_reloc_t r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_mm;
/* implemented in the legacy (UMS) support code */
extern void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size);
41
42
/* Per-command-stream state tracked while parsing, used to validate color
 * and depth buffer programming before the IB is handed to the GPU. */
struct r600_cs_track {
	/* configuration we mirror so that we use same code btw kms/ums */
	u32			group_size;
	u32			nbanks;
	u32			npipes;
	/* value we track */
	u32			sq_config;
	u32			nsamples;
	u32			cb_color_base_last[8];
	struct radeon_bo	*cb_color_bo[8];	/* BO bound to each color target */
	u64			cb_color_bo_mc[8];	/* MC (GPU) address of each color BO */
	u32			cb_color_bo_offset[8];
	struct radeon_bo	*cb_color_frag_bo[8];
	struct radeon_bo	*cb_color_tile_bo[8];
	u32			cb_color_info[8];
	u32			cb_color_size_idx[8];	/* IB index of CB_COLOR*_SIZE, for rewrite */
	u32			cb_target_mask;
	u32			cb_shader_mask;
	u32			cb_color_size[8];
	u32			vgt_strmout_en;
	u32			vgt_strmout_buffer_en;
	u32			db_depth_control;
	u32			db_depth_info;
	u32			db_depth_size_idx;	/* IB index of DB_DEPTH_SIZE, for rewrite */
	u32			db_depth_view;
	u32			db_depth_size;
	u32			db_offset;
	struct radeon_bo	*db_bo;			/* depth/stencil BO */
	u64			db_bo_mc;		/* MC (GPU) address of depth BO */
};
73
/* Table-entry builders: FMT_<bits>_BIT(fmt, vc) expands to a designated
 * initializer { blockwidth, blockheight, blocksize, valid_color } for a
 * 1x1-pixel-block format of <bits> bits per pixel; vc marks whether the
 * format may be used as a color (CB) format. */
#define FMT_8_BIT(fmt, vc) [fmt] = { 1, 1, 1, vc }
#define FMT_16_BIT(fmt, vc) [fmt] = { 1, 1, 2, vc }
#define FMT_24_BIT(fmt) [fmt] = { 1, 1, 3, 0 }
#define FMT_32_BIT(fmt, vc) [fmt] = { 1, 1, 4, vc }
#define FMT_48_BIT(fmt) [fmt] = { 1, 1, 6, 0 }
#define FMT_64_BIT(fmt, vc) [fmt] = { 1, 1, 8, vc }
#define FMT_96_BIT(fmt) [fmt] = { 1, 1, 12, 0 }
#define FMT_128_BIT(fmt, vc) [fmt] = { 1, 1, 16, vc }

/* Description of one hardware surface format. */
struct gpu_formats {
	unsigned blockwidth;	/* block width in pixels (e.g. 4 for BCn) */
	unsigned blockheight;	/* block height in pixels */
	unsigned blocksize;	/* bytes per block; 0 means unknown format */
	unsigned valid_color;	/* non-zero if usable as a CB color format */
};
89
/* Indexed by the hardware V_038004_* format value; entries that are left
 * zero-initialized (blockwidth == 0) denote invalid/unsupported formats. */
static const struct gpu_formats color_formats_table[] = {
	/* 8 bit */
	FMT_8_BIT(V_038004_COLOR_8, 1),
	FMT_8_BIT(V_038004_COLOR_4_4, 1),
	FMT_8_BIT(V_038004_COLOR_3_3_2, 1),
	FMT_8_BIT(V_038004_FMT_1, 0),

	/* 16-bit */
	FMT_16_BIT(V_038004_COLOR_16, 1),
	FMT_16_BIT(V_038004_COLOR_16_FLOAT, 1),
	FMT_16_BIT(V_038004_COLOR_8_8, 1),
	FMT_16_BIT(V_038004_COLOR_5_6_5, 1),
	FMT_16_BIT(V_038004_COLOR_6_5_5, 1),
	FMT_16_BIT(V_038004_COLOR_1_5_5_5, 1),
	FMT_16_BIT(V_038004_COLOR_4_4_4_4, 1),
	FMT_16_BIT(V_038004_COLOR_5_5_5_1, 1),

	/* 24-bit */
	FMT_24_BIT(V_038004_FMT_8_8_8),

	/* 32-bit */
	FMT_32_BIT(V_038004_COLOR_32, 1),
	FMT_32_BIT(V_038004_COLOR_32_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_16_16, 1),
	FMT_32_BIT(V_038004_COLOR_16_16_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_8_24, 1),
	FMT_32_BIT(V_038004_COLOR_8_24_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_24_8, 1),
	FMT_32_BIT(V_038004_COLOR_24_8_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_10_11_11, 1),
	FMT_32_BIT(V_038004_COLOR_10_11_11_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_11_11_10, 1),
	FMT_32_BIT(V_038004_COLOR_11_11_10_FLOAT, 1),
	FMT_32_BIT(V_038004_COLOR_2_10_10_10, 1),
	FMT_32_BIT(V_038004_COLOR_8_8_8_8, 1),
	FMT_32_BIT(V_038004_COLOR_10_10_10_2, 1),
	FMT_32_BIT(V_038004_FMT_5_9_9_9_SHAREDEXP, 0),
	FMT_32_BIT(V_038004_FMT_32_AS_8, 0),
	FMT_32_BIT(V_038004_FMT_32_AS_8_8, 0),

	/* 48-bit */
	FMT_48_BIT(V_038004_FMT_16_16_16),
	FMT_48_BIT(V_038004_FMT_16_16_16_FLOAT),

	/* 64-bit */
	FMT_64_BIT(V_038004_COLOR_X24_8_32_FLOAT, 1),
	FMT_64_BIT(V_038004_COLOR_32_32, 1),
	FMT_64_BIT(V_038004_COLOR_32_32_FLOAT, 1),
	FMT_64_BIT(V_038004_COLOR_16_16_16_16, 1),
	FMT_64_BIT(V_038004_COLOR_16_16_16_16_FLOAT, 1),

	FMT_96_BIT(V_038004_FMT_32_32_32),
	FMT_96_BIT(V_038004_FMT_32_32_32_FLOAT),

	/* 128-bit */
	FMT_128_BIT(V_038004_COLOR_32_32_32_32, 1),
	FMT_128_BIT(V_038004_COLOR_32_32_32_32_FLOAT, 1),

	/* 2x1 pixel packed-subsample formats */
	[V_038004_FMT_GB_GR] = { 2, 1, 4, 0 },
	[V_038004_FMT_BG_RG] = { 2, 1, 4, 0 },

	/* block compressed formats */
	[V_038004_FMT_BC1] = { 4, 4, 8, 0 },
	[V_038004_FMT_BC2] = { 4, 4, 16, 0 },
	[V_038004_FMT_BC3] = { 4, 4, 16, 0 },
	[V_038004_FMT_BC4] = { 4, 4, 8, 0 },
	[V_038004_FMT_BC5] = { 4, 4, 16, 0},

};
159
160 static inline bool fmt_is_valid_color(u32 format)
161 {
162         if (format > ARRAY_SIZE(color_formats_table))
163                 return false;
164         
165         if (color_formats_table[format].valid_color)
166                 return true;
167
168         return false;
169 }
170
171 static inline bool fmt_is_valid_texture(u32 format)
172 {
173         if (format > ARRAY_SIZE(color_formats_table))
174                 return false;
175         
176         if (color_formats_table[format].blockwidth > 0)
177                 return true;
178
179         return false;
180 }
181
182 static inline int fmt_get_blocksize(u32 format)
183 {
184         if (format > ARRAY_SIZE(color_formats_table))
185                 return 0;
186
187         return color_formats_table[format].blocksize;
188 }
189
190 static inline int fmt_get_nblocksx(u32 format, u32 w)
191 {
192         unsigned bw;
193         if (format > ARRAY_SIZE(color_formats_table))
194                 return 0;
195
196         bw = color_formats_table[format].blockwidth;
197         if (bw == 0)
198                 return 0;
199
200         return (w + bw - 1) / bw;
201 }
202
203 static inline int fmt_get_nblocksy(u32 format, u32 h)
204 {
205         unsigned bh;
206         if (format > ARRAY_SIZE(color_formats_table))
207                 return 0;
208
209         bh = color_formats_table[format].blockheight;
210         if (bh == 0)
211                 return 0;
212
213         return (h + bh - 1) / bh;
214 }
215
216 static inline int r600_bpe_from_format(u32 *bpe, u32 format)
217 {
218         unsigned res;
219         if (format > ARRAY_SIZE(color_formats_table))
220                 goto fail;
221
222         res = color_formats_table[format].blocksize;
223         if (res == 0)
224                 goto fail;
225
226         *bpe = res;
227         return 0;
228
229 fail:
230         *bpe = 16;
231         return -EINVAL;
232 }
233
/* Input bundle for r600_get_array_mode_alignment(). */
struct array_mode_checker {
	int array_mode;		/* ARRAY_* tiling mode */
	u32 group_size;		/* tile group size in bytes */
	u32 nbanks;		/* number of memory banks */
	u32 npipes;		/* number of pipes */
	u32 nsamples;		/* samples per pixel */
	u32 blocksize;		/* bytes per block of the surface format */
};
242
/* returns alignment in pixels for pitch/height/depth and bytes for base */
static inline int r600_get_array_mode_alignment(struct array_mode_checker *values,
						u32 *pitch_align,
						u32 *height_align,
						u32 *depth_align,
						u64 *base_align)
{
	/* micro tiles are fixed at 8x8 pixels here */
	u32 tile_width = 8;
	u32 tile_height = 8;
	/* a macro tile spans nbanks x npipes micro tiles */
	u32 macro_tile_width = values->nbanks;
	u32 macro_tile_height = values->npipes;
	u32 tile_bytes = tile_width * tile_height * values->blocksize * values->nsamples;
	u32 macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes;

	switch (values->array_mode) {
	case ARRAY_LINEAR_GENERAL:
		/* technically tile_width/_height for pitch/height */
		*pitch_align = 1; /* tile_width */
		*height_align = 1; /* tile_height */
		*depth_align = 1;
		*base_align = 1;
		break;
	case ARRAY_LINEAR_ALIGNED:
		/* a row must cover at least one full group */
		*pitch_align = max((u32)64, (u32)(values->group_size / values->blocksize));
		*height_align = tile_height;
		*depth_align = 1;
		*base_align = values->group_size;
		break;
	case ARRAY_1D_TILED_THIN1:
		*pitch_align = max((u32)tile_width,
				   (u32)(values->group_size /
					 (tile_height * values->blocksize * values->nsamples)));
		*height_align = tile_height;
		*depth_align = 1;
		*base_align = values->group_size;
		break;
	case ARRAY_2D_TILED_THIN1:
		*pitch_align = max((u32)macro_tile_width,
				  (u32)(((values->group_size / tile_height) /
					 (values->blocksize * values->nsamples)) *
					values->nbanks)) * tile_width;
		*height_align = macro_tile_height * tile_height;
		*depth_align = 1;
		/* base must start on a macro tile (or a full aligned row) */
		*base_align = max(macro_tile_bytes,
				  (*pitch_align) * values->blocksize * (*height_align) * values->nsamples);
		break;
	default:
		/* unknown tiling mode */
		return -EINVAL;
	}

	return 0;
}
295
296 static void r600_cs_track_init(struct r600_cs_track *track)
297 {
298         int i;
299
300         /* assume DX9 mode */
301         track->sq_config = DX9_CONSTS;
302         for (i = 0; i < 8; i++) {
303                 track->cb_color_base_last[i] = 0;
304                 track->cb_color_size[i] = 0;
305                 track->cb_color_size_idx[i] = 0;
306                 track->cb_color_info[i] = 0;
307                 track->cb_color_bo[i] = NULL;
308                 track->cb_color_bo_offset[i] = 0xFFFFFFFF;
309                 track->cb_color_bo_mc[i] = 0xFFFFFFFF;
310         }
311         track->cb_target_mask = 0xFFFFFFFF;
312         track->cb_shader_mask = 0xFFFFFFFF;
313         track->db_bo = NULL;
314         track->db_bo_mc = 0xFFFFFFFF;
315         /* assume the biggest format and that htile is enabled */
316         track->db_depth_info = 7 | (1 << 25);
317         track->db_depth_view = 0xFFFFC000;
318         track->db_depth_size = 0xFFFFFFFF;
319         track->db_depth_size_idx = 0;
320         track->db_depth_control = 0xFFFFFFFF;
321 }
322
/*
 * r600_cs_track_validate_cb() - validate color buffer @i and rewrite its
 * CB_COLOR[i]_SIZE dword in the IB so the GPU cannot address past the BO.
 *
 * Checks format validity, tiling mode, pitch/height/base alignment and
 * that the computed surface fits inside the bound BO.  Returns 0 on
 * success, -EINVAL on any violation.
 */
static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
{
	struct r600_cs_track *track = p->track;
	u32 slice_tile_max, size, tmp;
	u32 height, height_align, pitch, pitch_align, depth_align;
	u64 base_offset, base_align;
	struct array_mode_checker array_check;
	volatile u32 *ib = p->ib->ptr;
	unsigned array_mode;
	u32 format;
	if (G_0280A0_TILE_MODE(track->cb_color_info[i])) {
		dev_warn(p->dev, "FMASK or CMASK buffer are not supported by this kernel\n");
		return -EINVAL;
	}
	size = radeon_bo_size(track->cb_color_bo[i]) - track->cb_color_bo_offset[i];
	format = G_0280A0_FORMAT(track->cb_color_info[i]);
	if (!fmt_is_valid_color(format)) {
		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08X)\n",
			 __func__, __LINE__, format,
			i, track->cb_color_info[i]);
		return -EINVAL;
	}
	/* pitch in pixels */
	pitch = (G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1) * 8;
	slice_tile_max = G_028060_SLICE_TILE_MAX(track->cb_color_size[i]) + 1;
	/* convert 8x8 tiles to pixels */
	slice_tile_max *= 64;
	height = slice_tile_max / pitch;
	if (height > 8192)
		height = 8192;
	array_mode = G_0280A0_ARRAY_MODE(track->cb_color_info[i]);

	base_offset = track->cb_color_bo_mc[i] + track->cb_color_bo_offset[i];
	array_check.array_mode = array_mode;
	array_check.group_size = track->group_size;
	array_check.nbanks = track->nbanks;
	array_check.npipes = track->npipes;
	array_check.nsamples = track->nsamples;
	array_check.blocksize = fmt_get_blocksize(format);
	if (r600_get_array_mode_alignment(&array_check,
					  &pitch_align, &height_align, &depth_align, &base_align)) {
		dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
			 G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i,
			 track->cb_color_info[i]);
		return -EINVAL;
	}
	switch (array_mode) {
	case V_0280A0_ARRAY_LINEAR_GENERAL:
		break;
	case V_0280A0_ARRAY_LINEAR_ALIGNED:
		break;
	case V_0280A0_ARRAY_1D_TILED_THIN1:
		/* avoid breaking userspace: trim height to a tile multiple
		 * instead of rejecting the CS */
		if (height > 7)
			height &= ~0x7;
		break;
	case V_0280A0_ARRAY_2D_TILED_THIN1:
		break;
	default:
		dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
			G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i,
			track->cb_color_info[i]);
		return -EINVAL;
	}

	if (!IS_ALIGNED(pitch, pitch_align)) {
		dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
			 __func__, __LINE__, pitch);
		return -EINVAL;
	}
	if (!IS_ALIGNED(height, height_align)) {
		dev_warn(p->dev, "%s:%d cb height (%d) invalid\n",
			 __func__, __LINE__, height);
		return -EINVAL;
	}
	if (!IS_ALIGNED(base_offset, base_align)) {
		dev_warn(p->dev, "%s offset[%d] 0x%llx not aligned\n", __func__, i, base_offset);
		return -EINVAL;
	}

	/* check offset */
	tmp = fmt_get_nblocksy(format, height) * fmt_get_nblocksx(format, pitch) * fmt_get_blocksize(format);
	if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) {
		if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) {
			/* the initial DDX does bad things with the CB size occasionally */
			/* it rounds up height too far for slice tile max but the BO is smaller */
			/* r600c,g also seem to flush at bad times in some apps resulting in
			 * bogus values here. So for linear just allow anything to avoid breaking
			 * broken userspace.
			 */
		} else {
			dev_warn(p->dev, "%s offset[%d] %d %d %lu too big\n", __func__, i, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i]));
			return -EINVAL;
		}
	}
	/* limit max tile */
	tmp = (height * pitch) >> 6;
	if (tmp < slice_tile_max)
		slice_tile_max = tmp;
	/* rewrite CB_COLOR[i]_SIZE in the IB with the clamped values */
	tmp = S_028060_PITCH_TILE_MAX((pitch / 8) - 1) |
		S_028060_SLICE_TILE_MAX(slice_tile_max - 1);
	ib[track->cb_color_size_idx[i]] = tmp;
	return 0;
}
426
/*
 * r600_cs_track_check() - validate all tracked CS state before submission.
 *
 * Validates every enabled color target (rewriting CB_COLOR*_SIZE via
 * r600_cs_track_validate_cb()) and, when Z or stencil is enabled, the
 * depth buffer size/alignment (rewriting DB_DEPTH_SIZE when userspace
 * left it at the sentinel value).  Returns 0 on success, -EINVAL on any
 * violation.
 */
static int r600_cs_track_check(struct radeon_cs_parser *p)
{
	struct r600_cs_track *track = p->track;
	u32 tmp;
	int r, i;
	volatile u32 *ib = p->ib->ptr;

	/* on legacy kernel we don't perform advanced check */
	if (p->rdev == NULL)
		return 0;
	/* we don't support out buffer yet */
	if (track->vgt_strmout_en || track->vgt_strmout_buffer_en) {
		dev_warn(p->dev, "this kernel doesn't support SMX output buffer\n");
		return -EINVAL;
	}
	/* check that we have a cb for each enabled target, we don't check
	 * shader_mask because it seems mesa isn't always setting it :(
	 */
	tmp = track->cb_target_mask;
	for (i = 0; i < 8; i++) {
		/* 4 write-enable bits per target */
		if ((tmp >> (i * 4)) & 0xF) {
			/* at least one component is enabled */
			if (track->cb_color_bo[i] == NULL) {
				dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
					__func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
				return -EINVAL;
			}
			/* perform rewrite of CB_COLOR[0-7]_SIZE */
			r = r600_cs_track_validate_cb(p, i);
			if (r)
				return r;
		}
	}
	/* Check depth buffer */
	if (G_028800_STENCIL_ENABLE(track->db_depth_control) ||
		G_028800_Z_ENABLE(track->db_depth_control)) {
		u32 nviews, bpe, ntiles, size, slice_tile_max;
		u32 height, height_align, pitch, pitch_align, depth_align;
		u64 base_offset, base_align;
		struct array_mode_checker array_check;
		int array_mode;

		if (track->db_bo == NULL) {
			dev_warn(p->dev, "z/stencil with no depth buffer\n");
			return -EINVAL;
		}
		if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) {
			dev_warn(p->dev, "this kernel doesn't support z/stencil htile\n");
			return -EINVAL;
		}
		/* bytes per element from the depth format */
		switch (G_028010_FORMAT(track->db_depth_info)) {
		case V_028010_DEPTH_16:
			bpe = 2;
			break;
		case V_028010_DEPTH_X8_24:
		case V_028010_DEPTH_8_24:
		case V_028010_DEPTH_X8_24_FLOAT:
		case V_028010_DEPTH_8_24_FLOAT:
		case V_028010_DEPTH_32_FLOAT:
			bpe = 4;
			break;
		case V_028010_DEPTH_X24_8_32_FLOAT:
			bpe = 8;
			break;
		default:
			dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info));
			return -EINVAL;
		}
		/* sentinel from r600_cs_track_init(): size never programmed,
		 * derive it from the BO and patch DB_DEPTH_SIZE in the IB */
		if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) {
			if (!track->db_depth_size_idx) {
				dev_warn(p->dev, "z/stencil buffer size not set\n");
				return -EINVAL;
			}
			tmp = radeon_bo_size(track->db_bo) - track->db_offset;
			tmp = (tmp / bpe) >> 6;
			if (!tmp) {
				dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n",
						track->db_depth_size, bpe, track->db_offset,
						radeon_bo_size(track->db_bo));
				return -EINVAL;
			}
			ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF);
		} else {
			size = radeon_bo_size(track->db_bo);
			/* pitch in pixels */
			pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8;
			slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
			slice_tile_max *= 64;
			height = slice_tile_max / pitch;
			if (height > 8192)
				height = 8192;
			base_offset = track->db_bo_mc + track->db_offset;
			array_mode = G_028010_ARRAY_MODE(track->db_depth_info);
			array_check.array_mode = array_mode;
			array_check.group_size = track->group_size;
			array_check.nbanks = track->nbanks;
			array_check.npipes = track->npipes;
			array_check.nsamples = track->nsamples;
			array_check.blocksize = bpe;
			if (r600_get_array_mode_alignment(&array_check,
							  &pitch_align, &height_align, &depth_align, &base_align)) {
				dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
					 G_028010_ARRAY_MODE(track->db_depth_info),
					 track->db_depth_info);
				return -EINVAL;
			}
			switch (array_mode) {
			case V_028010_ARRAY_1D_TILED_THIN1:
				/* don't break userspace */
				height &= ~0x7;
				break;
			case V_028010_ARRAY_2D_TILED_THIN1:
				break;
			default:
				dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
					 G_028010_ARRAY_MODE(track->db_depth_info),
					 track->db_depth_info);
				return -EINVAL;
			}

			if (!IS_ALIGNED(pitch, pitch_align)) {
				dev_warn(p->dev, "%s:%d db pitch (%d) invalid\n",
					 __func__, __LINE__, pitch);
				return -EINVAL;
			}
			if (!IS_ALIGNED(height, height_align)) {
				dev_warn(p->dev, "%s:%d db height (%d) invalid\n",
					 __func__, __LINE__, height);
				return -EINVAL;
			}
			if (!IS_ALIGNED(base_offset, base_align)) {
				/* NOTE(review): 'i' here is the leftover CB loop
				 * index (always 8 at this point) — cosmetic only */
				dev_warn(p->dev, "%s offset[%d] 0x%llx not aligned\n", __func__, i, base_offset);
				return -EINVAL;
			}

			ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
			nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1;
			tmp = ntiles * bpe * 64 * nviews;
			if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) {
				dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %d -> %u have %lu)\n",
						track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset,
						radeon_bo_size(track->db_bo));
				return -EINVAL;
			}
		}
	}
	return 0;
}
575
576 /**
577  * r600_cs_packet_parse() - parse cp packet and point ib index to next packet
578  * @parser:     parser structure holding parsing context.
579  * @pkt:        where to store packet informations
580  *
581  * Assume that chunk_ib_index is properly set. Will return -EINVAL
582  * if packet is bigger than remaining ib size. or if packets is unknown.
583  **/
584 int r600_cs_packet_parse(struct radeon_cs_parser *p,
585                         struct radeon_cs_packet *pkt,
586                         unsigned idx)
587 {
588         struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
589         uint32_t header;
590
591         if (idx >= ib_chunk->length_dw) {
592                 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
593                           idx, ib_chunk->length_dw);
594                 return -EINVAL;
595         }
596         header = radeon_get_ib_value(p, idx);
597         pkt->idx = idx;
598         pkt->type = CP_PACKET_GET_TYPE(header);
599         pkt->count = CP_PACKET_GET_COUNT(header);
600         pkt->one_reg_wr = 0;
601         switch (pkt->type) {
602         case PACKET_TYPE0:
603                 pkt->reg = CP_PACKET0_GET_REG(header);
604                 break;
605         case PACKET_TYPE3:
606                 pkt->opcode = CP_PACKET3_GET_OPCODE(header);
607                 break;
608         case PACKET_TYPE2:
609                 pkt->count = -1;
610                 break;
611         default:
612                 DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
613                 return -EINVAL;
614         }
615         if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
616                 DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
617                           pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
618                 return -EINVAL;
619         }
620         return 0;
621 }
622
623 /**
624  * r600_cs_packet_next_reloc_mm() - parse next packet which should be reloc packet3
625  * @parser:             parser structure holding parsing context.
626  * @data:               pointer to relocation data
627  * @offset_start:       starting offset
628  * @offset_mask:        offset mask (to align start offset on)
629  * @reloc:              reloc informations
630  *
631  * Check next packet is relocation packet3, do bo validation and compute
632  * GPU offset using the provided start.
633  **/
634 static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
635                                         struct radeon_cs_reloc **cs_reloc)
636 {
637         struct radeon_cs_chunk *relocs_chunk;
638         struct radeon_cs_packet p3reloc;
639         unsigned idx;
640         int r;
641
642         if (p->chunk_relocs_idx == -1) {
643                 DRM_ERROR("No relocation chunk !\n");
644                 return -EINVAL;
645         }
646         *cs_reloc = NULL;
647         relocs_chunk = &p->chunks[p->chunk_relocs_idx];
648         r = r600_cs_packet_parse(p, &p3reloc, p->idx);
649         if (r) {
650                 return r;
651         }
652         p->idx += p3reloc.count + 2;
653         if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
654                 DRM_ERROR("No packet3 for relocation for packet at %d.\n",
655                           p3reloc.idx);
656                 return -EINVAL;
657         }
658         idx = radeon_get_ib_value(p, p3reloc.idx + 1);
659         if (idx >= relocs_chunk->length_dw) {
660                 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
661                           idx, relocs_chunk->length_dw);
662                 return -EINVAL;
663         }
664         /* FIXME: we assume reloc size is 4 dwords */
665         *cs_reloc = p->relocs_ptr[(idx / 4)];
666         return 0;
667 }
668
/**
 * r600_cs_packet_next_reloc_nomm() - parse next packet which should be reloc packet3
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	output: reloc information for the packet.
 *
 * Check that the next packet is a relocation packet3 (a PACKET3_NOP whose
 * payload indexes into the relocation chunk) and compute the GPU offset
 * directly from the raw relocation chunk data.  This is the legacy path
 * that does not go through memory management / bo validation.
 **/
static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
					struct radeon_cs_reloc **cs_reloc)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = r600_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return r;
	}
	/* advance the parser past the NOP relocation packet */
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		return -EINVAL;
	}
	/* first dword of the NOP payload is the index into the reloc chunk */
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}
	*cs_reloc = p->relocs;
	/* assumes a reloc entry is 4 dwords: dword 0 carries the low 32 bits
	 * of the GPU offset and dword 3 the high 32 bits (same layout the
	 * _mm path's "reloc size is 4 dwords" FIXME refers to) */
	(*cs_reloc)->lobj.gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32;
	(*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
	return 0;
}
715
716 /**
717  * r600_cs_packet_next_is_pkt3_nop() - test if next packet is packet3 nop for reloc
718  * @parser:             parser structure holding parsing context.
719  *
720  * Check next packet is relocation packet3, do bo validation and compute
721  * GPU offset using the provided start.
722  **/
723 static inline int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
724 {
725         struct radeon_cs_packet p3reloc;
726         int r;
727
728         r = r600_cs_packet_parse(p, &p3reloc, p->idx);
729         if (r) {
730                 return 0;
731         }
732         if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
733                 return 0;
734         }
735         return 1;
736 }
737
/**
 * r600_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p:		parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET3 - WAIT_REG_MEM poll vline status reg
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT_REG_MEM packets to the correct crtc.
 * It also detects a switched off crtc and nulls out the
 * wait in that case.
 */
static int r600_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_mode_object *obj;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, wait_reg_mem;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg, wait_reg_mem_info;
	volatile uint32_t *ib;

	ib = p->ib->ptr;

	/* parse the WAIT_REG_MEM */
	r = r600_cs_packet_parse(p, &wait_reg_mem, p->idx);
	if (r)
		return r;

	/* check its a WAIT_REG_MEM */
	if (wait_reg_mem.type != PACKET_TYPE3 ||
	    wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) {
		DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n");
		r = -EINVAL;
		return r;
	}

	wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1);
	/* bit 4 is reg (0) or mem (1) */
	if (wait_reg_mem_info & 0x10) {
		DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n");
		r = -EINVAL;
		return r;
	}
	/* waiting for value to be equal */
	if ((wait_reg_mem_info & 0x7) != 0x3) {
		DRM_ERROR("vline WAIT_REG_MEM function not equal\n");
		r = -EINVAL;
		return r;
	}
	/* the wait must poll the D1MODE vline status register (dword offset,
	 * hence the << 2 to compare in byte-address space) */
	if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != AVIVO_D1MODE_VLINE_STATUS) {
		DRM_ERROR("vline WAIT_REG_MEM bad reg\n");
		r = -EINVAL;
		return r;
	}

	if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != AVIVO_D1MODE_VLINE_STAT) {
		DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n");
		r = -EINVAL;
		return r;
	}

	/* jump over the NOP */
	r = r600_cs_packet_parse(p, &p3reloc, p->idx + wait_reg_mem.count + 2);
	if (r)
		return r;

	/* h_idx points back at the PACKET0 VLINE_START_END header emitted
	 * just before the WAIT_REG_MEM (the fixed layout userspace sends) */
	h_idx = p->idx - 2;
	p->idx += wait_reg_mem.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	/* crtc_id lives in the payload of the trailing reloc NOP packet */
	crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1);
	reg = CP_PACKET0_GET_REG(header);

	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
	if (!obj) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		r = -EINVAL;
		goto out;
	}
	crtc = obj_to_crtc(obj);
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
		ib[h_idx + 4] = PACKET2(0);
		ib[h_idx + 5] = PACKET2(0);
		ib[h_idx + 6] = PACKET2(0);
		ib[h_idx + 7] = PACKET2(0);
		ib[h_idx + 8] = PACKET2(0);
	} else if (crtc_id == 1) {
		/* retarget the sequence from D1 (crtc 0) to D2 (crtc 1) */
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= ~R600_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			r = -EINVAL;
			goto out;
		}
		ib[h_idx] = header;
		ib[h_idx + 4] = AVIVO_D2MODE_VLINE_STATUS >> 2;
	}
out:
	return r;
}
852
853 static int r600_packet0_check(struct radeon_cs_parser *p,
854                                 struct radeon_cs_packet *pkt,
855                                 unsigned idx, unsigned reg)
856 {
857         int r;
858
859         switch (reg) {
860         case AVIVO_D1MODE_VLINE_START_END:
861                 r = r600_cs_packet_parse_vline(p);
862                 if (r) {
863                         DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
864                                         idx, reg);
865                         return r;
866                 }
867                 break;
868         default:
869                 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
870                        reg, idx);
871                 return -EINVAL;
872         }
873         return 0;
874 }
875
876 static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
877                                 struct radeon_cs_packet *pkt)
878 {
879         unsigned reg, i;
880         unsigned idx;
881         int r;
882
883         idx = pkt->idx + 1;
884         reg = pkt->reg;
885         for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
886                 r = r600_packet0_check(p, pkt, idx, reg);
887                 if (r) {
888                         return r;
889                 }
890         }
891         return 0;
892 }
893
894 /**
895  * r600_cs_check_reg() - check if register is authorized or not
896  * @parser: parser structure holding parsing context
897  * @reg: register we are testing
898  * @idx: index into the cs buffer
899  *
900  * This function will test against r600_reg_safe_bm and return 0
901  * if register is safe. If register is not flag as safe this function
902  * will test it against a list of register needind special handling.
903  */
904 static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
905 {
906         struct r600_cs_track *track = (struct r600_cs_track *)p->track;
907         struct radeon_cs_reloc *reloc;
908         u32 last_reg = ARRAY_SIZE(r600_reg_safe_bm);
909         u32 m, i, tmp, *ib;
910         int r;
911
912         i = (reg >> 7);
913         if (i > last_reg) {
914                 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
915                 return -EINVAL;
916         }
917         m = 1 << ((reg >> 2) & 31);
918         if (!(r600_reg_safe_bm[i] & m))
919                 return 0;
920         ib = p->ib->ptr;
921         switch (reg) {
922         /* force following reg to 0 in an attemp to disable out buffer
923          * which will need us to better understand how it works to perform
924          * security check on it (Jerome)
925          */
926         case R_0288A8_SQ_ESGS_RING_ITEMSIZE:
927         case R_008C44_SQ_ESGS_RING_SIZE:
928         case R_0288B0_SQ_ESTMP_RING_ITEMSIZE:
929         case R_008C54_SQ_ESTMP_RING_SIZE:
930         case R_0288C0_SQ_FBUF_RING_ITEMSIZE:
931         case R_008C74_SQ_FBUF_RING_SIZE:
932         case R_0288B4_SQ_GSTMP_RING_ITEMSIZE:
933         case R_008C5C_SQ_GSTMP_RING_SIZE:
934         case R_0288AC_SQ_GSVS_RING_ITEMSIZE:
935         case R_008C4C_SQ_GSVS_RING_SIZE:
936         case R_0288BC_SQ_PSTMP_RING_ITEMSIZE:
937         case R_008C6C_SQ_PSTMP_RING_SIZE:
938         case R_0288C4_SQ_REDUC_RING_ITEMSIZE:
939         case R_008C7C_SQ_REDUC_RING_SIZE:
940         case R_0288B8_SQ_VSTMP_RING_ITEMSIZE:
941         case R_008C64_SQ_VSTMP_RING_SIZE:
942         case R_0288C8_SQ_GS_VERT_ITEMSIZE:
943                 /* get value to populate the IB don't remove */
944                 tmp =radeon_get_ib_value(p, idx);
945                 ib[idx] = 0;
946                 break;
947         case SQ_CONFIG:
948                 track->sq_config = radeon_get_ib_value(p, idx);
949                 break;
950         case R_028800_DB_DEPTH_CONTROL:
951                 track->db_depth_control = radeon_get_ib_value(p, idx);
952                 break;
953         case R_028010_DB_DEPTH_INFO:
954                 if (r600_cs_packet_next_is_pkt3_nop(p)) {
955                         r = r600_cs_packet_next_reloc(p, &reloc);
956                         if (r) {
957                                 dev_warn(p->dev, "bad SET_CONTEXT_REG "
958                                          "0x%04X\n", reg);
959                                 return -EINVAL;
960                         }
961                         track->db_depth_info = radeon_get_ib_value(p, idx);
962                         ib[idx] &= C_028010_ARRAY_MODE;
963                         track->db_depth_info &= C_028010_ARRAY_MODE;
964                         if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
965                                 ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
966                                 track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
967                         } else {
968                                 ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
969                                 track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
970                         }
971                 } else
972                         track->db_depth_info = radeon_get_ib_value(p, idx);
973                 break;
974         case R_028004_DB_DEPTH_VIEW:
975                 track->db_depth_view = radeon_get_ib_value(p, idx);
976                 break;
977         case R_028000_DB_DEPTH_SIZE:
978                 track->db_depth_size = radeon_get_ib_value(p, idx);
979                 track->db_depth_size_idx = idx;
980                 break;
981         case R_028AB0_VGT_STRMOUT_EN:
982                 track->vgt_strmout_en = radeon_get_ib_value(p, idx);
983                 break;
984         case R_028B20_VGT_STRMOUT_BUFFER_EN:
985                 track->vgt_strmout_buffer_en = radeon_get_ib_value(p, idx);
986                 break;
987         case R_028238_CB_TARGET_MASK:
988                 track->cb_target_mask = radeon_get_ib_value(p, idx);
989                 break;
990         case R_02823C_CB_SHADER_MASK:
991                 track->cb_shader_mask = radeon_get_ib_value(p, idx);
992                 break;
993         case R_028C04_PA_SC_AA_CONFIG:
994                 tmp = G_028C04_MSAA_NUM_SAMPLES(radeon_get_ib_value(p, idx));
995                 track->nsamples = 1 << tmp;
996                 break;
997         case R_0280A0_CB_COLOR0_INFO:
998         case R_0280A4_CB_COLOR1_INFO:
999         case R_0280A8_CB_COLOR2_INFO:
1000         case R_0280AC_CB_COLOR3_INFO:
1001         case R_0280B0_CB_COLOR4_INFO:
1002         case R_0280B4_CB_COLOR5_INFO:
1003         case R_0280B8_CB_COLOR6_INFO:
1004         case R_0280BC_CB_COLOR7_INFO:
1005                 if (r600_cs_packet_next_is_pkt3_nop(p)) {
1006                         r = r600_cs_packet_next_reloc(p, &reloc);
1007                         if (r) {
1008                                 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1009                                 return -EINVAL;
1010                         }
1011                         tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
1012                         track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1013                         if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
1014                                 ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
1015                                 track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
1016                         } else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
1017                                 ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
1018                                 track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
1019                         }
1020                 } else {
1021                         tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
1022                         track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1023                 }
1024                 break;
1025         case R_028060_CB_COLOR0_SIZE:
1026         case R_028064_CB_COLOR1_SIZE:
1027         case R_028068_CB_COLOR2_SIZE:
1028         case R_02806C_CB_COLOR3_SIZE:
1029         case R_028070_CB_COLOR4_SIZE:
1030         case R_028074_CB_COLOR5_SIZE:
1031         case R_028078_CB_COLOR6_SIZE:
1032         case R_02807C_CB_COLOR7_SIZE:
1033                 tmp = (reg - R_028060_CB_COLOR0_SIZE) / 4;
1034                 track->cb_color_size[tmp] = radeon_get_ib_value(p, idx);
1035                 track->cb_color_size_idx[tmp] = idx;
1036                 break;
1037                 /* This register were added late, there is userspace
1038                  * which does provide relocation for those but set
1039                  * 0 offset. In order to avoid breaking old userspace
1040                  * we detect this and set address to point to last
1041                  * CB_COLOR0_BASE, note that if userspace doesn't set
1042                  * CB_COLOR0_BASE before this register we will report
1043                  * error. Old userspace always set CB_COLOR0_BASE
1044                  * before any of this.
1045                  */
1046         case R_0280E0_CB_COLOR0_FRAG:
1047         case R_0280E4_CB_COLOR1_FRAG:
1048         case R_0280E8_CB_COLOR2_FRAG:
1049         case R_0280EC_CB_COLOR3_FRAG:
1050         case R_0280F0_CB_COLOR4_FRAG:
1051         case R_0280F4_CB_COLOR5_FRAG:
1052         case R_0280F8_CB_COLOR6_FRAG:
1053         case R_0280FC_CB_COLOR7_FRAG:
1054                 tmp = (reg - R_0280E0_CB_COLOR0_FRAG) / 4;
1055                 if (!r600_cs_packet_next_is_pkt3_nop(p)) {
1056                         if (!track->cb_color_base_last[tmp]) {
1057                                 dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg);
1058                                 return -EINVAL;
1059                         }
1060                         ib[idx] = track->cb_color_base_last[tmp];
1061                         track->cb_color_frag_bo[tmp] = track->cb_color_bo[tmp];
1062                 } else {
1063                         r = r600_cs_packet_next_reloc(p, &reloc);
1064                         if (r) {
1065                                 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1066                                 return -EINVAL;
1067                         }
1068                         ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1069                         track->cb_color_frag_bo[tmp] = reloc->robj;
1070                 }
1071                 break;
1072         case R_0280C0_CB_COLOR0_TILE:
1073         case R_0280C4_CB_COLOR1_TILE:
1074         case R_0280C8_CB_COLOR2_TILE:
1075         case R_0280CC_CB_COLOR3_TILE:
1076         case R_0280D0_CB_COLOR4_TILE:
1077         case R_0280D4_CB_COLOR5_TILE:
1078         case R_0280D8_CB_COLOR6_TILE:
1079         case R_0280DC_CB_COLOR7_TILE:
1080                 tmp = (reg - R_0280C0_CB_COLOR0_TILE) / 4;
1081                 if (!r600_cs_packet_next_is_pkt3_nop(p)) {
1082                         if (!track->cb_color_base_last[tmp]) {
1083                                 dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg);
1084                                 return -EINVAL;
1085                         }
1086                         ib[idx] = track->cb_color_base_last[tmp];
1087                         track->cb_color_tile_bo[tmp] = track->cb_color_bo[tmp];
1088                 } else {
1089                         r = r600_cs_packet_next_reloc(p, &reloc);
1090                         if (r) {
1091                                 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1092                                 return -EINVAL;
1093                         }
1094                         ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1095                         track->cb_color_tile_bo[tmp] = reloc->robj;
1096                 }
1097                 break;
1098         case CB_COLOR0_BASE:
1099         case CB_COLOR1_BASE:
1100         case CB_COLOR2_BASE:
1101         case CB_COLOR3_BASE:
1102         case CB_COLOR4_BASE:
1103         case CB_COLOR5_BASE:
1104         case CB_COLOR6_BASE:
1105         case CB_COLOR7_BASE:
1106                 r = r600_cs_packet_next_reloc(p, &reloc);
1107                 if (r) {
1108                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1109                                         "0x%04X\n", reg);
1110                         return -EINVAL;
1111                 }
1112                 tmp = (reg - CB_COLOR0_BASE) / 4;
1113                 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1114                 ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1115                 track->cb_color_base_last[tmp] = ib[idx];
1116                 track->cb_color_bo[tmp] = reloc->robj;
1117                 track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset;
1118                 break;
1119         case DB_DEPTH_BASE:
1120                 r = r600_cs_packet_next_reloc(p, &reloc);
1121                 if (r) {
1122                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1123                                         "0x%04X\n", reg);
1124                         return -EINVAL;
1125                 }
1126                 track->db_offset = radeon_get_ib_value(p, idx) << 8;
1127                 ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1128                 track->db_bo = reloc->robj;
1129                 track->db_bo_mc = reloc->lobj.gpu_offset;
1130                 break;
1131         case DB_HTILE_DATA_BASE:
1132         case SQ_PGM_START_FS:
1133         case SQ_PGM_START_ES:
1134         case SQ_PGM_START_VS:
1135         case SQ_PGM_START_GS:
1136         case SQ_PGM_START_PS:
1137         case SQ_ALU_CONST_CACHE_GS_0:
1138         case SQ_ALU_CONST_CACHE_GS_1:
1139         case SQ_ALU_CONST_CACHE_GS_2:
1140         case SQ_ALU_CONST_CACHE_GS_3:
1141         case SQ_ALU_CONST_CACHE_GS_4:
1142         case SQ_ALU_CONST_CACHE_GS_5:
1143         case SQ_ALU_CONST_CACHE_GS_6:
1144         case SQ_ALU_CONST_CACHE_GS_7:
1145         case SQ_ALU_CONST_CACHE_GS_8:
1146         case SQ_ALU_CONST_CACHE_GS_9:
1147         case SQ_ALU_CONST_CACHE_GS_10:
1148         case SQ_ALU_CONST_CACHE_GS_11:
1149         case SQ_ALU_CONST_CACHE_GS_12:
1150         case SQ_ALU_CONST_CACHE_GS_13:
1151         case SQ_ALU_CONST_CACHE_GS_14:
1152         case SQ_ALU_CONST_CACHE_GS_15:
1153         case SQ_ALU_CONST_CACHE_PS_0:
1154         case SQ_ALU_CONST_CACHE_PS_1:
1155         case SQ_ALU_CONST_CACHE_PS_2:
1156         case SQ_ALU_CONST_CACHE_PS_3:
1157         case SQ_ALU_CONST_CACHE_PS_4:
1158         case SQ_ALU_CONST_CACHE_PS_5:
1159         case SQ_ALU_CONST_CACHE_PS_6:
1160         case SQ_ALU_CONST_CACHE_PS_7:
1161         case SQ_ALU_CONST_CACHE_PS_8:
1162         case SQ_ALU_CONST_CACHE_PS_9:
1163         case SQ_ALU_CONST_CACHE_PS_10:
1164         case SQ_ALU_CONST_CACHE_PS_11:
1165         case SQ_ALU_CONST_CACHE_PS_12:
1166         case SQ_ALU_CONST_CACHE_PS_13:
1167         case SQ_ALU_CONST_CACHE_PS_14:
1168         case SQ_ALU_CONST_CACHE_PS_15:
1169         case SQ_ALU_CONST_CACHE_VS_0:
1170         case SQ_ALU_CONST_CACHE_VS_1:
1171         case SQ_ALU_CONST_CACHE_VS_2:
1172         case SQ_ALU_CONST_CACHE_VS_3:
1173         case SQ_ALU_CONST_CACHE_VS_4:
1174         case SQ_ALU_CONST_CACHE_VS_5:
1175         case SQ_ALU_CONST_CACHE_VS_6:
1176         case SQ_ALU_CONST_CACHE_VS_7:
1177         case SQ_ALU_CONST_CACHE_VS_8:
1178         case SQ_ALU_CONST_CACHE_VS_9:
1179         case SQ_ALU_CONST_CACHE_VS_10:
1180         case SQ_ALU_CONST_CACHE_VS_11:
1181         case SQ_ALU_CONST_CACHE_VS_12:
1182         case SQ_ALU_CONST_CACHE_VS_13:
1183         case SQ_ALU_CONST_CACHE_VS_14:
1184         case SQ_ALU_CONST_CACHE_VS_15:
1185                 r = r600_cs_packet_next_reloc(p, &reloc);
1186                 if (r) {
1187                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1188                                         "0x%04X\n", reg);
1189                         return -EINVAL;
1190                 }
1191                 ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1192                 break;
1193         default:
1194                 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1195                 return -EINVAL;
1196         }
1197         return 0;
1198 }
1199
/* Return the dimension of mip level @level for a base dimension @size,
 * clamped to at least one texel; non-base levels are rounded up to a
 * power of two. */
static inline unsigned mip_minify(unsigned size, unsigned level)
{
	unsigned val = size >> level;

	if (val == 0)
		val = 1;
	if (level > 0)
		val = roundup_pow_of_two(val);
	return val;
}
1209
/* Compute the byte size of the base level (*l0_size) and of the mip chain
 * (*mipmap_size) for a texture of base dimensions w0 x h0 x d0 with levels
 * blevel..llevel, given the format and the block/height/base alignment
 * constraints of the chosen array mode.  When nfaces is non-zero it is a
 * face/slice count (cubemap faces or array layers) multiplying each level's
 * size instead of the depth. */
static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel,
			      unsigned w0, unsigned h0, unsigned d0, unsigned format,
			      unsigned block_align, unsigned height_align, unsigned base_align,
			      unsigned *l0_size, unsigned *mipmap_size)
{
	unsigned offset, i, level;
	unsigned width, height, depth, size;
	unsigned blocksize;
	unsigned nbx, nby;
	unsigned nlevels = llevel - blevel + 1;

	/* sentinel (UINT_MAX); overwritten on the first loop iteration */
	*l0_size = -1;
	blocksize = fmt_get_blocksize(format);

	w0 = mip_minify(w0, 0);
	h0 = mip_minify(h0, 0);
	d0 = mip_minify(d0, 0);
	/* NOTE(review): 'level' is advanced alongside 'i' but never read
	 * inside the loop */
	for(i = 0, offset = 0, level = blevel; i < nlevels; i++, level++) {
		/* width in format blocks, padded to the pitch alignment */
		width = mip_minify(w0, i);
		nbx = fmt_get_nblocksx(format, width);

		nbx = round_up(nbx, block_align);

		/* height in format blocks, padded to the height alignment */
		height = mip_minify(h0, i);
		nby = fmt_get_nblocksy(format, height);
		nby = round_up(nby, height_align);

		depth = mip_minify(d0, i);

		size = nbx * nby * blocksize;
		if (nfaces)
			size *= nfaces;
		else
			size *= depth;

		if (i == 0)
			*l0_size = size;

		/* only the base level and the start of the mip chain need
		 * the base alignment */
		if (i == 0 || i == 1)
			offset = round_up(offset, base_align);

		offset += size;
	}
	*mipmap_size = offset;
	if (llevel == 0)
		*mipmap_size = *l0_size;
	/* when starting at level 0 the base level lives in a separate bo,
	 * so exclude it from the mip chain size */
	if (!blevel)
		*mipmap_size -= *l0_size;
}
1259
1260 /**
1261  * r600_check_texture_resource() - check if register is authorized or not
1262  * @p: parser structure holding parsing context
1263  * @idx: index into the cs buffer
1264  * @texture: texture's bo structure
1265  * @mipmap: mipmap's bo structure
1266  *
1267  * This function will check that the resource has valid field and that
1268  * the texture and mipmap bo object are big enough to cover this resource.
1269  */
1270 static inline int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
1271                                               struct radeon_bo *texture,
1272                                               struct radeon_bo *mipmap,
1273                                               u64 base_offset,
1274                                               u64 mip_offset,
1275                                               u32 tiling_flags)
1276 {
1277         struct r600_cs_track *track = p->track;
1278         u32 nfaces, llevel, blevel, w0, h0, d0;
1279         u32 word0, word1, l0_size, mipmap_size;
1280         u32 height_align, pitch, pitch_align, depth_align;
1281         u32 array, barray, larray;
1282         u64 base_align;
1283         struct array_mode_checker array_check;
1284         u32 format;
1285
1286         /* on legacy kernel we don't perform advanced check */
1287         if (p->rdev == NULL)
1288                 return 0;
1289
1290         /* convert to bytes */
1291         base_offset <<= 8;
1292         mip_offset <<= 8;
1293
1294         word0 = radeon_get_ib_value(p, idx + 0);
1295         if (tiling_flags & RADEON_TILING_MACRO)
1296                 word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
1297         else if (tiling_flags & RADEON_TILING_MICRO)
1298                 word0 |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
1299         word1 = radeon_get_ib_value(p, idx + 1);
1300         w0 = G_038000_TEX_WIDTH(word0) + 1;
1301         h0 = G_038004_TEX_HEIGHT(word1) + 1;
1302         d0 = G_038004_TEX_DEPTH(word1);
1303         nfaces = 1;
1304         switch (G_038000_DIM(word0)) {
1305         case V_038000_SQ_TEX_DIM_1D:
1306         case V_038000_SQ_TEX_DIM_2D:
1307         case V_038000_SQ_TEX_DIM_3D:
1308                 break;
1309         case V_038000_SQ_TEX_DIM_CUBEMAP:
1310                 if (p->family >= CHIP_RV770)
1311                         nfaces = 8;
1312                 else
1313                         nfaces = 6;
1314                 break;
1315         case V_038000_SQ_TEX_DIM_1D_ARRAY:
1316         case V_038000_SQ_TEX_DIM_2D_ARRAY:
1317                 array = 1;
1318                 break;
1319         case V_038000_SQ_TEX_DIM_2D_MSAA:
1320         case V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA:
1321         default:
1322                 dev_warn(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0));
1323                 return -EINVAL;
1324         }
1325         format = G_038004_DATA_FORMAT(word1);
1326         if (!fmt_is_valid_texture(format)) {
1327                 dev_warn(p->dev, "%s:%d texture invalid format %d\n",
1328                          __func__, __LINE__, format);
1329                 return -EINVAL;
1330         }
1331
1332         /* pitch in texels */
1333         pitch = (G_038000_PITCH(word0) + 1) * 8;
1334         array_check.array_mode = G_038000_TILE_MODE(word0);
1335         array_check.group_size = track->group_size;
1336         array_check.nbanks = track->nbanks;
1337         array_check.npipes = track->npipes;
1338         array_check.nsamples = 1;
1339         array_check.blocksize = fmt_get_blocksize(format);
1340         if (r600_get_array_mode_alignment(&array_check,
1341                                           &pitch_align, &height_align, &depth_align, &base_align)) {
1342                 dev_warn(p->dev, "%s:%d tex array mode (%d) invalid\n",
1343                          __func__, __LINE__, G_038000_TILE_MODE(word0));
1344                 return -EINVAL;
1345         }
1346
1347         /* XXX check height as well... */
1348
1349         if (!IS_ALIGNED(pitch, pitch_align)) {
1350                 dev_warn(p->dev, "%s:%d tex pitch (%d) invalid\n",
1351                          __func__, __LINE__, pitch);
1352                 return -EINVAL;
1353         }
1354         if (!IS_ALIGNED(base_offset, base_align)) {
1355                 dev_warn(p->dev, "%s:%d tex base offset (0x%llx) invalid\n",
1356                          __func__, __LINE__, base_offset);
1357                 return -EINVAL;
1358         }
1359         if (!IS_ALIGNED(mip_offset, base_align)) {
1360                 dev_warn(p->dev, "%s:%d tex mip offset (0x%llx) invalid\n",
1361                          __func__, __LINE__, mip_offset);
1362                 return -EINVAL;
1363         }
1364
1365         word0 = radeon_get_ib_value(p, idx + 4);
1366         word1 = radeon_get_ib_value(p, idx + 5);
1367         blevel = G_038010_BASE_LEVEL(word0);
1368         llevel = G_038014_LAST_LEVEL(word1);
1369         if (array == 1) {
1370                 barray = G_038014_BASE_ARRAY(word1);
1371                 larray = G_038014_LAST_ARRAY(word1);
1372
1373                 nfaces = larray - barray + 1;
1374         }
1375         r600_texture_size(nfaces, blevel, llevel, w0, h0, d0, format,
1376                           pitch_align, height_align, base_align,
1377                           &l0_size, &mipmap_size);
1378         /* using get ib will give us the offset into the texture bo */
1379         word0 = radeon_get_ib_value(p, idx + 2) << 8;
1380         if ((l0_size + word0) > radeon_bo_size(texture)) {
1381                 dev_warn(p->dev, "texture bo too small (%d %d %d %d -> %d have %ld)\n",
1382                         w0, h0, format, word0, l0_size, radeon_bo_size(texture));
1383                 dev_warn(p->dev, "alignments %d %d %d %lld\n", pitch, pitch_align, height_align, base_align);
1384                 return -EINVAL;
1385         }
1386         /* using get ib will give us the offset into the mipmap bo */
1387         word0 = radeon_get_ib_value(p, idx + 3) << 8;
1388         if ((mipmap_size + word0) > radeon_bo_size(mipmap)) {
1389                 /*dev_warn(p->dev, "mipmap bo too small (%d %d %d %d %d %d -> %d have %ld)\n",
1390                   w0, h0, format, blevel, nlevels, word0, mipmap_size, radeon_bo_size(texture));*/
1391         }
1392         return 0;
1393 }
1394
1395 static int r600_packet3_check(struct radeon_cs_parser *p,
1396                                 struct radeon_cs_packet *pkt)
1397 {
1398         struct radeon_cs_reloc *reloc;
1399         struct r600_cs_track *track;
1400         volatile u32 *ib;
1401         unsigned idx;
1402         unsigned i;
1403         unsigned start_reg, end_reg, reg;
1404         int r;
1405         u32 idx_value;
1406
1407         track = (struct r600_cs_track *)p->track;
1408         ib = p->ib->ptr;
1409         idx = pkt->idx + 1;
1410         idx_value = radeon_get_ib_value(p, idx);
1411
1412         switch (pkt->opcode) {
1413         case PACKET3_START_3D_CMDBUF:
1414                 if (p->family >= CHIP_RV770 || pkt->count) {
1415                         DRM_ERROR("bad START_3D\n");
1416                         return -EINVAL;
1417                 }
1418                 break;
1419         case PACKET3_CONTEXT_CONTROL:
1420                 if (pkt->count != 1) {
1421                         DRM_ERROR("bad CONTEXT_CONTROL\n");
1422                         return -EINVAL;
1423                 }
1424                 break;
1425         case PACKET3_INDEX_TYPE:
1426         case PACKET3_NUM_INSTANCES:
1427                 if (pkt->count) {
1428                         DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES\n");
1429                         return -EINVAL;
1430                 }
1431                 break;
1432         case PACKET3_DRAW_INDEX:
1433                 if (pkt->count != 3) {
1434                         DRM_ERROR("bad DRAW_INDEX\n");
1435                         return -EINVAL;
1436                 }
1437                 r = r600_cs_packet_next_reloc(p, &reloc);
1438                 if (r) {
1439                         DRM_ERROR("bad DRAW_INDEX\n");
1440                         return -EINVAL;
1441                 }
1442                 ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1443                 ib[idx+1] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1444                 r = r600_cs_track_check(p);
1445                 if (r) {
1446                         dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1447                         return r;
1448                 }
1449                 break;
1450         case PACKET3_DRAW_INDEX_AUTO:
1451                 if (pkt->count != 1) {
1452                         DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1453                         return -EINVAL;
1454                 }
1455                 r = r600_cs_track_check(p);
1456                 if (r) {
1457                         dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1458                         return r;
1459                 }
1460                 break;
1461         case PACKET3_DRAW_INDEX_IMMD_BE:
1462         case PACKET3_DRAW_INDEX_IMMD:
1463                 if (pkt->count < 2) {
1464                         DRM_ERROR("bad DRAW_INDEX_IMMD\n");
1465                         return -EINVAL;
1466                 }
1467                 r = r600_cs_track_check(p);
1468                 if (r) {
1469                         dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1470                         return r;
1471                 }
1472                 break;
1473         case PACKET3_WAIT_REG_MEM:
1474                 if (pkt->count != 5) {
1475                         DRM_ERROR("bad WAIT_REG_MEM\n");
1476                         return -EINVAL;
1477                 }
1478                 /* bit 4 is reg (0) or mem (1) */
1479                 if (idx_value & 0x10) {
1480                         r = r600_cs_packet_next_reloc(p, &reloc);
1481                         if (r) {
1482                                 DRM_ERROR("bad WAIT_REG_MEM\n");
1483                                 return -EINVAL;
1484                         }
1485                         ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1486                         ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1487                 }
1488                 break;
1489         case PACKET3_SURFACE_SYNC:
1490                 if (pkt->count != 3) {
1491                         DRM_ERROR("bad SURFACE_SYNC\n");
1492                         return -EINVAL;
1493                 }
1494                 /* 0xffffffff/0x0 is flush all cache flag */
1495                 if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
1496                     radeon_get_ib_value(p, idx + 2) != 0) {
1497                         r = r600_cs_packet_next_reloc(p, &reloc);
1498                         if (r) {
1499                                 DRM_ERROR("bad SURFACE_SYNC\n");
1500                                 return -EINVAL;
1501                         }
1502                         ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1503                 }
1504                 break;
1505         case PACKET3_EVENT_WRITE:
1506                 if (pkt->count != 2 && pkt->count != 0) {
1507                         DRM_ERROR("bad EVENT_WRITE\n");
1508                         return -EINVAL;
1509                 }
1510                 if (pkt->count) {
1511                         r = r600_cs_packet_next_reloc(p, &reloc);
1512                         if (r) {
1513                                 DRM_ERROR("bad EVENT_WRITE\n");
1514                                 return -EINVAL;
1515                         }
1516                         ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1517                         ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1518                 }
1519                 break;
1520         case PACKET3_EVENT_WRITE_EOP:
1521                 if (pkt->count != 4) {
1522                         DRM_ERROR("bad EVENT_WRITE_EOP\n");
1523                         return -EINVAL;
1524                 }
1525                 r = r600_cs_packet_next_reloc(p, &reloc);
1526                 if (r) {
1527                         DRM_ERROR("bad EVENT_WRITE\n");
1528                         return -EINVAL;
1529                 }
1530                 ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1531                 ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1532                 break;
1533         case PACKET3_SET_CONFIG_REG:
1534                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_OFFSET;
1535                 end_reg = 4 * pkt->count + start_reg - 4;
1536                 if ((start_reg < PACKET3_SET_CONFIG_REG_OFFSET) ||
1537                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
1538                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
1539                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
1540                         return -EINVAL;
1541                 }
1542                 for (i = 0; i < pkt->count; i++) {
1543                         reg = start_reg + (4 * i);
1544                         r = r600_cs_check_reg(p, reg, idx+1+i);
1545                         if (r)
1546                                 return r;
1547                 }
1548                 break;
1549         case PACKET3_SET_CONTEXT_REG:
1550                 start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_OFFSET;
1551                 end_reg = 4 * pkt->count + start_reg - 4;
1552                 if ((start_reg < PACKET3_SET_CONTEXT_REG_OFFSET) ||
1553                     (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
1554                     (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
1555                         DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
1556                         return -EINVAL;
1557                 }
1558                 for (i = 0; i < pkt->count; i++) {
1559                         reg = start_reg + (4 * i);
1560                         r = r600_cs_check_reg(p, reg, idx+1+i);
1561                         if (r)
1562                                 return r;
1563                 }
1564                 break;
1565         case PACKET3_SET_RESOURCE:
1566                 if (pkt->count % 7) {
1567                         DRM_ERROR("bad SET_RESOURCE\n");
1568                         return -EINVAL;
1569                 }
1570                 start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_OFFSET;
1571                 end_reg = 4 * pkt->count + start_reg - 4;
1572                 if ((start_reg < PACKET3_SET_RESOURCE_OFFSET) ||
1573                     (start_reg >= PACKET3_SET_RESOURCE_END) ||
1574                     (end_reg >= PACKET3_SET_RESOURCE_END)) {
1575                         DRM_ERROR("bad SET_RESOURCE\n");
1576                         return -EINVAL;
1577                 }
1578                 for (i = 0; i < (pkt->count / 7); i++) {
1579                         struct radeon_bo *texture, *mipmap;
1580                         u32 size, offset, base_offset, mip_offset;
1581
1582                         switch (G__SQ_VTX_CONSTANT_TYPE(radeon_get_ib_value(p, idx+(i*7)+6+1))) {
1583                         case SQ_TEX_VTX_VALID_TEXTURE:
1584                                 /* tex base */
1585                                 r = r600_cs_packet_next_reloc(p, &reloc);
1586                                 if (r) {
1587                                         DRM_ERROR("bad SET_RESOURCE\n");
1588                                         return -EINVAL;
1589                                 }
1590                                 base_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1591                                 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1592                                         ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
1593                                 else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
1594                                         ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
1595                                 texture = reloc->robj;
1596                                 /* tex mip base */
1597                                 r = r600_cs_packet_next_reloc(p, &reloc);
1598                                 if (r) {
1599                                         DRM_ERROR("bad SET_RESOURCE\n");
1600                                         return -EINVAL;
1601                                 }
1602                                 mip_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1603                                 mipmap = reloc->robj;
1604                                 r = r600_check_texture_resource(p,  idx+(i*7)+1,
1605                                                                 texture, mipmap,
1606                                                                 base_offset + radeon_get_ib_value(p, idx+1+(i*7)+2),
1607                                                                 mip_offset + radeon_get_ib_value(p, idx+1+(i*7)+3),
1608                                                                 reloc->lobj.tiling_flags);
1609                                 if (r)
1610                                         return r;
1611                                 ib[idx+1+(i*7)+2] += base_offset;
1612                                 ib[idx+1+(i*7)+3] += mip_offset;
1613                                 break;
1614                         case SQ_TEX_VTX_VALID_BUFFER:
1615                                 /* vtx base */
1616                                 r = r600_cs_packet_next_reloc(p, &reloc);
1617                                 if (r) {
1618                                         DRM_ERROR("bad SET_RESOURCE\n");
1619                                         return -EINVAL;
1620                                 }
1621                                 offset = radeon_get_ib_value(p, idx+1+(i*7)+0);
1622                                 size = radeon_get_ib_value(p, idx+1+(i*7)+1) + 1;
1623                                 if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
1624                                         /* force size to size of the buffer */
1625                                         dev_warn(p->dev, "vbo resource seems too big (%d) for the bo (%ld)\n",
1626                                                  size + offset, radeon_bo_size(reloc->robj));
1627                                         ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj);
1628                                 }
1629                                 ib[idx+1+(i*7)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff);
1630                                 ib[idx+1+(i*7)+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1631                                 break;
1632                         case SQ_TEX_VTX_INVALID_TEXTURE:
1633                         case SQ_TEX_VTX_INVALID_BUFFER:
1634                         default:
1635                                 DRM_ERROR("bad SET_RESOURCE\n");
1636                                 return -EINVAL;
1637                         }
1638                 }
1639                 break;
1640         case PACKET3_SET_ALU_CONST:
1641                 if (track->sq_config & DX9_CONSTS) {
1642                         start_reg = (idx_value << 2) + PACKET3_SET_ALU_CONST_OFFSET;
1643                         end_reg = 4 * pkt->count + start_reg - 4;
1644                         if ((start_reg < PACKET3_SET_ALU_CONST_OFFSET) ||
1645                             (start_reg >= PACKET3_SET_ALU_CONST_END) ||
1646                             (end_reg >= PACKET3_SET_ALU_CONST_END)) {
1647                                 DRM_ERROR("bad SET_ALU_CONST\n");
1648                                 return -EINVAL;
1649                         }
1650                 }
1651                 break;
1652         case PACKET3_SET_BOOL_CONST:
1653                 start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_OFFSET;
1654                 end_reg = 4 * pkt->count + start_reg - 4;
1655                 if ((start_reg < PACKET3_SET_BOOL_CONST_OFFSET) ||
1656                     (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
1657                     (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
1658                         DRM_ERROR("bad SET_BOOL_CONST\n");
1659                         return -EINVAL;
1660                 }
1661                 break;
1662         case PACKET3_SET_LOOP_CONST:
1663                 start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_OFFSET;
1664                 end_reg = 4 * pkt->count + start_reg - 4;
1665                 if ((start_reg < PACKET3_SET_LOOP_CONST_OFFSET) ||
1666                     (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
1667                     (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
1668                         DRM_ERROR("bad SET_LOOP_CONST\n");
1669                         return -EINVAL;
1670                 }
1671                 break;
1672         case PACKET3_SET_CTL_CONST:
1673                 start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_OFFSET;
1674                 end_reg = 4 * pkt->count + start_reg - 4;
1675                 if ((start_reg < PACKET3_SET_CTL_CONST_OFFSET) ||
1676                     (start_reg >= PACKET3_SET_CTL_CONST_END) ||
1677                     (end_reg >= PACKET3_SET_CTL_CONST_END)) {
1678                         DRM_ERROR("bad SET_CTL_CONST\n");
1679                         return -EINVAL;
1680                 }
1681                 break;
1682         case PACKET3_SET_SAMPLER:
1683                 if (pkt->count % 3) {
1684                         DRM_ERROR("bad SET_SAMPLER\n");
1685                         return -EINVAL;
1686                 }
1687                 start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_OFFSET;
1688                 end_reg = 4 * pkt->count + start_reg - 4;
1689                 if ((start_reg < PACKET3_SET_SAMPLER_OFFSET) ||
1690                     (start_reg >= PACKET3_SET_SAMPLER_END) ||
1691                     (end_reg >= PACKET3_SET_SAMPLER_END)) {
1692                         DRM_ERROR("bad SET_SAMPLER\n");
1693                         return -EINVAL;
1694                 }
1695                 break;
1696         case PACKET3_SURFACE_BASE_UPDATE:
1697                 if (p->family >= CHIP_RV770 || p->family == CHIP_R600) {
1698                         DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
1699                         return -EINVAL;
1700                 }
1701                 if (pkt->count) {
1702                         DRM_ERROR("bad SURFACE_BASE_UPDATE\n");
1703                         return -EINVAL;
1704                 }
1705                 break;
1706         case PACKET3_NOP:
1707                 break;
1708         default:
1709                 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
1710                 return -EINVAL;
1711         }
1712         return 0;
1713 }
1714
1715 int r600_cs_parse(struct radeon_cs_parser *p)
1716 {
1717         struct radeon_cs_packet pkt;
1718         struct r600_cs_track *track;
1719         int r;
1720
1721         if (p->track == NULL) {
1722                 /* initialize tracker, we are in kms */
1723                 track = kzalloc(sizeof(*track), GFP_KERNEL);
1724                 if (track == NULL)
1725                         return -ENOMEM;
1726                 r600_cs_track_init(track);
1727                 if (p->rdev->family < CHIP_RV770) {
1728                         track->npipes = p->rdev->config.r600.tiling_npipes;
1729                         track->nbanks = p->rdev->config.r600.tiling_nbanks;
1730                         track->group_size = p->rdev->config.r600.tiling_group_size;
1731                 } else if (p->rdev->family <= CHIP_RV740) {
1732                         track->npipes = p->rdev->config.rv770.tiling_npipes;
1733                         track->nbanks = p->rdev->config.rv770.tiling_nbanks;
1734                         track->group_size = p->rdev->config.rv770.tiling_group_size;
1735                 }
1736                 p->track = track;
1737         }
1738         do {
1739                 r = r600_cs_packet_parse(p, &pkt, p->idx);
1740                 if (r) {
1741                         kfree(p->track);
1742                         p->track = NULL;
1743                         return r;
1744                 }
1745                 p->idx += pkt.count + 2;
1746                 switch (pkt.type) {
1747                 case PACKET_TYPE0:
1748                         r = r600_cs_parse_packet0(p, &pkt);
1749                         break;
1750                 case PACKET_TYPE2:
1751                         break;
1752                 case PACKET_TYPE3:
1753                         r = r600_packet3_check(p, &pkt);
1754                         break;
1755                 default:
1756                         DRM_ERROR("Unknown packet type %d !\n", pkt.type);
1757                         kfree(p->track);
1758                         p->track = NULL;
1759                         return -EINVAL;
1760                 }
1761                 if (r) {
1762                         kfree(p->track);
1763                         p->track = NULL;
1764                         return r;
1765                 }
1766         } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
1767 #if 0
1768         for (r = 0; r < p->ib->length_dw; r++) {
1769                 printk(KERN_INFO "%05d  0x%08X\n", r, p->ib->ptr[r]);
1770                 mdelay(1);
1771         }
1772 #endif
1773         kfree(p->track);
1774         p->track = NULL;
1775         return 0;
1776 }
1777
1778 static int r600_cs_parser_relocs_legacy(struct radeon_cs_parser *p)
1779 {
1780         if (p->chunk_relocs_idx == -1) {
1781                 return 0;
1782         }
1783         p->relocs = kzalloc(sizeof(struct radeon_cs_reloc), GFP_KERNEL);
1784         if (p->relocs == NULL) {
1785                 return -ENOMEM;
1786         }
1787         return 0;
1788 }
1789
1790 /**
1791  * cs_parser_fini() - clean parser states
1792  * @parser:     parser structure holding parsing context.
1793  * @error:      error number
1794  *
1795  * If error is set than unvalidate buffer, otherwise just free memory
1796  * used by parsing context.
1797  **/
1798 static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error)
1799 {
1800         unsigned i;
1801
1802         kfree(parser->relocs);
1803         for (i = 0; i < parser->nchunks; i++) {
1804                 kfree(parser->chunks[i].kdata);
1805                 kfree(parser->chunks[i].kpage[0]);
1806                 kfree(parser->chunks[i].kpage[1]);
1807         }
1808         kfree(parser->chunks);
1809         kfree(parser->chunks_array);
1810 }
1811
1812 int r600_cs_legacy(struct drm_device *dev, void *data, struct drm_file *filp,
1813                         unsigned family, u32 *ib, int *l)
1814 {
1815         struct radeon_cs_parser parser;
1816         struct radeon_cs_chunk *ib_chunk;
1817         struct radeon_ib fake_ib;
1818         struct r600_cs_track *track;
1819         int r;
1820
1821         /* initialize tracker */
1822         track = kzalloc(sizeof(*track), GFP_KERNEL);
1823         if (track == NULL)
1824                 return -ENOMEM;
1825         r600_cs_track_init(track);
1826         r600_cs_legacy_get_tiling_conf(dev, &track->npipes, &track->nbanks, &track->group_size);
1827         /* initialize parser */
1828         memset(&parser, 0, sizeof(struct radeon_cs_parser));
1829         parser.filp = filp;
1830         parser.dev = &dev->pdev->dev;
1831         parser.rdev = NULL;
1832         parser.family = family;
1833         parser.ib = &fake_ib;
1834         parser.track = track;
1835         fake_ib.ptr = ib;
1836         r = radeon_cs_parser_init(&parser, data);
1837         if (r) {
1838                 DRM_ERROR("Failed to initialize parser !\n");
1839                 r600_cs_parser_fini(&parser, r);
1840                 return r;
1841         }
1842         r = r600_cs_parser_relocs_legacy(&parser);
1843         if (r) {
1844                 DRM_ERROR("Failed to parse relocation !\n");
1845                 r600_cs_parser_fini(&parser, r);
1846                 return r;
1847         }
1848         /* Copy the packet into the IB, the parser will read from the
1849          * input memory (cached) and write to the IB (which can be
1850          * uncached). */
1851         ib_chunk = &parser.chunks[parser.chunk_ib_idx];
1852         parser.ib->length_dw = ib_chunk->length_dw;
1853         *l = parser.ib->length_dw;
1854         r = r600_cs_parse(&parser);
1855         if (r) {
1856                 DRM_ERROR("Invalid command stream !\n");
1857                 r600_cs_parser_fini(&parser, r);
1858                 return r;
1859         }
1860         r = radeon_cs_finish_pages(&parser);
1861         if (r) {
1862                 DRM_ERROR("Invalid command stream !\n");
1863                 r600_cs_parser_fini(&parser, r);
1864                 return r;
1865         }
1866         r600_cs_parser_fini(&parser, r);
1867         return r;
1868 }
1869
1870 void r600_cs_legacy_init(void)
1871 {
1872         r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
1873 }