video: tegra: host: Fix error case memory leaks
[linux-2.6.git] / drivers / video / tegra / host / gr3d / gr3d_t30.c
1 /*
2  * drivers/video/tegra/host/gr3d/gr3d_t30.c
3  *
4  * Tegra Graphics Host 3D for Tegra3
5  *
6  * Copyright (c) 2011-2012 NVIDIA Corporation.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
19  */
20
21 #include "nvhost_hwctx.h"
22 #include "nvhost_channel.h"
23 #include "nvhost_cdma.h"
24 #include "dev.h"
25 #include "host1x/host1x_hardware.h"
26 #include "host1x/host1x_syncpt.h"
27 #include "gr3d.h"
28
29 #include <mach/gpufuse.h>
30 #include <mach/hardware.h>
31 #include <linux/slab.h>
32
/*
 * Number of 3D register sets.  The initial value 99 is deliberately
 * out of range: setup_save() does BUG_ON(register_sets > 2), so using
 * this before nvhost_gr3d_t30_ctxhandler_init() has stored the real
 * count trips that check.
 */
static int register_sets = 99;
35
36 static const struct hwctx_reginfo ctxsave_regs_3d_global[] = {
37         HWCTX_REGINFO(0xe00,    4, DIRECT),
38         HWCTX_REGINFO(0xe05,   30, DIRECT),
39         HWCTX_REGINFO(0xe25,    2, DIRECT),
40         HWCTX_REGINFO(0xe28,    2, DIRECT),
41         HWCTX_REGINFO(0xe30,   16, DIRECT),
42         HWCTX_REGINFO(0x001,    2, DIRECT),
43         HWCTX_REGINFO(0x00c,   10, DIRECT),
44         HWCTX_REGINFO(0x100,   34, DIRECT),
45         HWCTX_REGINFO(0x124,    2, DIRECT),
46         HWCTX_REGINFO(0x200,    5, DIRECT),
47         HWCTX_REGINFO(0x205, 1024, INDIRECT),
48         HWCTX_REGINFO(0x207, 1024, INDIRECT),
49         HWCTX_REGINFO(0x209,    1, DIRECT),
50         HWCTX_REGINFO(0x300,   64, DIRECT),
51         HWCTX_REGINFO(0x343,   25, DIRECT),
52         HWCTX_REGINFO(0x363,    2, DIRECT),
53         HWCTX_REGINFO(0x400,   16, DIRECT),
54         HWCTX_REGINFO(0x411,    1, DIRECT),
55         HWCTX_REGINFO(0x412,    1, DIRECT),
56         HWCTX_REGINFO(0x500,    4, DIRECT),
57         HWCTX_REGINFO(0x520,   32, DIRECT),
58         HWCTX_REGINFO(0x540,   64, INDIRECT),
59         HWCTX_REGINFO(0x600,   16, INDIRECT_4X),
60         HWCTX_REGINFO(0x603,  128, INDIRECT),
61         HWCTX_REGINFO(0x608,    4, DIRECT),
62         HWCTX_REGINFO(0x60e,    1, DIRECT),
63         HWCTX_REGINFO(0x700,   64, INDIRECT),
64         HWCTX_REGINFO(0x710,   50, DIRECT),
65         HWCTX_REGINFO(0x750,   16, DIRECT),
66         HWCTX_REGINFO(0x800,   16, INDIRECT_4X),
67         HWCTX_REGINFO(0x803,  512, INDIRECT),
68         HWCTX_REGINFO(0x805,   64, INDIRECT),
69         HWCTX_REGINFO(0x820,   32, DIRECT),
70         HWCTX_REGINFO(0x900,   64, INDIRECT),
71         HWCTX_REGINFO(0x902,    2, DIRECT),
72         HWCTX_REGINFO(0x90a,    1, DIRECT),
73         HWCTX_REGINFO(0xa02,   10, DIRECT),
74         HWCTX_REGINFO(0xb04,    1, DIRECT),
75         HWCTX_REGINFO(0xb06,   13, DIRECT),
76 };
77
78 static const struct hwctx_reginfo ctxsave_regs_3d_perset[] = {
79         HWCTX_REGINFO(0xe04,    1, DIRECT),
80         HWCTX_REGINFO(0xe2a,    1, DIRECT),
81         HWCTX_REGINFO(0x413,    1, DIRECT),
82         HWCTX_REGINFO(0x90b,    1, DIRECT),
83         HWCTX_REGINFO(0xe41,    1, DIRECT),
84 };
85
/*
 * Offset, in 32-bit words, of the set 1 portion of the restore buffer.
 * Written by setup_save(); save_push_v1() multiplies it by 4 to form the
 * byte offset added to restore_phys.
 */
static unsigned int restore_set1_offset;

/* Words each restore fragment occupies in the restore buffer. */
#define RESTORE_BEGIN_SIZE 4
#define RESTORE_DIRECT_SIZE 1
#define RESTORE_INDIRECT_SIZE 2
#define RESTORE_END_SIZE 1

/*
 * Words each save_*_v1() helper writes into the save buffer: its own
 * command words plus the restore fragment embedded in them.
 */
#define SAVE_BEGIN_V1_SIZE (1 + RESTORE_BEGIN_SIZE)
#define SAVE_DIRECT_V1_SIZE (4 + RESTORE_DIRECT_SIZE)
#define SAVE_INDIRECT_V1_SIZE (6 + RESTORE_INDIRECT_SIZE)
#define SAVE_END_V1_SIZE (9 + RESTORE_END_SIZE)

/* Initial save_incrs value, and the amount subtracted for save_thresh. */
#define SAVE_INCRS 3
#define SAVE_THRESH_OFFSET 0
98
99 struct save_info {
100         u32 *ptr;
101         unsigned int save_count;
102         unsigned int restore_count;
103         unsigned int save_incrs;
104         unsigned int restore_incrs;
105 };
106
107 /*** v1 saver ***/
108
109 static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma)
110 {
111         struct host1x_hwctx *ctx = to_host1x_hwctx(nctx);
112         struct host1x_hwctx_handler *p = host1x_hwctx_handler(ctx);
113
114         /* wait for 3d idle */
115         nvhost_cdma_push(cdma,
116                         nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0),
117                         nvhost_opcode_imm_incr_syncpt(NV_SYNCPT_OP_DONE,
118                                         p->syncpt));
119         nvhost_cdma_push(cdma,
120                         nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
121                                         NV_CLASS_HOST_WAIT_SYNCPT_BASE, 1),
122                         nvhost_class_host_wait_syncpt_base(p->syncpt,
123                                                         p->waitbase, 1));
124         /* back to 3d */
125         nvhost_cdma_push(cdma,
126                         nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0),
127                         NVHOST_OPCODE_NOOP);
128         /* set register set 0 and 1 register read memory output addresses,
129            and send their reads to memory */
130         if (register_sets == 2) {
131                 nvhost_cdma_push(cdma,
132                         nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK, 2),
133                         nvhost_opcode_imm(AR3D_GLOBAL_MEMORY_OUTPUT_READS,
134                                         1));
135                 nvhost_cdma_push(cdma,
136                                 nvhost_opcode_nonincr(0x904, 1),
137                                 ctx->restore_phys + restore_set1_offset * 4);
138         }
139         nvhost_cdma_push(cdma,
140                 nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK, 1),
141                 nvhost_opcode_imm(AR3D_GLOBAL_MEMORY_OUTPUT_READS, 1));
142         nvhost_cdma_push(cdma,
143                 nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_ADDRESS, 1),
144                 ctx->restore_phys);
145         /* gather the save buffer */
146         nvhost_cdma_push_gather(cdma,
147                         nvhost_get_host(nctx->channel->dev)->nvmap,
148                         p->save_buf,
149                         0,
150                         nvhost_opcode_gather(p->save_size),
151                         p->save_phys);
152 }
153
154 static void __init save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr)
155 {
156         ptr[0] = nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_DATA,
157                         RESTORE_BEGIN_SIZE);
158         nvhost_3dctx_restore_begin(p, ptr + 1);
159         ptr += RESTORE_BEGIN_SIZE;
160 }
161
162 static void __init save_direct_v1(u32 *ptr, u32 start_reg, u32 count)
163 {
164         ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID,
165                         AR3D_DW_MEMORY_OUTPUT_DATA, 1);
166         nvhost_3dctx_restore_direct(ptr + 1, start_reg, count);
167         ptr += RESTORE_DIRECT_SIZE;
168         ptr[1] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
169                                         NV_CLASS_HOST_INDOFF, 1);
170         ptr[2] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D,
171                                                 start_reg, true);
172         /* TODO could do this in the setclass if count < 6 */
173         ptr[3] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count);
174 }
175
176 static void __init save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset,
177                         u32 data_reg, u32 count)
178 {
179         ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
180         ptr[1] = nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_DATA,
181                         RESTORE_INDIRECT_SIZE);
182         nvhost_3dctx_restore_indirect(ptr + 2, offset_reg, offset, data_reg,
183                         count);
184         ptr += RESTORE_INDIRECT_SIZE;
185         ptr[2] = nvhost_opcode_imm(offset_reg, offset);
186         ptr[3] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
187                                         NV_CLASS_HOST_INDOFF, 1);
188         ptr[4] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D,
189                                                 data_reg, false);
190         ptr[5] = nvhost_opcode_nonincr(NV_CLASS_HOST_INDDATA, count);
191 }
192
193 static void __init save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr)
194 {
195         /* write end of restore buffer */
196         ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID,
197                         AR3D_DW_MEMORY_OUTPUT_DATA, 1);
198         nvhost_3dctx_restore_end(p, ptr + 1);
199         ptr += RESTORE_END_SIZE;
200         /* reset to dual reg if necessary */
201         ptr[1] = nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK,
202                         (1 << register_sets) - 1);
203         /* op_done syncpt incr to flush FDC */
204         ptr[2] = nvhost_opcode_imm_incr_syncpt(NV_SYNCPT_OP_DONE, p->syncpt);
205         /* host wait for that syncpt incr, and advance the wait base */
206         ptr[3] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
207                         NV_CLASS_HOST_WAIT_SYNCPT_BASE,
208                         nvhost_mask2(
209                                         NV_CLASS_HOST_WAIT_SYNCPT_BASE,
210                                         NV_CLASS_HOST_INCR_SYNCPT_BASE));
211         ptr[4] = nvhost_class_host_wait_syncpt_base(p->syncpt,
212                                 p->waitbase, p->save_incrs - 1);
213         ptr[5] = nvhost_class_host_incr_syncpt_base(p->waitbase,
214                         p->save_incrs);
215         /* set class back to 3d */
216         ptr[6] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
217         /* send reg reads back to host */
218         ptr[7] = nvhost_opcode_imm(AR3D_GLOBAL_MEMORY_OUTPUT_READS, 0);
219         /* final syncpt increment to release waiters */
220         ptr[8] = nvhost_opcode_imm(0, p->syncpt);
221 }
222
223 /*** save ***/
224
225
226
227 static void __init setup_save_regs(struct save_info *info,
228                         const struct hwctx_reginfo *regs,
229                         unsigned int nr_regs)
230 {
231         const struct hwctx_reginfo *rend = regs + nr_regs;
232         u32 *ptr = info->ptr;
233         unsigned int save_count = info->save_count;
234         unsigned int restore_count = info->restore_count;
235
236         for ( ; regs != rend; ++regs) {
237                 u32 offset = regs->offset;
238                 u32 count = regs->count;
239                 u32 indoff = offset + 1;
240                 switch (regs->type) {
241                 case HWCTX_REGINFO_DIRECT:
242                         if (ptr) {
243                                 save_direct_v1(ptr, offset, count);
244                                 ptr += SAVE_DIRECT_V1_SIZE;
245                         }
246                         save_count += SAVE_DIRECT_V1_SIZE;
247                         restore_count += RESTORE_DIRECT_SIZE;
248                         break;
249                 case HWCTX_REGINFO_INDIRECT_4X:
250                         ++indoff;
251                         /* fall through */
252                 case HWCTX_REGINFO_INDIRECT:
253                         if (ptr) {
254                                 save_indirect_v1(ptr, offset, 0,
255                                                 indoff, count);
256                                 ptr += SAVE_INDIRECT_V1_SIZE;
257                         }
258                         save_count += SAVE_INDIRECT_V1_SIZE;
259                         restore_count += RESTORE_INDIRECT_SIZE;
260                         break;
261                 }
262                 if (ptr) {
263                         /* SAVE cases only: reserve room for incoming data */
264                         u32 k = 0;
265                         /*
266                          * Create a signature pattern for indirect data (which
267                          * will be overwritten by true incoming data) for
268                          * better deducing where we are in a long command
269                          * sequence, when given only a FIFO snapshot for debug
270                          * purposes.
271                         */
272                         for (k = 0; k < count; k++)
273                                 *(ptr + k) = 0xd000d000 | (offset << 16) | k;
274                         ptr += count;
275                 }
276                 save_count += count;
277                 restore_count += count;
278         }
279
280         info->ptr = ptr;
281         info->save_count = save_count;
282         info->restore_count = restore_count;
283 }
284
285 static void __init switch_gpu(struct save_info *info,
286                         unsigned int save_src_set,
287                         u32 save_dest_sets,
288                         u32 restore_dest_sets)
289 {
290         if (info->ptr) {
291                 info->ptr[0] = nvhost_opcode_setclass(
292                                 NV_GRAPHICS_3D_CLASS_ID,
293                                 AR3D_DW_MEMORY_OUTPUT_DATA, 1);
294                 info->ptr[1] = nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK,
295                                 restore_dest_sets);
296                 info->ptr[2] = nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK,
297                                 save_dest_sets);
298                 info->ptr[3] = nvhost_opcode_imm(AR3D_GSHIM_READ_SELECT,
299                                 save_src_set);
300                 info->ptr += 4;
301         }
302         info->save_count += 4;
303         info->restore_count += 1;
304 }
305
306 static void __init setup_save(struct host1x_hwctx_handler *p, u32 *ptr)
307 {
308         struct save_info info = {
309                 ptr,
310                 SAVE_BEGIN_V1_SIZE,
311                 RESTORE_BEGIN_SIZE,
312                 SAVE_INCRS,
313                 1
314         };
315         int save_end_size = SAVE_END_V1_SIZE;
316
317         BUG_ON(register_sets > 2);
318
319         if (info.ptr) {
320                 save_begin_v1(p, info.ptr);
321                 info.ptr += SAVE_BEGIN_V1_SIZE;
322         }
323
324         /* read from set0, write cmds through set0, restore to set0 and 1 */
325         if (register_sets == 2)
326                 switch_gpu(&info, 0, 1, 3);
327
328         /* save regs that are common to both sets */
329         setup_save_regs(&info,
330                         ctxsave_regs_3d_global,
331                         ARRAY_SIZE(ctxsave_regs_3d_global));
332
333         /* read from set 0, write cmds through set0, restore to set0 */
334         if (register_sets == 2)
335                 switch_gpu(&info, 0, 1, 1);
336
337         /* save set 0 specific regs */
338         setup_save_regs(&info,
339                         ctxsave_regs_3d_perset,
340                         ARRAY_SIZE(ctxsave_regs_3d_perset));
341
342         if (register_sets == 2) {
343                 /* read from set1, write cmds through set1, restore to set1 */
344                 switch_gpu(&info, 1, 2, 2);
345                 /* note offset at which set 1 restore starts */
346                 restore_set1_offset = info.restore_count;
347                 /* save set 1 specific regs */
348                 setup_save_regs(&info,
349                                 ctxsave_regs_3d_perset,
350                                 ARRAY_SIZE(ctxsave_regs_3d_perset));
351         }
352
353         /* read from set0, write cmds through set1, restore to set0 and 1 */
354         if (register_sets == 2)
355                 switch_gpu(&info, 0, 2, 3);
356
357         if (info.ptr) {
358                 save_end_v1(p, info.ptr);
359                 info.ptr += SAVE_END_V1_SIZE;
360         }
361
362         wmb();
363
364         p->save_size = info.save_count + save_end_size;
365         p->restore_size = info.restore_count + RESTORE_END_SIZE;
366         p->save_incrs = info.save_incrs;
367         p->save_thresh = p->save_incrs - SAVE_THRESH_OFFSET;
368         p->restore_incrs = info.restore_incrs;
369 }
370
371
372 /*** ctx3d ***/
373
374 static struct nvhost_hwctx *ctx3d_alloc_v1(struct nvhost_hwctx_handler *h,
375                 struct nvhost_channel *ch)
376 {
377         struct host1x_hwctx_handler *p = to_host1x_hwctx_handler(h);
378         struct host1x_hwctx *ctx = nvhost_3dctx_alloc_common(p, ch, false);
379
380         if (ctx)
381                 return &ctx->hwctx;
382         else
383                 return NULL;
384 }
385
386 struct nvhost_hwctx_handler *nvhost_gr3d_t30_ctxhandler_init(
387                 u32 syncpt, u32 waitbase,
388                 struct nvhost_channel *ch)
389 {
390         struct nvmap_client *nvmap;
391         u32 *save_ptr;
392         struct host1x_hwctx_handler *p;
393
394         p = kmalloc(sizeof(*p), GFP_KERNEL);
395         if (!p)
396                 return NULL;
397
398         nvmap = nvhost_get_host(ch->dev)->nvmap;
399
400         register_sets = tegra_gpu_register_sets();
401         BUG_ON(register_sets == 0 || register_sets > 2);
402
403         p->syncpt = syncpt;
404         p->waitbase = waitbase;
405
406         setup_save(p, NULL);
407
408         p->save_buf = nvmap_alloc(nvmap, p->save_size * 4, 32,
409                                 NVMAP_HANDLE_WRITE_COMBINE, 0);
410         if (IS_ERR(p->save_buf)) {
411                 p->save_buf = NULL;
412                 return NULL;
413         }
414
415         p->save_slots = 6;
416         if (register_sets == 2)
417                 p->save_slots += 2;
418
419         save_ptr = nvmap_mmap(p->save_buf);
420         if (!save_ptr) {
421                 nvmap_free(nvmap, p->save_buf);
422                 p->save_buf = NULL;
423                 return NULL;
424         }
425
426         p->save_phys = nvmap_pin(nvmap, p->save_buf);
427
428         setup_save(p, save_ptr);
429
430         nvmap_munmap(p->save_buf, save_ptr);
431
432         p->h.alloc = ctx3d_alloc_v1;
433         p->h.save_push = save_push_v1;
434         p->h.save_service = NULL;
435         p->h.get = nvhost_3dctx_get;
436         p->h.put = nvhost_3dctx_put;
437
438         return &p->h;
439 }