video: tegra: host: Fix error handling
[linux-3.10.git] drivers/video/tegra/host/gk20a/channel_gk20a.c
1 /*
2  * drivers/video/tegra/host/gk20a/channel_gk20a.c
3  *
4  * GK20A Graphics channel
5  *
6  * Copyright (c) 2011-2013, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/list.h>
23 #include <linux/delay.h>
24 #include <linux/highmem.h> /* needed for nvmap.h */
25 #include <trace/events/nvhost.h>
26 #include <linux/scatterlist.h>
27
28
29 #include "../dev.h"
30 #include "../nvhost_as.h"
31 #include "debug.h"
32
33 #include "gk20a.h"
34
35 #include "hw_ram_gk20a.h"
36 #include "hw_fifo_gk20a.h"
37 #include "hw_pbdma_gk20a.h"
38 #include "hw_ccsr_gk20a.h"
39 #include "hw_ltc_gk20a.h"
40 #include "chip_support.h"
41
42 #define NVMAP_HANDLE_PARAM_SIZE 1
43
44 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
45 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
46
47 static int alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
48                              struct priv_cmd_entry **entry);
49 static void free_priv_cmdbuf(struct channel_gk20a *c,
50                              struct priv_cmd_entry *e);
51 static void recycle_priv_cmdbuf(struct channel_gk20a *c);
52
53 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
54 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
55
56 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
57 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
58 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
59                         u64 gpfifo_base, u32 gpfifo_entries);
60
61 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
62 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
63
64 static int channel_gk20a_alloc_inst(struct gk20a *g,
65                                 struct channel_gk20a *ch);
66 static void channel_gk20a_free_inst(struct gk20a *g,
67                                 struct channel_gk20a *ch);
68
69 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
70                                         bool add);
71
72 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
73 {
74         struct channel_gk20a *ch = NULL;
75         int chid;
76
77         mutex_lock(&f->ch_inuse_mutex);
78         for (chid = 0; chid < f->num_channels; chid++) {
79                 if (!f->channel[chid].in_use) {
80                         f->channel[chid].in_use = true;
81                         ch = &f->channel[chid];
82                         break;
83                 }
84         }
85         mutex_unlock(&f->ch_inuse_mutex);
86
87         return ch;
88 }
89
90 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
91 {
92         mutex_lock(&f->ch_inuse_mutex);
93         f->channel[c->hw_chid].in_use = false;
94         mutex_unlock(&f->ch_inuse_mutex);
95 }
96
97 int channel_gk20a_commit_va(struct channel_gk20a *c)
98 {
99         phys_addr_t addr;
100         u32 addr_lo;
101         u32 addr_hi;
102         void *inst_ptr;
103
104         nvhost_dbg_fn("");
105
106         inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
107         if (!inst_ptr)
108                 return -ENOMEM;
109
110         addr = sg_phys(c->vm->pdes.sgt->sgl);
111         addr_lo = u64_lo32(addr) >> 12;
112         addr_hi = u64_hi32(addr);
113
114         nvhost_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
115                    (u64)addr, addr_lo, addr_hi);
116
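        /* Program the page directory base and address-space limit into the
         * channel's instance block so the GPU MMU translates through this VM. */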
117         mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
118                 ram_in_page_dir_base_target_vid_mem_f() |
119                 ram_in_page_dir_base_vol_true_f() |
120                 ram_in_page_dir_base_lo_f(addr_lo));
121
122         mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
123                 ram_in_page_dir_base_hi_f(addr_hi));
124
125         mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
126                  u64_lo32(c->vm->va_limit) | 0xFFF);
127
128         mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
129                 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
130
131         nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
132
133         gk20a_mm_l2_invalidate(c->g);
134
135         return 0;
136 }
137
138 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
139 {
140         u32 addr_lo;
141         u32 addr_hi;
142         void *inst_ptr;
143
144         nvhost_dbg_fn("");
145
146         inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
147         if (!inst_ptr)
148                 return -ENOMEM;
149
150         addr_lo = u64_lo32(c->userd_cpu_pa >> ram_userd_base_shift_v());
151         addr_hi = u64_hi32(c->userd_cpu_pa);
152
153         nvhost_dbg_info("channel %d : set ramfc userd 0x%16llx",
154                 c->hw_chid, (u64)c->userd_cpu_pa);
155
156         mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
157                  pbdma_userd_target_vid_mem_f() |
158                  pbdma_userd_addr_f(addr_lo));
159
160         mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
161                  pbdma_userd_target_vid_mem_f() |
162                  pbdma_userd_hi_addr_f(addr_hi));
163
164         nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
165
166         gk20a_mm_l2_invalidate(c->g);
167
168         return 0;
169 }
170
171 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
172                                 u64 gpfifo_base, u32 gpfifo_entries)
173 {
174         void *inst_ptr;
175
176         nvhost_dbg_fn("");
177
178         inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
179         if (!inst_ptr)
180                 return -ENOMEM;
181
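        /* Initialize the channel's RAMFC (FIFO context): clear it, then program
         * the GPFIFO base/size, entry formats, timeslices and channel id below. */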
182         memset(inst_ptr, 0, ram_fc_size_val_v());
183
184         mem_wr32(inst_ptr, ram_fc_gp_base_w(),
185                 pbdma_gp_base_offset_f(
186                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
187
188         mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
189                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
190                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
191
192         mem_wr32(inst_ptr, ram_fc_signature_w(),
193                  pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
194
195         mem_wr32(inst_ptr, ram_fc_formats_w(),
196                 pbdma_formats_gp_fermi0_f() |
197                 pbdma_formats_pb_fermi1_f() |
198                 pbdma_formats_mp_fermi0_f());
199
200         mem_wr32(inst_ptr, ram_fc_pb_header_w(),
201                 pbdma_pb_header_priv_user_f() |
202                 pbdma_pb_header_method_zero_f() |
203                 pbdma_pb_header_subchannel_zero_f() |
204                 pbdma_pb_header_level_main_f() |
205                 pbdma_pb_header_first_true_f() |
206                 pbdma_pb_header_type_inc_f());
207
208         mem_wr32(inst_ptr, ram_fc_subdevice_w(),
209                 pbdma_subdevice_id_f(1) |
210                 pbdma_subdevice_status_active_f() |
211                 pbdma_subdevice_channel_dma_enable_f());
212
213         mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
214
215         mem_wr32(inst_ptr, ram_fc_acquire_w(),
216                 pbdma_acquire_retry_man_2_f() |
217                 pbdma_acquire_retry_exp_2_f() |
218                 pbdma_acquire_timeout_exp_max_f() |
219                 pbdma_acquire_timeout_man_max_f() |
220                 pbdma_acquire_timeout_en_disable_f());
221
222         mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
223                 fifo_eng_timeslice_timeout_128_f() |
224                 fifo_eng_timeslice_timescale_3_f() |
225                 fifo_eng_timeslice_enable_true_f());
226
227         mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
228                 fifo_pb_timeslice_timeout_16_f() |
229                 fifo_pb_timeslice_timescale_0_f() |
230                 fifo_pb_timeslice_enable_true_f());
231
232         mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_f(c->hw_chid));
233
234         /* TBD: always priv mode? */
235         mem_wr32(inst_ptr, ram_fc_hce_ctrl_w(),
236                  pbdma_hce_ctrl_hce_priv_mode_yes_f());
237
238         nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
239
240         gk20a_mm_l2_invalidate(c->g);
241
242         return 0;
243 }
244
245 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
246 {
247         BUG_ON(!c->userd_cpu_va);
248
249         nvhost_dbg_fn("");
250
251         mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
252         mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
253         mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
254         mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
255         mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
256         mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
257         mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
258         mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
259         mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
260         mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
261
262         gk20a_mm_l2_invalidate(c->g);
263
264         return 0;
265 }
266
267 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
268 {
269         struct gk20a *g = get_gk20a(ch_gk20a->ch->dev);
270         struct fifo_gk20a *f = &g->fifo;
271         struct fifo_engine_info_gk20a *engine_info =
272                 f->engine_info + ENGINE_GR_GK20A;
273
274         u32 inst_ptr = sg_phys(ch_gk20a->inst_block.mem.sgt->sgl)
275                 >> ram_in_base_shift_v();
276
277         nvhost_dbg_info("bind channel %d inst ptr 0x%08x",
278                 ch_gk20a->hw_chid, inst_ptr);
279
280         ch_gk20a->bound = true;
281
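        /* Binding takes three register writes: select the engine's runlist,
         * point CCSR at the instance block, then enable the channel. */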
282         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
283                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
284                  ~ccsr_channel_runlist_f(~0)) |
285                  ccsr_channel_runlist_f(engine_info->runlist_id));
286
287         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
288                 ccsr_channel_inst_ptr_f(inst_ptr) |
289                 ccsr_channel_inst_target_vid_mem_f() |
290                 ccsr_channel_inst_bind_true_f());
291
292         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
293                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
294                  ~ccsr_channel_enable_set_f(~0)) |
295                  ccsr_channel_enable_set_true_f());
296 }
297
298 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
299 {
300         struct gk20a *g = get_gk20a(ch_gk20a->ch->dev);
301
302         nvhost_dbg_fn("");
303
304         if (ch_gk20a->bound)
305                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
306                         ccsr_channel_inst_ptr_f(0) |
307                         ccsr_channel_inst_bind_false_f());
308
309         ch_gk20a->bound = false;
310 }
311
312 static int channel_gk20a_alloc_inst(struct gk20a *g,
313                                 struct channel_gk20a *ch)
314 {
315         struct mem_mgr *memmgr = mem_mgr_from_g(g);
316
317         nvhost_dbg_fn("");
318
319         ch->inst_block.mem.ref =
320                 nvhost_memmgr_alloc(memmgr, ram_in_alloc_size_v(),
321                                     DEFAULT_ALLOC_ALIGNMENT,
322                                     DEFAULT_ALLOC_FLAGS,
323                                     0);
324
325         if (IS_ERR(ch->inst_block.mem.ref)) {
326                 ch->inst_block.mem.ref = 0;
327                 goto clean_up;
328         }
329
330         ch->inst_block.mem.sgt =
331                 nvhost_memmgr_sg_table(memmgr, ch->inst_block.mem.ref);
332
333         /* IS_ERR throws a warning here (expecting void *) */
334         if (IS_ERR(ch->inst_block.mem.sgt)) {
335                 ch->inst_block.mem.sgt = NULL;
336                 goto clean_up;
337         }
338
339         nvhost_dbg_info("channel %d inst block physical addr: 0x%16llx",
340                 ch->hw_chid, (u64)sg_phys(ch->inst_block.mem.sgt->sgl));
341
342         ch->inst_block.mem.size = ram_in_alloc_size_v();
343
344         nvhost_dbg_fn("done");
345         return 0;
346
347 clean_up:
348         nvhost_dbg(dbg_fn | dbg_err, "fail");
349         channel_gk20a_free_inst(g, ch);
350         return -ENOMEM;
351 }
352
353 static void channel_gk20a_free_inst(struct gk20a *g,
354                                 struct channel_gk20a *ch)
355 {
356         struct mem_mgr *memmgr = mem_mgr_from_g(g);
357
358         nvhost_memmgr_free_sg_table(memmgr, ch->inst_block.mem.ref,
359                         ch->inst_block.mem.sgt);
360         nvhost_memmgr_put(memmgr, ch->inst_block.mem.ref);
361         memset(&ch->inst_block, 0, sizeof(struct inst_desc));
362 }
363
364 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
365                                         bool add)
366 {
367         return gk20a_fifo_update_runlist(c->g,
368                 ENGINE_GR_GK20A, c->hw_chid, add);
369 }
370
371 void gk20a_disable_channel(struct channel_gk20a *ch,
372                            bool finish,
373                            long finish_timeout)
374 {
375         struct nvhost_device_data *pdata = nvhost_get_devdata(ch->g->dev);
376         struct nvhost_master *host = host_from_gk20a_channel(ch);
377         int err;
378
379         if (finish) {
380                 err = gk20a_channel_finish(ch, finish_timeout);
381                 WARN_ON(err);
382         }
383
384         /* ensure no fences are pending */
385         nvhost_syncpt_set_min_eq_max(&host->syncpt,
386                         ch->hw_chid + pdata->syncpt_base);
387
388         /* disable channel */
389         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
390                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
391                 ccsr_channel_enable_clr_true_f());
392
393         /* preempt the channel */
394         gk20a_fifo_preempt_channel(ch->g,
395                 ENGINE_GR_GK20A, ch->hw_chid);
396
397         /* remove channel from runlist */
398         channel_gk20a_update_runlist(ch, false);
399 }
400
401 #if defined(CONFIG_TEGRA_GPU_CYCLE_STATS)
402
403 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
404 {
405         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
406         /* disable existing cyclestats buffer */
407         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
408         if (ch->cyclestate.cyclestate_buffer_handler) {
409                 nvhost_memmgr_munmap(ch->cyclestate.cyclestate_buffer_handler,
410                                 ch->cyclestate.cyclestate_buffer);
411                 nvhost_memmgr_put(memmgr,
412                                 ch->cyclestate.cyclestate_buffer_handler);
413                 ch->cyclestate.cyclestate_buffer_handler = NULL;
414                 ch->cyclestate.cyclestate_buffer = NULL;
415                 ch->cyclestate.cyclestate_buffer_size = 0;
416         }
417         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
418 }
419
420 int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
421                        struct nvhost_cycle_stats_args *args)
422 {
423         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
424         struct mem_handle *handle_ref;
425         void *virtual_address;
426         u64 cyclestate_buffer_size;
427         struct platform_device *dev = ch->ch->dev;
428
429         if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
430
431                 /* set up new cyclestats buffer */
432                 handle_ref = nvhost_memmgr_get(memmgr,
433                                 args->nvmap_handle, dev);
434                 if (IS_ERR(handle_ref))
435                         return PTR_ERR(handle_ref);
436                 virtual_address = nvhost_memmgr_mmap(handle_ref);
437                 if (!virtual_address)
438                         return -ENOMEM;
439
440                 nvhost_memmgr_get_param(memmgr, handle_ref,
441                                         NVMAP_HANDLE_PARAM_SIZE,
442                                         &cyclestate_buffer_size);
443
444                 ch->cyclestate.cyclestate_buffer_handler = handle_ref;
445                 ch->cyclestate.cyclestate_buffer = virtual_address;
446                 ch->cyclestate.cyclestate_buffer_size = cyclestate_buffer_size;
447                 return 0;
448
449         } else if (!args->nvmap_handle &&
450                         ch->cyclestate.cyclestate_buffer_handler) {
451                 gk20a_free_cycle_stats_buffer(ch);
452                 return 0;
453
454         } else if (!args->nvmap_handle &&
455                         !ch->cyclestate.cyclestate_buffer_handler) {
456                 /* no request from GL */
457                 return 0;
458
459         } else {
460                 pr_err("channel already has cyclestats buffer\n");
461                 return -EINVAL;
462         }
463 }
464 #endif
465
466 void gk20a_free_channel(struct nvhost_hwctx *ctx, bool finish)
467 {
468         struct channel_gk20a *ch = ctx->priv;
469         struct gk20a *g = ch->g;
470         struct fifo_gk20a *f = &g->fifo;
471         struct gr_gk20a *gr = &g->gr;
472         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
473         struct vm_gk20a *ch_vm = ch->vm;
474         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
475
476         nvhost_dbg_fn("");
477
478         if (!ch->bound)
479                 return;
480
481         if (!gk20a_channel_as_bound(ch))
482                 goto unbind;
483
484         if (!tegra_platform_is_silicon())
485                 timeout = MAX_SCHEDULE_TIMEOUT;
486
487         nvhost_dbg_info("freeing bound channel context, timeout=%ld",
488                         timeout);
489
490         gk20a_disable_channel(ch, finish, timeout);
491
492         /* release channel ctx */
493         gk20a_free_channel_ctx(ch);
494
495         gk20a_gr_flush_channel_tlb(gr);
496
497         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
498
499         /* free gpfifo */
500         ch_vm->unmap(ch_vm, ch->gpfifo.gpu_va);
501         nvhost_memmgr_munmap(ch->gpfifo.mem.ref, ch->gpfifo.cpu_va);
502         gk20a_mm_l2_invalidate(ch->g);
503
504         nvhost_memmgr_put(memmgr, ch->gpfifo.mem.ref);
505         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
506
507 #if defined(CONFIG_TEGRA_GPU_CYCLE_STATS)
508         gk20a_free_cycle_stats_buffer(ch);
509 #endif
510
511         ctx->priv = NULL;
512         channel_gk20a_free_priv_cmdbuf(ch);
513
514         /* release hwctx binding to the as_share */
515         nvhost_as_release_share(ch_vm->as_share, ctx);
516
517 unbind:
518         channel_gk20a_unbind(ch);
519         channel_gk20a_free_inst(g, ch);
520
521         ch->vpr = false;
522
523         /* ALWAYS last */
524         release_used_channel(f, ch);
525 }
526
527 struct nvhost_hwctx *gk20a_open_channel(struct nvhost_channel *ch,
528                                          struct nvhost_hwctx *ctx)
529 {
530         struct gk20a *g = get_gk20a(ch->dev);
531         struct fifo_gk20a *f = &g->fifo;
532         struct channel_gk20a *ch_gk20a;
533
534         ch_gk20a = acquire_unused_channel(f);
535         if (ch_gk20a == NULL) {
536                 /* TBD: we want to make this virtualizable */
537                 nvhost_err(dev_from_gk20a(g), "out of hw chids");
538                 return NULL;
539         }
540
541         ctx->priv = ch_gk20a;
542         ch_gk20a->g = g;
543         /* note the ch here is the same for *EVERY* gk20a channel */
544         ch_gk20a->ch = ch;
545         /* but there's one hwctx per gk20a channel */
546         ch_gk20a->hwctx = ctx;
547
548         if (channel_gk20a_alloc_inst(g, ch_gk20a)) {
549                 ch_gk20a->in_use = false;
550                 ctx->priv = NULL;
551                 nvhost_err(dev_from_gk20a(g),
552                            "failed to open gk20a channel, out of inst mem");
553
554                 return NULL;
555         }
556         channel_gk20a_bind(ch_gk20a);
557
558         /* The channel is *not* runnable at this point. It still needs to have
559          * an address space bound and allocate a gpfifo and grctx. */
560
561
562         init_waitqueue_head(&ch_gk20a->notifier_wq);
563         init_waitqueue_head(&ch_gk20a->semaphore_wq);
564         init_waitqueue_head(&ch_gk20a->submit_wq);
565
566         return ctx;
567 }
568
569 #if 0
570 /* move to debug_gk20a.c ... */
571 static void dump_gpfifo(struct channel_gk20a *c)
572 {
573         void *inst_ptr;
574         u32 chid = c->hw_chid;
575
576         nvhost_dbg_fn("");
577
578         inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
579         if (!inst_ptr)
580                 return;
581
582         nvhost_dbg_info("ramfc for channel %d:\n"
583                 "ramfc: gp_base 0x%08x, gp_base_hi 0x%08x, "
584                 "gp_fetch 0x%08x, gp_get 0x%08x, gp_put 0x%08x, "
585                 "pb_fetch 0x%08x, pb_fetch_hi 0x%08x, "
586                 "pb_get 0x%08x, pb_get_hi 0x%08x, "
587                 "pb_put 0x%08x, pb_put_hi 0x%08x\n"
588                 "userd: gp_put 0x%08x, gp_get 0x%08x, "
589                 "get 0x%08x, get_hi 0x%08x, "
590                 "put 0x%08x, put_hi 0x%08x\n"
591                 "pbdma: status 0x%08x, channel 0x%08x, userd 0x%08x, "
592                 "gp_base 0x%08x, gp_base_hi 0x%08x, "
593                 "gp_fetch 0x%08x, gp_get 0x%08x, gp_put 0x%08x, "
594                 "pb_fetch 0x%08x, pb_fetch_hi 0x%08x, "
595                 "get 0x%08x, get_hi 0x%08x, put 0x%08x, put_hi 0x%08x\n"
596                 "channel: ccsr_channel 0x%08x",
597                 chid,
598                 mem_rd32(inst_ptr, ram_fc_gp_base_w()),
599                 mem_rd32(inst_ptr, ram_fc_gp_base_hi_w()),
600                 mem_rd32(inst_ptr, ram_fc_gp_fetch_w()),
601                 mem_rd32(inst_ptr, ram_fc_gp_get_w()),
602                 mem_rd32(inst_ptr, ram_fc_gp_put_w()),
603                 mem_rd32(inst_ptr, ram_fc_pb_fetch_w()),
604                 mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()),
605                 mem_rd32(inst_ptr, ram_fc_pb_get_w()),
606                 mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()),
607                 mem_rd32(inst_ptr, ram_fc_pb_put_w()),
608                 mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()),
609                 mem_rd32(c->userd_cpu_va, ram_userd_gp_put_w()),
610                 mem_rd32(c->userd_cpu_va, ram_userd_gp_get_w()),
611                 mem_rd32(c->userd_cpu_va, ram_userd_get_w()),
612                 mem_rd32(c->userd_cpu_va, ram_userd_get_hi_w()),
613                 mem_rd32(c->userd_cpu_va, ram_userd_put_w()),
614                 mem_rd32(c->userd_cpu_va, ram_userd_put_hi_w()),
615                 gk20a_readl(c->g, pbdma_status_r(0)),
616                 gk20a_readl(c->g, pbdma_channel_r(0)),
617                 gk20a_readl(c->g, pbdma_userd_r(0)),
618                 gk20a_readl(c->g, pbdma_gp_base_r(0)),
619                 gk20a_readl(c->g, pbdma_gp_base_hi_r(0)),
620                 gk20a_readl(c->g, pbdma_gp_fetch_r(0)),
621                 gk20a_readl(c->g, pbdma_gp_get_r(0)),
622                 gk20a_readl(c->g, pbdma_gp_put_r(0)),
623                 gk20a_readl(c->g, pbdma_pb_fetch_r(0)),
624                 gk20a_readl(c->g, pbdma_pb_fetch_hi_r(0)),
625                 gk20a_readl(c->g, pbdma_get_r(0)),
626                 gk20a_readl(c->g, pbdma_get_hi_r(0)),
627                 gk20a_readl(c->g, pbdma_put_r(0)),
628                 gk20a_readl(c->g, pbdma_put_hi_r(0)),
629                 gk20a_readl(c->g, ccsr_channel_r(chid)));
630
631         nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
632         gk20a_mm_l2_invalidate(c->g);
633 }
634 #endif
635
636 /* allocate private cmd buffer.
637    used for inserting commands before/after user submitted buffers. */
638 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
639 {
640         struct device *d = dev_from_gk20a(c->g);
641         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
642         struct vm_gk20a *ch_vm = c->vm;
643         struct priv_cmd_queue *q = &c->priv_cmd_q;
644         struct priv_cmd_entry *e;
645         u32 i = 0, size;
646
647         /* Kernel can insert gpfifos before and after user gpfifos.
648            Before user gpfifos, kernel inserts fence_wait, which takes
649            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
650            After user gpfifos, kernel inserts fence_get, which takes
651            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
652            = 6 dwords.
653            Worst case, if the kernel adds both of them for every user gpfifo,
654            the max size of the priv_cmdbuf is:
655            (gpfifo entry number) * (2 / 3) * (4 + 6) * 4 bytes. */
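        /* Illustrative example (hypothetical size): with 4096 gpfifo entries,
           4096 * 2 * 10 * 4 / 3 = 109226 bytes, which roundup_pow_of_two()
           below rounds up to a 128 KiB allocation. */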
656         size = roundup_pow_of_two(
657                 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
658
659         q->mem.ref = nvhost_memmgr_alloc(memmgr,
660                                          size,
661                                          DEFAULT_ALLOC_ALIGNMENT,
662                                          DEFAULT_ALLOC_FLAGS,
663                                          0);
664         if (IS_ERR(q->mem.ref)) {
665                 nvhost_err(d, "ch %d : failed to allocate"
666                            " priv cmd buffer (size: %d bytes)",
667                            c->hw_chid, size);
668                 goto clean_up;
669         }
670         q->mem.size = size;
671
672         q->base_ptr = (u32 *)nvhost_memmgr_mmap(q->mem.ref);
673         if (!q->base_ptr) {
674                 nvhost_err(d, "ch %d : failed to map cpu va"
675                            " for priv cmd buffer", c->hw_chid);
676                 goto clean_up;
677         }
678
679         memset(q->base_ptr, 0, size);
680
681         q->base_gva = ch_vm->map(ch_vm, memmgr,
682                         q->mem.ref,
683                          /*offset_align, flags, kind*/
684                         0, 0, 0, NULL);
685         if (!q->base_gva) {
686                 nvhost_err(d, "ch %d : failed to map gpu va"
687                            " for priv cmd buffer", c->hw_chid);
688                 goto clean_up;
689         }
690
691         q->size = q->mem.size / sizeof (u32);
692
693         INIT_LIST_HEAD(&q->head);
694         INIT_LIST_HEAD(&q->free);
695
696         /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
697         for (i = 0; i < q->size / 4; i++) {
698                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
699                 if (!e) {
700                         nvhost_err(d, "ch %d: fail to pre-alloc cmd entry",
701                                 c->hw_chid);
702                         goto clean_up;
703                 }
704                 e->pre_alloc = true;
705                 list_add(&e->list, &q->free);
706         }
707
708         return 0;
709
710 clean_up:
711         channel_gk20a_free_priv_cmdbuf(c);
712         return -ENOMEM;
713 }
714
715 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
716 {
717         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
718         struct vm_gk20a *ch_vm = c->vm;
719         struct priv_cmd_queue *q = &c->priv_cmd_q;
720         struct priv_cmd_entry *e;
721         struct list_head *pos, *tmp, *head;
722
723         if (q->size == 0)
724                 return;
725
726         ch_vm->unmap(ch_vm, q->base_gva);
727         nvhost_memmgr_munmap(q->mem.ref, q->base_ptr);
728         nvhost_memmgr_put(memmgr, q->mem.ref);
729
730         /* free used list */
731         head = &q->head;
732         list_for_each_safe(pos, tmp, head) {
733                 e = container_of(pos, struct priv_cmd_entry, list);
734                 free_priv_cmdbuf(c, e);
735         }
736
737         /* free free list */
738         head = &q->free;
739         list_for_each_safe(pos, tmp, head) {
740                 e = container_of(pos, struct priv_cmd_entry, list);
741                 e->pre_alloc = false;
742                 free_priv_cmdbuf(c, e);
743         }
744
745         memset(q, 0, sizeof(struct priv_cmd_queue));
746 }
747
748 /* allocate a cmd buffer with given size. size is number of u32 entries */
749 static int alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
750                              struct priv_cmd_entry **entry)
751 {
752         struct priv_cmd_queue *q = &c->priv_cmd_q;
753         struct priv_cmd_entry *e;
754         struct list_head *node;
755         u32 free_count;
756         u32 size = orig_size;
757         bool no_retry = false;
758
759         nvhost_dbg_fn("size %d", orig_size);
760
761         *entry = NULL;
762
763         /* if the free space at the end is less than requested, increase the
764          * size so that the real allocation starts at the beginning. */
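        /* Illustrative example (hypothetical numbers): with q->size = 128 words,
         * q->put = 120 and a 16-word request, only 8 words remain at the end, so
         * size becomes 16 + 8 = 24 and the entry itself is placed at offset 0. */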
765         if (q->put + size > q->size)
766                 size = orig_size + (q->size - q->put);
767
768         nvhost_dbg_info("ch %d: priv cmd queue get:put %d:%d",
769                         c->hw_chid, q->get, q->put);
770
771 TRY_AGAIN:
772         free_count = (q->size - (q->put - q->get) - 1) % q->size;
773
774         if (size > free_count) {
775                 if (!no_retry) {
776                         recycle_priv_cmdbuf(c);
777                         no_retry = true;
778                         goto TRY_AGAIN;
779                 } else
780                         return -EAGAIN;
781         }
782
783         if (unlikely(list_empty(&q->free))) {
784
785                 nvhost_dbg_info("ch %d: run out of pre-alloc entries",
786                         c->hw_chid);
787
788                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
789                 if (!e) {
790                         nvhost_err(dev_from_gk20a(c->g),
791                                 "ch %d: fail to allocate priv cmd entry",
792                                 c->hw_chid);
793                         return -ENOMEM;
794                 }
795         } else  {
796                 node = q->free.next;
797                 list_del(node);
798                 e = container_of(node, struct priv_cmd_entry, list);
799         }
800
801         e->size = orig_size;
802         e->gp_get = c->gpfifo.get;
803         e->gp_put = c->gpfifo.put;
804         e->gp_wrap = c->gpfifo.wrap;
805
806         /* if we increased the size to skip the free space at the end, set put
807            to the beginning of the cmd buffer (0) + size */
808         if (size != orig_size) {
809                 e->ptr = q->base_ptr;
810                 e->gva = q->base_gva;
811                 q->put = orig_size;
812         } else {
813                 e->ptr = q->base_ptr + q->put;
814                 e->gva = q->base_gva + q->put * sizeof(u32);
815                 q->put = (q->put + orig_size) & (q->size - 1);
816         }
817
818         /* we already handled q->put + size > q->size so BUG_ON this */
819         BUG_ON(q->put > q->size);
820
821         /* add new entry to head since we free from head */
822         list_add(&e->list, &q->head);
823
824         *entry = e;
825
826         nvhost_dbg_fn("done");
827
828         return 0;
829 }
830
831 /* Don't call this to free an explicit cmd entry.
832  * It doesn't update priv_cmd_queue get/put. */
833 static void free_priv_cmdbuf(struct channel_gk20a *c,
834                              struct priv_cmd_entry *e)
835 {
836         struct priv_cmd_queue *q = &c->priv_cmd_q;
837
838         if (!e)
839                 return;
840
841         list_del(&e->list);
842
843         if (unlikely(!e->pre_alloc))
844                 kfree(e);
845         else {
846                 memset(e, 0, sizeof(struct priv_cmd_entry));
847                 e->pre_alloc = true;
848                 list_add(&e->list, &q->free);
849         }
850 }
851
852 /* free entries if they're no longer being used */
853 static void recycle_priv_cmdbuf(struct channel_gk20a *c)
854 {
855         struct priv_cmd_queue *q = &c->priv_cmd_q;
856         struct priv_cmd_entry *e, *tmp;
857         struct list_head *head = &q->head;
858         bool wrap_around, found = false;
859
860         nvhost_dbg_fn("");
861
862         /* Find the most recent free entry. Free it and everything before it */
863         list_for_each_entry(e, head, list) {
864
865                 nvhost_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
866                         "curr get:put:wrap %d:%d:%d",
867                         c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
868                         c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
869
870                 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
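                /* An entry's commands are consumed once the channel's GET has
                 * advanced past the entry's PUT, taking wrap-around into account. */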
871                 if (e->gp_get < e->gp_put) {
872                         if (c->gpfifo.get >= e->gp_put ||
873                             wrap_around) {
874                                 found = true;
875                                 break;
876                         } else
877                                 e->gp_get = c->gpfifo.get;
878                 } else if (e->gp_get > e->gp_put) {
879                         if (wrap_around &&
880                             c->gpfifo.get >= e->gp_put) {
881                                 found = true;
882                                 break;
883                         } else
884                                 e->gp_get = c->gpfifo.get;
885                 }
886         }
887
888         if (found)
889                 q->get = (e->ptr - q->base_ptr) + e->size;
890         else {
891                 nvhost_dbg_info("no free entry recycled");
892                 return;
893         }
894
895         list_for_each_entry_safe_continue(e, tmp, head, list) {
896                 free_priv_cmdbuf(c, e);
897         }
898
899         nvhost_dbg_fn("done");
900 }
901
902
903 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
904                                struct nvhost_alloc_gpfifo_args *args)
905 {
906         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
907         struct gk20a *g = c->g;
908         struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
909         struct device *d = dev_from_gk20a(g);
910         struct vm_gk20a *ch_vm;
911         u32 gpfifo_size;
912         int ret;
913
914         /* The kernel can insert one extra gpfifo entry before the user-submitted
915            gpfifos and another one after, for internal use, so triple the requested size. */
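        /* Illustrative example: a request for 1024 entries becomes 3072 and is
           rounded up to the next power of two, 4096. */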
916         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
917
918         if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
919                 c->vpr = true;
920
921         /* an address space needs to have been bound at this point.   */
922         if (!gk20a_channel_as_bound(c)) {
923                 int err;
924                 nvhost_warn(d,
925                             "not bound to an address space at time of gpfifo"
926                             " allocation.  Attempting to create and bind to"
927                             " one...");
928                 /*
929                  * Eventually this will be a fatal error. For now attempt to
930                  * create and bind a share here.  This helps until we change
931                  * clients to use the new address space API.  However doing this
932                  * can mask errors in programming access to the address space
933                  * through the front door...
934                  */
935                 err = nvhost_as_alloc_and_bind_share(c->ch, c->hwctx);
936                 if (err || !gk20a_channel_as_bound(c)) {
937                         nvhost_err(d,
938                                    "not bound to address space at time"
939                                    " of gpfifo allocation");
940                         return err ? err : -EINVAL;
941                 }
942         }
943         ch_vm = c->vm;
944
945         c->cmds_pending = false;
946
947         c->last_submit_fence.valid        = false;
948         c->last_submit_fence.syncpt_value = 0;
949         c->last_submit_fence.syncpt_id    = c->hw_chid + pdata->syncpt_base;
950
951         c->ramfc.offset = 0;
952         c->ramfc.size = ram_in_ramfc_s() / 8;
953
954         if (c->gpfifo.mem.ref) {
955                 nvhost_err(d, "channel %d :"
956                            " gpfifo already allocated", c->hw_chid);
957                 return -EEXIST;
958         }
959
960         c->gpfifo.mem.ref =
961                 nvhost_memmgr_alloc(memmgr,
962                                     gpfifo_size * sizeof(struct gpfifo),
963                                     DEFAULT_ALLOC_ALIGNMENT,
964                                     DEFAULT_ALLOC_FLAGS,
965                                     0);
966         if (IS_ERR(c->gpfifo.mem.ref)) {
967                 nvhost_err(d, "channel %d : failed to allocate gpfifo"
968                            " (size: %d bytes)", c->hw_chid, gpfifo_size);
969                 ret = PTR_ERR(c->gpfifo.mem.ref);
970                 c->gpfifo.mem.ref = 0;
971                 return ret;
972         }
973         c->gpfifo.entry_num = gpfifo_size;
974
975         c->gpfifo.cpu_va =
976                 (struct gpfifo *)nvhost_memmgr_mmap(c->gpfifo.mem.ref);
977         if (!c->gpfifo.cpu_va)
978                 goto clean_up;
979
980         c->gpfifo.get = c->gpfifo.put = 0;
981
982         c->gpfifo.gpu_va = ch_vm->map(ch_vm, memmgr,
983                                 c->gpfifo.mem.ref,
984                                 /*offset_align, flags, kind*/
985                                 0, 0, 0, NULL);
986         if (!c->gpfifo.gpu_va) {
987                 nvhost_err(d, "channel %d : failed to map"
988                            " gpu_va for gpfifo", c->hw_chid);
989                 goto clean_up;
990         }
991
992         nvhost_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
993                 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
994
995         channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
996
997         channel_gk20a_setup_userd(c);
998         channel_gk20a_commit_userd(c);
999
1000         gk20a_mm_l2_invalidate(c->g);
1001
1002         /* TBD: setup engine contexts */
1003
1004         ret = channel_gk20a_alloc_priv_cmdbuf(c);
1005         if (ret)
1006                 goto clean_up;
1007
1008         ret = channel_gk20a_update_runlist(c, true);
1009         if (ret)
1010                 goto clean_up;
1011
1012         nvhost_dbg_fn("done");
1013         return 0;
1014
1015 clean_up:
1016         nvhost_dbg(dbg_fn | dbg_err, "fail");
1017         ch_vm->unmap(ch_vm, c->gpfifo.gpu_va);
1018         nvhost_memmgr_munmap(c->gpfifo.mem.ref, c->gpfifo.cpu_va);
1019         nvhost_memmgr_put(memmgr, c->gpfifo.mem.ref);
1020         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1021         return -ENOMEM;
1022 }
1023
1024 static inline int wfi_cmd_size(void)
1025 {
1026         return 2;
1027 }
1028 void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
1029 {
1030         /* wfi */
1031         cmd->ptr[(*i)++] = 0x2001001E;
1032         /* handle, ignored */
1033         cmd->ptr[(*i)++] = 0x00000000;
1034 }
1035
1036 static inline bool check_gp_put(struct gk20a *g,
1037                                 struct channel_gk20a *c)
1038 {
1039         u32 put;
1040         /* gp_put changed unexpectedly since last update? */
1041         put = gk20a_bar1_readl(g,
1042                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1043         if (c->gpfifo.put != put) {
1044                 /* TBD: BUG_ON/teardown on this */
1045                 nvhost_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1046                            "since last update");
1047                 c->gpfifo.put = put;
1048                 return false; /* surprise! */
1049         }
1050         return true; /* checked out ok */
1051 }
1052
1053 /* Call this periodically to determine how far the gpfifo has drained. */
1054 static inline u32 update_gp_get(struct gk20a *g,
1055                                 struct channel_gk20a *c)
1056 {
1057         u32 new_get = gk20a_bar1_readl(g,
1058                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
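        /* GET moving backwards means the hardware wrapped around the ring, so
         * flip the software wrap flag to keep it in sync. */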
1059         if (new_get < c->gpfifo.get)
1060                 c->gpfifo.wrap = !c->gpfifo.wrap;
1061         c->gpfifo.get = new_get;
1062         return new_get;
1063 }
1064
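/* Free space in the gpfifo ring; one slot is always kept empty so that
 * put == get unambiguously means "empty" rather than "full". */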
1065 static inline u32 gp_free_count(struct channel_gk20a *c)
1066 {
1067         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1068                 c->gpfifo.entry_num;
1069 }
1070
1071 /* Issue a syncpoint increment *preceded* by a wait-for-idle
1072  * command.  All commands on the channel will have been
1073  * consumed at the time the fence syncpoint increment occurs.
1074  */
1075 int gk20a_channel_submit_wfi_fence(struct gk20a *g,
1076                                    struct channel_gk20a *c,
1077                                    struct nvhost_syncpt *sp,
1078                                    struct nvhost_fence *fence)
1079 {
1080         struct priv_cmd_entry *cmd = NULL;
1081         int cmd_size, j = 0;
1082         u32 free_count;
1083         int err;
1084
1085         cmd_size =  4 + wfi_cmd_size();
1086
1087         update_gp_get(g, c);
1088         free_count = gp_free_count(c);
1089         if (unlikely(!free_count)) {
1090                 nvhost_err(dev_from_gk20a(g),
1091                            "not enough gpfifo space");
1092                 return -EAGAIN;
1093         }
1094
1095         err = alloc_priv_cmdbuf(c, cmd_size, &cmd);
1096         if (unlikely(err)) {
1097                 nvhost_err(dev_from_gk20a(g),
1098                            "not enough priv cmd buffer space");
1099                 return err;
1100         }
1101
1102         fence->value = nvhost_syncpt_incr_max(sp, fence->syncpt_id, 1);
1103
1104         c->last_submit_fence.valid        = true;
1105         c->last_submit_fence.syncpt_value = fence->value;
1106         c->last_submit_fence.syncpt_id    = fence->syncpt_id;
1107         c->last_submit_fence.wfi          = true;
1108
1109         trace_nvhost_ioctl_ctrl_syncpt_incr(fence->syncpt_id);
1110
1111
1112         add_wfi_cmd(cmd, &j);
1113
1114         /* syncpoint_a */
1115         cmd->ptr[j++] = 0x2001001C;
1116         /* payload, ignored */
1117         cmd->ptr[j++] = 0;
1118         /* syncpoint_b */
1119         cmd->ptr[j++] = 0x2001001D;
1120         /* syncpt_id, incr */
1121         cmd->ptr[j++] = (fence->syncpt_id << 8) | 0x1;
1122
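        /* A gpfifo entry packs the command buffer's GPU VA (low 32 bits in
         * entry0, high bits in entry1) together with its length in words;
         * entry_num is a power of two, so masking with entry_num - 1 wraps put. */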
1123         c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1124         c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
1125                 pbdma_gp_entry1_length_f(cmd->size);
1126
1127         c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
1128
1129         /* save gp_put */
1130         cmd->gp_put = c->gpfifo.put;
1131
1132         gk20a_bar1_writel(g,
1133                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1134                 c->gpfifo.put);
1135
1136         nvhost_dbg_info("post-submit put %d, get %d, size %d",
1137                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1138
1139         return 0;
1140 }
1141
1142 static u32 get_gp_free_count(struct channel_gk20a *c)
1143 {
1144         update_gp_get(c->g, c);
1145         return gp_free_count(c);
1146 }
1147
1148 static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
1149 {
1150         void *mem = NULL;
1151         unsigned int words;
1152         u64 offset;
1153         struct mem_handle *r = NULL;
1154
1155         if (nvhost_debug_trace_cmdbuf) {
1156                 u64 gpu_va = (u64)g->entry0 |
1157                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1158                 struct mem_mgr *memmgr = NULL;
1159                 int err;
1160
1161                 words = pbdma_gp_entry1_length_v(g->entry1);
1162                 err = c->vm->find_buffer(c->vm, gpu_va, &memmgr, &r, &offset);
1163                 if (!err)
1164                         mem = nvhost_memmgr_mmap(r);
1165         }
1166
1167         if (mem) {
1168                 u32 i;
1169                 /*
1170                  * Write in batches of 128 as there seems to be a limit
1171                  * of how much you can output to ftrace at once.
1172                  */
1173                 for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
1174                         trace_nvhost_cdma_push_gather(
1175                                 c->ch->dev->name,
1176                                 0,
1177                                 min(words - i, TRACE_MAX_LENGTH),
1178                                 offset + i * sizeof(u32),
1179                                 mem);
1180                 }
1181                 nvhost_memmgr_munmap(r, mem);
1182         }
1183 }
1184
1185 static int gk20a_channel_add_job(struct channel_gk20a *c,
1186                                  struct nvhost_fence *fence)
1187 {
1188         struct vm_gk20a *vm = c->vm;
1189         struct channel_gk20a_job *job = NULL;
1190         struct mapped_buffer_node **mapped_buffers = NULL;
1191         int err = 0, num_mapped_buffers;
1192
1193         err = vm->get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
1194         if (err)
1195                 return err;
1196
1197         job = kzalloc(sizeof(*job), GFP_KERNEL);
1198         if (!job) {
1199                 vm->put_buffers(vm, mapped_buffers, num_mapped_buffers);
1200                 return -ENOMEM;
1201         }
1202
1203         job->num_mapped_buffers = num_mapped_buffers;
1204         job->mapped_buffers = mapped_buffers;
1205         job->fence = *fence;
1206
1207         mutex_lock(&c->jobs_lock);
1208         list_add_tail(&job->list, &c->jobs);
1209         mutex_unlock(&c->jobs_lock);
1210
1211         return 0;
1212 }
1213
1214 void gk20a_channel_update(struct channel_gk20a *c)
1215 {
1216         struct gk20a *g = c->g;
1217         struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
1218         struct vm_gk20a *vm = c->vm;
1219         struct channel_gk20a_job *job, *n;
1220
1221         mutex_lock(&c->jobs_lock);
1222         list_for_each_entry_safe(job, n, &c->jobs, list) {
1223                 bool completed = nvhost_syncpt_is_expired(sp,
1224                         job->fence.syncpt_id, job->fence.value);
1225                 if (!completed)
1226                         break;
1227
1228                 vm->put_buffers(vm, job->mapped_buffers,
1229                                 job->num_mapped_buffers);
1230                 list_del_init(&job->list);
1231                 kfree(job);
1232         }
1233         mutex_unlock(&c->jobs_lock);
1234 }
1235 #ifdef CONFIG_DEBUG_FS
1236 static void gk20a_sync_debugfs(struct gk20a *g)
1237 {
1238         u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();
1239         spin_lock(&g->debugfs_lock);
1240         if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) {
1241                 u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());
1242                 if (g->mm.ltc_enabled_debug)
1243                         /* bypass disabled (normal caching ops)*/
1244                         reg &= ~reg_f;
1245                 else
1246                         /* bypass enabled (no caching) */
1247                         reg |= reg_f;
1248
1249                 gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg);
1250                 g->mm.ltc_enabled = g->mm.ltc_enabled_debug;
1251         }
1252         spin_unlock(&g->debugfs_lock);
1253 }
1254 #endif
1255
1256 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1257                                 struct nvhost_gpfifo *gpfifo,
1258                                 u32 num_entries,
1259                                 struct nvhost_fence *fence,
1260                                 u32 flags)
1261 {
1262         struct gk20a *g = c->g;
1263         struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
1264         struct device *d = dev_from_gk20a(g);
1265         struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
1266         u32 i, incr_id = ~0, wait_id = ~0, wait_value = 0;
1267         int err = 0;
1268         int incr_cmd_size;
1269         bool wfi_cmd;
1270         struct priv_cmd_entry *wait_cmd = NULL;
1271         struct priv_cmd_entry *incr_cmd = NULL;
1272         /* we might need two extra gpfifo entries - one for syncpoint
1273          * wait and one for syncpoint increment */
1274         const int extra_entries = 2;
1275
1276         if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1277                       NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1278             !fence)
1279                 return -EINVAL;
1280 #ifdef CONFIG_DEBUG_FS
1281         /* update debug settings */
1282         gk20a_sync_debugfs(g);
1283 #endif
1284
1285         nvhost_dbg_info("channel %d", c->hw_chid);
1286
1287         trace_nvhost_channel_submit_gpfifo(c->ch->dev->name,
1288                                            c->hw_chid,
1289                                            num_entries,
1290                                            flags,
1291                                            fence->syncpt_id, fence->value,
1292                                            c->hw_chid + pdata->syncpt_base);
1293         check_gp_put(g, c);
1294         update_gp_get(g, c);
1295
1296         nvhost_dbg_info("pre-submit put %d, get %d, size %d",
1297                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1298
1299         /* If the caller has requested a fence "get" then we need to be
1300          * sure the fence represents work completion.  In that case
1301          * issue a wait-for-idle before the syncpoint increment.
1302          */
1303         wfi_cmd = !!(flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET);
1304
1305         /* Invalidate tlb if it's dirty...                                   */
1306         /* TBD: this should be done in the cmd stream, not with PRIs.        */
1307         /* We don't know what context is currently running...                */
1308         /* Note also: there can be more than one context associated with the */
1309         /* address space (vm).   */
1310         if (c->vm->tlb_dirty) {
1311                 c->vm->tlb_inval(c->vm);
1312                 c->vm->tlb_dirty = false;
1313         }
1314
1315         /* Make sure we have enough space for gpfifo entries. If not,
1316          * wait for signals from completed submits */
1317         if (gp_free_count(c) < num_entries + extra_entries) {
1318                 err = wait_event_interruptible(c->submit_wq,
1319                         get_gp_free_count(c) >= num_entries + extra_entries);
1320         }
1321
1322         if (err) {
1323                 nvhost_err(d, "not enough gpfifo space");
1324                 err = -EAGAIN;
1325                 goto clean_up;
1326         }
1327
1328         /* optionally insert a syncpt wait at the beginning of the gpfifo
1329            submission when the user requested one and the wait hasn't expired.
1330         */
1331
1332         /* validate that the id makes sense, elide if not */
1333         /* the only reason this isn't being unceremoniously killed is to
1334          * keep running some tests which trigger this condition. */
1335         if ((flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) &&
1336             ((fence->syncpt_id < 0) ||
1337              (fence->syncpt_id >= nvhost_syncpt_nb_pts(sp)))) {
1338                 dev_warn(d, "invalid wait id in gpfifo submit, elided");
1339                 flags &= ~NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT;
1340         }
1341
1342         if ((flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) &&
1343             !nvhost_syncpt_is_expired(sp, fence->syncpt_id, fence->value)) {
1344                 alloc_priv_cmdbuf(c, 4, &wait_cmd);
1345                 if (wait_cmd == NULL) {
1346                         nvhost_err(d, "not enough priv cmd buffer space");
1347                         err = -EAGAIN;
1348                         goto clean_up;
1349                 }
1350         }
1351
1352         /* always insert syncpt increment at end of gpfifo submission
1353            to keep track of method completion for idle railgating */
1354         /* TODO: we need to find a way to get rid of these WFIs on every
1355          * submission...
1356          */
1357         incr_cmd_size = 4;
1358         if (wfi_cmd)
1359                 incr_cmd_size += wfi_cmd_size();
1360         alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
1361         if (incr_cmd == NULL) {
1362                 nvhost_err(d, "not enough priv cmd buffer space");
1363                 err = -EAGAIN;
1364                 goto clean_up;
1365         }
1366
1367         if (wait_cmd) {
1368                 wait_id = fence->syncpt_id;
1369                 wait_value = fence->value;
1370                 /* syncpoint_a */
1371                 wait_cmd->ptr[0] = 0x2001001C;
1372                 /* payload */
1373                 wait_cmd->ptr[1] = fence->value;
1374                 /* syncpoint_b */
1375                 wait_cmd->ptr[2] = 0x2001001D;
1376                 /* syncpt_id, switch_en, wait */
1377                 wait_cmd->ptr[3] = (wait_id << 8) | 0x10;
1378
1379                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1380                         u64_lo32(wait_cmd->gva);
1381                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1382                         u64_hi32(wait_cmd->gva) |
1383                         pbdma_gp_entry1_length_f(wait_cmd->size);
1384                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1385
1386                 c->gpfifo.put = (c->gpfifo.put + 1) &
1387                         (c->gpfifo.entry_num - 1);
1388
1389                 /* save gp_put */
1390                 wait_cmd->gp_put = c->gpfifo.put;
1391         }
1392
1393         for (i = 0; i < num_entries; i++) {
1394                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1395                         gpfifo[i].entry0; /* cmd buf va low 32 */
1396                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1397                         gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
1398                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1399                 c->gpfifo.put = (c->gpfifo.put + 1) &
1400                         (c->gpfifo.entry_num - 1);
1401         }
1402
1403         if (incr_cmd) {
1404                 int j = 0;
1405                 incr_id = c->hw_chid + pdata->syncpt_base;
1406                 fence->syncpt_id = incr_id;
1407                 fence->value     = nvhost_syncpt_incr_max(sp, incr_id, 1);
1408
1409                 c->last_submit_fence.valid        = true;
1410                 c->last_submit_fence.syncpt_value = fence->value;
1411                 c->last_submit_fence.syncpt_id    = fence->syncpt_id;
1412                 c->last_submit_fence.wfi          = wfi_cmd;
1413
1414                 trace_nvhost_ioctl_ctrl_syncpt_incr(fence->syncpt_id);
1415                 if (wfi_cmd)
1416                         add_wfi_cmd(incr_cmd, &j);
1417                 /* syncpoint_a */
1418                 incr_cmd->ptr[j++] = 0x2001001C;
1419                 /* payload, ignored */
1420                 incr_cmd->ptr[j++] = 0;
1421                 /* syncpoint_b */
1422                 incr_cmd->ptr[j++] = 0x2001001D;
1423                 /* syncpt_id, incr */
1424                 incr_cmd->ptr[j++] = (fence->syncpt_id << 8) | 0x1;
1425
1426                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1427                         u64_lo32(incr_cmd->gva);
1428                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1429                         u64_hi32(incr_cmd->gva) |
1430                         pbdma_gp_entry1_length_f(incr_cmd->size);
1431                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1432
1433                 c->gpfifo.put = (c->gpfifo.put + 1) &
1434                         (c->gpfifo.entry_num - 1);
1435
1436                 /* save gp_put */
1437                 incr_cmd->gp_put = c->gpfifo.put;
1438         }
1439
1440         /* Invalidate the TLB if it's dirty.
1441          * TBD: this should be done in the cmd stream, not with PRIs;
1442          * we don't know what context is currently running, and note
1443          * that there can be more than one context associated with the
1444          * address space (vm). */
1445         if (c->vm->tlb_dirty) {
1446                 c->vm->tlb_inval(c->vm);
1447                 c->vm->tlb_dirty = false;
1448         }
1449
1450         trace_nvhost_channel_submitted_gpfifo(c->ch->dev->name,
1451                                            c->hw_chid,
1452                                            num_entries,
1453                                            flags,
1454                                            wait_id, wait_value,
1455                                            incr_id, fence->value);
1456
1457
1458         /* TODO! Check for errors... */
1459         gk20a_channel_add_job(c, fence);
1460
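        /*
         * Publish the new put pointer: USERD's GP_PUT word is written
         * through the BAR1 aperture so the host/PBDMA can see the gpfifo
         * entries added above.
         */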
1461         c->cmds_pending = true;
1462         gk20a_bar1_writel(g,
1463                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1464                 c->gpfifo.put);
1465
1466         nvhost_dbg_info("post-submit put %d, get %d, size %d",
1467                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1468
1469         nvhost_dbg_fn("done");
1470         return 0;
1471
1472 clean_up:
1473         nvhost_dbg(dbg_fn | dbg_err, "fail");
1474         free_priv_cmdbuf(c, wait_cmd);
1475         free_priv_cmdbuf(c, incr_cmd);
1476         return err;
1477 }
1478
1479 void gk20a_remove_channel_support(struct channel_gk20a *c)
1480 {
1481
1482 }
1483
1484 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1485 {
1486         struct channel_gk20a *c = g->fifo.channel + chid;
1487         c->g = g;
1488         c->in_use = false;
1489         c->hw_chid = chid;
1490         c->bound = false;
1491         c->remove_support = gk20a_remove_channel_support;
1492         mutex_init(&c->jobs_lock);
1493         INIT_LIST_HEAD(&c->jobs);
1494 #if defined(CONFIG_TEGRA_GPU_CYCLE_STATS)
1495         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1496 #endif
1497         return 0;
1498 }
1499
1500 int gk20a_channel_init(struct nvhost_channel *ch,
1501                        struct nvhost_master *host, int index)
1502 {
1503         return 0;
1504 }
1505
1506 int gk20a_channel_alloc_obj(struct nvhost_channel *channel,
1507                         u32 class_num,
1508                         u32 *obj_id,
1509                         u32 vaspace_share)
1510 {
1511         nvhost_dbg_fn("");
1512         return 0;
1513 }
1514
1515 int gk20a_channel_free_obj(struct nvhost_channel *channel, u32 obj_id)
1516 {
1517         nvhost_dbg_fn("");
1518         return 0;
1519 }
1520
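/*
 * Wait for all work submitted on the channel to complete.  If the last
 * submission did not include a WFI, a WFI + syncpoint increment is
 * submitted first so that reaching the syncpoint value really means the
 * engine has gone idle; the fence is then waited on with the given timeout.
 */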
1521 int gk20a_channel_finish(struct channel_gk20a *ch, long timeout)
1522 {
1523         struct nvhost_syncpt *sp = syncpt_from_gk20a(ch->g);
1524         struct nvhost_device_data *pdata = nvhost_get_devdata(ch->g->dev);
1525         struct nvhost_fence fence;
1526         int err = 0;
1527
1528         if (!ch->cmds_pending)
1529                 return 0;
1530
1531         if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
1532                 nvhost_dbg_fn("issuing wfi, incr to finish the channel");
1533                 fence.syncpt_id = ch->hw_chid + pdata->syncpt_base;
1534                 err = gk20a_channel_submit_wfi_fence(ch->g, ch,
1535                                                      sp, &fence);
1536         }
1537         if (err)
1538                 return err;
1539
1540         BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));
1541
1542         nvhost_dbg_fn("waiting for channel to finish syncpt:%d val:%d",
1543                       ch->last_submit_fence.syncpt_id,
1544                       ch->last_submit_fence.syncpt_value);
1545         err = nvhost_syncpt_wait_timeout(sp,
1546                                          ch->last_submit_fence.syncpt_id,
1547                                          ch->last_submit_fence.syncpt_value,
1548                                          timeout, &fence.value, NULL, false);
1549         if (WARN_ON(err))
1550                 dev_warn(dev_from_gk20a(ch->g),
1551                          "timed out waiting for gk20a channel to finish");
1552         else
1553                 ch->cmds_pending = false;
1554
1555         return err;
1556 }
1557
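/*
 * Ioctl-level wait: for notifier waits, map the user-supplied nvmap handle,
 * sleep until the notification status clears (or the timeout expires), then
 * stamp the notification with a timestamp before unmapping.  Semaphore-type
 * waits are accepted but not implemented here.
 */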
1558 int gk20a_channel_wait(struct channel_gk20a *ch,
1559                        struct nvhost_wait_args *args)
1560 {
1561         struct device *d = dev_from_gk20a(ch->g);
1562         struct platform_device *dev = ch->ch->dev;
1563         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
1564         struct mem_handle *handle_ref;
1565         struct notification *notif;
1566         struct timespec tv;
1567         u64 now;
1568         ulong id;
1569         u32 offset;
1570         long timeout;
1571         int remain, ret = 0;
1572
1573         nvhost_dbg_fn("");
1574
1575         if (args->timeout == NVHOST_NO_TIMEOUT)
1576                 timeout = MAX_SCHEDULE_TIMEOUT;
1577         else
1578                 timeout = (u32)msecs_to_jiffies(args->timeout);
1579
1580         switch (args->type) {
1581         case NVHOST_WAIT_TYPE_NOTIFIER:
1582                 id = args->condition.notifier.nvmap_handle;
1583                 offset = args->condition.notifier.offset;
1584
1585                 handle_ref = nvhost_memmgr_get(memmgr, id, dev);
1586                 if (IS_ERR(handle_ref)) {
1587                         nvhost_err(d, "invalid notifier nvmap handle 0x%lx",
1588                                    id);
1589                         return -EINVAL;
1590                 }
1591
1592                 notif = nvhost_memmgr_mmap(handle_ref);
1593                 if (!notif) {
1594                         nvhost_err(d, "failed to map notifier memory");
1595                         return -ENOMEM;
1596                 }
1597
1598                 notif = (struct notification *)((uintptr_t)notif + offset);
1599
1600                 /* user should set status pending before
1601                  * calling this ioctl */
1602                 remain = wait_event_interruptible_timeout(
1603                                 ch->notifier_wq,
1604                                 notif->status == 0,
1605                                 timeout);
1606
1607                 if (remain == 0 && notif->status != 0) {
1608                         ret = -ETIMEDOUT;
1609                         goto notif_clean_up;
1610                 } else if (remain < 0) {
1611                         ret = -EINTR;
1612                         goto notif_clean_up;
1613                 }
1614
1615                 /* TBD: fill in correct information */
1616                 now = get_jiffies_64();
1617                 jiffies_to_timespec(now, &tv);
1618                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
1619                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
1620                 notif->info32 = 0xDEADBEEF; /* should be object name */
1621                 notif->info16 = ch->hw_chid; /* should be method offset */
1622
1623 notif_clean_up:
1624                 nvhost_memmgr_munmap(handle_ref, notif);
1625                 return ret;
1626         case NVHOST_WAIT_TYPE_SEMAPHORE:
1627                 break;
1628         default:
1629                 return -EINVAL;
1630         }
1631
1632         return 0;
1633 }
1634
1635
1636 int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
1637                             struct nvhost_zcull_bind_args *args)
1638 {
1639         struct gk20a *g = ch->g;
1640         struct gr_gk20a *gr = &g->gr;
1641
1642         nvhost_dbg_fn("");
1643
1644         return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
1645                                 args->gpu_va, args->mode);
1646 }
1647
1648 /* in this context the "channel" is the host1x channel which
1649  * maps to *all* gk20a channels */
1650 int gk20a_channel_suspend(struct gk20a *g)
1651 {
1652         struct fifo_gk20a *f = &g->fifo;
1653         u32 chid;
1654         bool channels_in_use = false;
1655
1656         nvhost_dbg_fn("");
1657
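        /*
         * Quiesce every in-use channel: clear its enable bit via CCSR and
         * preempt it off the GR engine; the runlist is then updated and
         * each channel unbound from its HW state.
         */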
1658         for (chid = 0; chid < f->num_channels; chid++) {
1659                 if (f->channel[chid].in_use) {
1660
1661                         nvhost_dbg_info("suspend channel %d", chid);
1662
1663                         /* disable channel */
1664                         gk20a_writel(g, ccsr_channel_r(chid),
1665                                 gk20a_readl(g, ccsr_channel_r(chid)) |
1666                                 ccsr_channel_enable_clr_true_f());
1667                         /* preempt the channel */
1668                         gk20a_fifo_preempt_channel(g,
1669                                 ENGINE_GR_GK20A, chid);
1670
1671                         channels_in_use = true;
1672                 }
1673         }
1674
1675         if (channels_in_use) {
1676                 gk20a_fifo_update_runlist(g, ENGINE_GR_GK20A, ~0, false);
1677
1678                 for (chid = 0; chid < f->num_channels; chid++) {
1679                         if (f->channel[chid].in_use)
1680                                 channel_gk20a_unbind(&f->channel[chid]);
1681                 }
1682         }
1683
1684         nvhost_dbg_fn("done");
1685         return 0;
1686 }
1687
1688 /* in this context the "channel" is the host1x channel which
1689  * maps to *all* gk20a channels */
1690 int gk20a_channel_resume(struct gk20a *g)
1691 {
1692         struct fifo_gk20a *f = &g->fifo;
1693         u32 chid;
1694         bool channels_in_use = false;
1695
1696         nvhost_dbg_fn("");
1697
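        /* Rebind every in-use channel, then refresh the GR runlist so the
         * rebound channels become runnable again. */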
1698         for (chid = 0; chid < f->num_channels; chid++) {
1699                 if (f->channel[chid].in_use) {
1700                         nvhost_dbg_info("resume channel %d", chid);
1701                         channel_gk20a_bind(&f->channel[chid]);
1702                         channels_in_use = true;
1703                 }
1704         }
1705
1706         if (channels_in_use)
1707                 gk20a_fifo_update_runlist(g, ENGINE_GR_GK20A, ~0, true);
1708
1709         nvhost_dbg_fn("done");
1710         return 0;
1711 }