video: tegra: host: Check error from channel finish
1 /*
2  * drivers/video/tegra/host/gk20a/channel_gk20a.c
3  *
4  * GK20A Graphics channel
5  *
6  * Copyright (c) 2011-2013, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>
23 #include <linux/highmem.h> /* needed for nvmap.h */
24 #include <trace/events/nvhost.h>
25 #include <linux/scatterlist.h>
26
27
28 #include "../dev.h"
29 #include "../nvhost_as.h"
30
31 #include "gk20a.h"
32
33 #include "hw_ram_gk20a.h"
34 #include "hw_fifo_gk20a.h"
35 #include "hw_pbdma_gk20a.h"
36 #include "hw_ccsr_gk20a.h"
37 #include "chip_support.h"
38
39 #define NVMAP_HANDLE_PARAM_SIZE 1
40
41 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
42 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
43
44 static int alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
45                              struct priv_cmd_entry **entry);
46 static void free_priv_cmdbuf(struct channel_gk20a *c,
47                              struct priv_cmd_entry *e);
48 static void recycle_priv_cmdbuf(struct channel_gk20a *c);
49
50 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
51 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
52
53 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
54 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
55 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
56                         u64 gpfifo_base, u32 gpfifo_entries);
57
58 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
59 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
60
61 static int channel_gk20a_alloc_inst(struct gk20a *g,
62                                 struct channel_gk20a *ch);
63 static void channel_gk20a_free_inst(struct gk20a *g,
64                                 struct channel_gk20a *ch);
65
66 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
67                                         bool add);
68
69 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
70 {
71         struct channel_gk20a *ch = NULL;
72         int chid;
73
74         mutex_lock(&f->ch_inuse_mutex);
75         for (chid = 0; chid < f->num_channels; chid++) {
76                 if (!f->channel[chid].in_use) {
77                         f->channel[chid].in_use = true;
78                         ch = &f->channel[chid];
79                         break;
80                 }
81         }
82         mutex_unlock(&f->ch_inuse_mutex);
83
84         return ch;
85 }
86
87 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
88 {
89         mutex_lock(&f->ch_inuse_mutex);
90         f->channel[c->hw_chid].in_use = false;
91         mutex_unlock(&f->ch_inuse_mutex);
92 }
93
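/*
 * Write the channel's page directory base (from the VM's pde sg table)
 * and its virtual address limit into the instance block, so the hardware
 * translates addresses through this channel's address space.
 */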
94 int channel_gk20a_commit_va(struct channel_gk20a *c)
95 {
96         phys_addr_t addr;
97         u32 addr_lo;
98         u32 addr_hi;
99         void *inst_ptr;
100
101         nvhost_dbg_fn("");
102
103         inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
104         if (IS_ERR(inst_ptr))
105                 return -ENOMEM;
106
107         addr = sg_phys(c->vm->pdes.sgt->sgl);
108         addr_lo = u64_lo32(addr) >> 12;
109         addr_hi = u64_hi32(addr);
110
111         nvhost_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
112                    (u64)addr, addr_lo, addr_hi);
113
114         mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
115                 ram_in_page_dir_base_target_vid_mem_f() |
116                 ram_in_page_dir_base_vol_true_f() |
117                 ram_in_page_dir_base_lo_f(addr_lo));
118
119         mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
120                 ram_in_page_dir_base_hi_f(addr_hi));
121
122         mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
123                  u64_lo32(c->vm->va_limit) | 0xFFF);
124
125         mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
126                 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
127
128         nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
129
130         return 0;
131 }
132
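/*
 * Point the userd fields of the RAMFC inside the instance block at this
 * channel's USERD area (vid mem target), split into lo/hi address words.
 */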
133 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
134 {
135         u32 addr_lo;
136         u32 addr_hi;
137         void *inst_ptr;
138
139         nvhost_dbg_fn("");
140
141         inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
142         if (IS_ERR(inst_ptr))
143                 return -ENOMEM;
144
145         addr_lo = u64_lo32(c->userd_cpu_pa >> ram_userd_base_shift_v());
146         addr_hi = u64_hi32(c->userd_cpu_pa);
147
148         nvhost_dbg_info("channel %d : set ramfc userd 0x%16llx",
149                 c->hw_chid, (u64)c->userd_cpu_pa);
150
151         mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
152                  pbdma_userd_target_vid_mem_f() |
153                  pbdma_userd_addr_f(addr_lo));
154
155         mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
156                  pbdma_userd_target_vid_mem_f() |
157                  pbdma_userd_hi_addr_f(addr_hi));
158
159         nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
160
161         return 0;
162 }
163
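/*
 * Initialize the channel's RAMFC: clear it, then program the gpfifo base
 * and size, pbdma signature/formats, pushbuffer header defaults, acquire
 * timeout, engine and pushbuffer timeslices, the hw channel id and HCE
 * control.
 */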
164 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
165                                 u64 gpfifo_base, u32 gpfifo_entries)
166 {
167         void *inst_ptr;
168
169         nvhost_dbg_fn("");
170
171         inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
172         if (IS_ERR(inst_ptr))
173                 return -ENOMEM;
174
175         memset(inst_ptr, 0, ram_fc_size_val_v());
176
177         mem_wr32(inst_ptr, ram_fc_gp_base_w(),
178                 pbdma_gp_base_offset_f(
179                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
180
181         mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
182                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
183                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
184
185         mem_wr32(inst_ptr, ram_fc_signature_w(),
186                  pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
187
188         mem_wr32(inst_ptr, ram_fc_formats_w(),
189                 pbdma_formats_gp_fermi0_f() |
190                 pbdma_formats_pb_fermi1_f() |
191                 pbdma_formats_mp_fermi0_f());
192
193         mem_wr32(inst_ptr, ram_fc_pb_header_w(),
194                 pbdma_pb_header_priv_user_f() |
195                 pbdma_pb_header_method_zero_f() |
196                 pbdma_pb_header_subchannel_zero_f() |
197                 pbdma_pb_header_level_main_f() |
198                 pbdma_pb_header_first_true_f() |
199                 pbdma_pb_header_type_inc_f());
200
201         mem_wr32(inst_ptr, ram_fc_subdevice_w(),
202                 pbdma_subdevice_id_f(1) |
203                 pbdma_subdevice_status_active_f() |
204                 pbdma_subdevice_channel_dma_enable_f());
205
206         mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
207
208         mem_wr32(inst_ptr, ram_fc_acquire_w(),
209                 pbdma_acquire_retry_man_2_f() |
210                 pbdma_acquire_retry_exp_2_f() |
211                 pbdma_acquire_timeout_exp_max_f() |
212                 pbdma_acquire_timeout_man_max_f() |
213                 pbdma_acquire_timeout_en_disable_f());
214
215         mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
216                 fifo_eng_timeslice_timeout_128_f() |
217                 fifo_eng_timeslice_timescale_3_f() |
218                 fifo_eng_timeslice_enable_true_f());
219
220         mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
221                 fifo_pb_timeslice_timeout_16_f() |
222                 fifo_pb_timeslice_timescale_0_f() |
223                 fifo_pb_timeslice_enable_true_f());
224
225         mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_f(c->hw_chid));
226
227         /* TBD: always priv mode? */
228         mem_wr32(inst_ptr, ram_fc_hce_ctrl_w(),
229                  pbdma_hce_ctrl_hce_priv_mode_yes_f());
230
231         nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
232
233         return 0;
234 }
235
236 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
237 {
238         BUG_ON(!c->userd_cpu_va);
239
240         nvhost_dbg_fn("");
241
242         mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
243         mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
244         mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
245         mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
246         mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
247         mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
248         mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
249         mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
250         mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
251         mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
252
253         return 0;
254 }
255
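/*
 * Bind the software channel to its hardware channel: select the GR
 * engine's runlist, point the CCSR instance register at the channel's
 * instance block (vid mem) and enable the channel.
 */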
256 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
257 {
258         struct gk20a *g = get_gk20a(ch_gk20a->ch->dev);
259         struct fifo_gk20a *f = &g->fifo;
260         struct fifo_engine_info_gk20a *engine_info =
261                 f->engine_info + ENGINE_GR_GK20A;
262
263         u32 inst_ptr = sg_phys(ch_gk20a->inst_block.mem.sgt->sgl)
264                 >> ram_in_base_shift_v();
265
266         nvhost_dbg_info("bind channel %d inst ptr 0x%08x",
267                 ch_gk20a->hw_chid, inst_ptr);
268
269         ch_gk20a->bound = true;
270
271         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
272                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
273                  ~ccsr_channel_runlist_f(~0)) |
274                  ccsr_channel_runlist_f(engine_info->runlist_id));
275
276         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
277                 ccsr_channel_inst_ptr_f(inst_ptr) |
278                 ccsr_channel_inst_target_vid_mem_f() |
279                 ccsr_channel_inst_bind_true_f());
280
281         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
282                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
283                  ~ccsr_channel_enable_set_f(~0)) |
284                  ccsr_channel_enable_set_true_f());
285 }
286
287 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
288 {
289         struct gk20a *g = get_gk20a(ch_gk20a->ch->dev);
290
291         nvhost_dbg_fn("");
292
293         if (ch_gk20a->bound)
294                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
295                         ccsr_channel_inst_ptr_f(0) |
296                         ccsr_channel_inst_bind_false_f());
297
298         ch_gk20a->bound = false;
299 }
300
301 static int channel_gk20a_alloc_inst(struct gk20a *g,
302                                 struct channel_gk20a *ch)
303 {
304         struct mem_mgr *memmgr = mem_mgr_from_g(g);
305
306         nvhost_dbg_fn("");
307
308         ch->inst_block.mem.ref =
309                 nvhost_memmgr_alloc(memmgr, ram_in_alloc_size_v(),
310                                     DEFAULT_ALLOC_ALIGNMENT,
311                                     DEFAULT_ALLOC_FLAGS,
312                                     0);
313
314         if (IS_ERR(ch->inst_block.mem.ref)) {
315                 ch->inst_block.mem.ref = 0;
316                 goto clean_up;
317         }
318
319         ch->inst_block.mem.sgt =
320                 nvhost_memmgr_sg_table(memmgr, ch->inst_block.mem.ref);
321
322         /* IS_ERR throws a warning here (expecting void *) */
323         if (IS_ERR(ch->inst_block.mem.sgt)) {
324                 ch->inst_block.mem.sgt = NULL;
325                 goto clean_up;
326         }
327
328         nvhost_dbg_info("channel %d inst block physical addr: 0x%16llx",
329                 ch->hw_chid, (u64)sg_phys(ch->inst_block.mem.sgt->sgl));
330
331         ch->inst_block.mem.size = ram_in_alloc_size_v();
332
333         nvhost_dbg_fn("done");
334         return 0;
335
336 clean_up:
337         nvhost_dbg(dbg_fn | dbg_err, "fail");
338         channel_gk20a_free_inst(g, ch);
339         return -ENOMEM;
340 }
341
342 static void channel_gk20a_free_inst(struct gk20a *g,
343                                 struct channel_gk20a *ch)
344 {
345         struct mem_mgr *memmgr = mem_mgr_from_g(g);
346
347         nvhost_memmgr_free_sg_table(memmgr, ch->inst_block.mem.ref,
348                         ch->inst_block.mem.sgt);
349         nvhost_memmgr_put(memmgr, ch->inst_block.mem.ref);
350         memset(&ch->inst_block, 0, sizeof(struct inst_desc));
351 }
352
353 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
354                                         bool add)
355 {
356         return gk20a_fifo_update_runlist(c->g,
357                 ENGINE_GR_GK20A, c->hw_chid, add);
358 }
359
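/*
 * Quiesce a channel. Optionally wait for all submitted work to finish
 * first (the error from gk20a_channel_finish is only WARNed about here),
 * then force any pending syncpoint fences to expire, disable the channel
 * in CCSR, preempt it and drop it from the runlist.
 */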
360 void gk20a_disable_channel(struct channel_gk20a *ch,
361                            bool finish,
362                            long finish_timeout)
363 {
364         struct nvhost_device_data *pdata = nvhost_get_devdata(ch->g->dev);
365         struct nvhost_master *host = host_from_gk20a_channel(ch);
366         int err;
367
368         if (finish) {
369                 err = gk20a_channel_finish(ch, finish_timeout);
370                 WARN_ON(err);
371         }
372
373         /* ensure no fences are pending */
374         nvhost_syncpt_set_min_eq_max(&host->syncpt,
375                         ch->hw_chid + pdata->syncpt_base);
376
377         /* disable channel */
378         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
379                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
380                 ccsr_channel_enable_clr_true_f());
381
382         /* preempt the channel */
383         gk20a_fifo_preempt_channel(ch->g,
384                 ENGINE_GR_GK20A, ch->hw_chid);
385
386         /* remove channel from runlist */
387         channel_gk20a_update_runlist(ch, false);
388 }
389
390 void gk20a_free_channel(struct nvhost_hwctx *ctx, bool finish)
391 {
392         struct channel_gk20a *ch = ctx->priv;
393         struct gk20a *g = ch->g;
394         struct fifo_gk20a *f = &g->fifo;
395         struct gr_gk20a *gr = &g->gr;
396         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
397         struct vm_gk20a *ch_vm = ch->vm;
398         unsigned long timeout = CONFIG_TEGRA_GRHOST_DEFAULT_TIMEOUT;
399
400         nvhost_dbg_fn("");
401
402         if (!ch->bound)
403                 return;
404
405         if (!gk20a_channel_as_bound(ch))
406                 goto unbind;
407
408         if (!tegra_platform_is_silicon())
409                 timeout = MAX_SCHEDULE_TIMEOUT;
410
411         nvhost_dbg_info("freeing bound channel context, timeout=%ld",
412                         timeout);
413
414         gk20a_disable_channel(ch, finish, timeout);
415
416         /* release channel ctx */
417         gk20a_free_channel_ctx(ch);
418
419         gk20a_gr_flush_channel_tlb(gr);
420
421         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
422
423         /* free gpfifo */
424         ch_vm->unmap(ch_vm, ch->gpfifo.gpu_va);
425         nvhost_memmgr_munmap(ch->gpfifo.mem.ref, ch->gpfifo.cpu_va);
426         nvhost_memmgr_put(memmgr, ch->gpfifo.mem.ref);
427         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
428
429         ctx->priv = NULL;
430         channel_gk20a_free_priv_cmdbuf(ch);
431
432         /* release hwctx binding to the as_share */
433         nvhost_as_release_share(ch_vm->as_share, ctx);
434
435 unbind:
436         channel_gk20a_unbind(ch);
437         channel_gk20a_free_inst(g, ch);
438
439         ch->vpr = false;
440
441         /* ALWAYS last */
442         release_used_channel(f, ch);
443 }
444
445 struct nvhost_hwctx *gk20a_open_channel(struct nvhost_channel *ch,
446                                          struct nvhost_hwctx *ctx)
447 {
448         struct gk20a *g = get_gk20a(ch->dev);
449         struct fifo_gk20a *f = &g->fifo;
450         struct channel_gk20a *ch_gk20a;
451
452         ch_gk20a = acquire_unused_channel(f);
453         if (ch_gk20a == NULL) {
454                 /* TBD: we want to make this virtualizable */
455                 nvhost_err(dev_from_gk20a(g), "out of hw chids");
456                 return 0;
457         }
458
459         ctx->priv = ch_gk20a;
460         ch_gk20a->g = g;
461         /* note the ch here is the same for *EVERY* gk20a channel */
462         ch_gk20a->ch = ch;
463         /* but there's one hwctx per gk20a channel */
464         ch_gk20a->hwctx = ctx;
465
466         if (channel_gk20a_alloc_inst(g, ch_gk20a)) {
467                 ch_gk20a->in_use = false;
468                 ctx->priv = 0;
469                 nvhost_err(dev_from_gk20a(g),
470                            "failed to open gk20a channel, out of inst mem");
471
472                 return 0;
473         }
474         channel_gk20a_bind(ch_gk20a);
475
476         /* The channel is *not* runnable at this point. It still needs an
477          * address space bound and a gpfifo and grctx allocated. */
478
479
480         init_waitqueue_head(&ch_gk20a->notifier_wq);
481         init_waitqueue_head(&ch_gk20a->semaphore_wq);
482         init_waitqueue_head(&ch_gk20a->submit_wq);
483
484         return ctx;
485 }
486
487 #if 0
488 /* move to debug_gk20a.c ... */
489 static void dump_gpfifo(struct channel_gk20a *c)
490 {
491         void *inst_ptr;
492         u32 chid = c->hw_chid;
493
494         nvhost_dbg_fn("");
495
496         inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
497         if (IS_ERR(inst_ptr))
498                 return;
499
500         nvhost_dbg_info("ramfc for channel %d:\n"
501                 "ramfc: gp_base 0x%08x, gp_base_hi 0x%08x, "
502                 "gp_fetch 0x%08x, gp_get 0x%08x, gp_put 0x%08x, "
503                 "pb_fetch 0x%08x, pb_fetch_hi 0x%08x, "
504                 "pb_get 0x%08x, pb_get_hi 0x%08x, "
505                 "pb_put 0x%08x, pb_put_hi 0x%08x\n"
506                 "userd: gp_put 0x%08x, gp_get 0x%08x, "
507                 "get 0x%08x, get_hi 0x%08x, "
508                 "put 0x%08x, put_hi 0x%08x\n"
509                 "pbdma: status 0x%08x, channel 0x%08x, userd 0x%08x, "
510                 "gp_base 0x%08x, gp_base_hi 0x%08x, "
511                 "gp_fetch 0x%08x, gp_get 0x%08x, gp_put 0x%08x, "
512                 "pb_fetch 0x%08x, pb_fetch_hi 0x%08x, "
513                 "get 0x%08x, get_hi 0x%08x, put 0x%08x, put_hi 0x%08x\n"
514                 "channel: ccsr_channel 0x%08x",
515                 chid,
516                 mem_rd32(inst_ptr, ram_fc_gp_base_w()),
517                 mem_rd32(inst_ptr, ram_fc_gp_base_hi_w()),
518                 mem_rd32(inst_ptr, ram_fc_gp_fetch_w()),
519                 mem_rd32(inst_ptr, ram_fc_gp_get_w()),
520                 mem_rd32(inst_ptr, ram_fc_gp_put_w()),
521                 mem_rd32(inst_ptr, ram_fc_pb_fetch_w()),
522                 mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()),
523                 mem_rd32(inst_ptr, ram_fc_pb_get_w()),
524                 mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()),
525                 mem_rd32(inst_ptr, ram_fc_pb_put_w()),
526                 mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()),
527                 mem_rd32(c->userd_cpu_va, ram_userd_gp_put_w()),
528                 mem_rd32(c->userd_cpu_va, ram_userd_gp_get_w()),
529                 mem_rd32(c->userd_cpu_va, ram_userd_get_w()),
530                 mem_rd32(c->userd_cpu_va, ram_userd_get_hi_w()),
531                 mem_rd32(c->userd_cpu_va, ram_userd_put_w()),
532                 mem_rd32(c->userd_cpu_va, ram_userd_put_hi_w()),
533                 gk20a_readl(c->g, pbdma_status_r(0)),
534                 gk20a_readl(c->g, pbdma_channel_r(0)),
535                 gk20a_readl(c->g, pbdma_userd_r(0)),
536                 gk20a_readl(c->g, pbdma_gp_base_r(0)),
537                 gk20a_readl(c->g, pbdma_gp_base_hi_r(0)),
538                 gk20a_readl(c->g, pbdma_gp_fetch_r(0)),
539                 gk20a_readl(c->g, pbdma_gp_get_r(0)),
540                 gk20a_readl(c->g, pbdma_gp_put_r(0)),
541                 gk20a_readl(c->g, pbdma_pb_fetch_r(0)),
542                 gk20a_readl(c->g, pbdma_pb_fetch_hi_r(0)),
543                 gk20a_readl(c->g, pbdma_get_r(0)),
544                 gk20a_readl(c->g, pbdma_get_hi_r(0)),
545                 gk20a_readl(c->g, pbdma_put_r(0)),
546                 gk20a_readl(c->g, pbdma_put_hi_r(0)),
547                 gk20a_readl(c->g, ccsr_channel_r(chid)));
548
549         nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
550 }
551 #endif
552
553 /* allocate private cmd buffer.
554    used for inserting commands before/after user submitted buffers. */
555 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
556 {
557         struct device *d = dev_from_gk20a(c->g);
558         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
559         struct vm_gk20a *ch_vm = c->vm;
560         struct priv_cmd_queue *q = &c->priv_cmd_q;
561         struct priv_cmd_entry *e;
562         u32 i = 0, size;
563
564         /* Kernel can insert gpfifos before and after user gpfifos.
565            Before user gpfifos, kernel inserts fence_wait, which takes
566            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
567            After user gpfifos, kernel inserts fence_get, which takes
568            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
569            = 6 dwords.
570            Worst case, if the kernel adds both of them for every user gpfifo,
571            the max size of the priv_cmdbuf is:
572            gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */
573         size = roundup_pow_of_two(
574                 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
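        /*
         * Worked example (illustrative numbers only): with a 1024-entry
         * gpfifo this is 1024 * 2 * 10 * 4 / 3 = 27306 bytes, which
         * roundup_pow_of_two() turns into 32768 bytes (8192 words).
         */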
575
576         q->mem.ref = nvhost_memmgr_alloc(memmgr,
577                                          size,
578                                          DEFAULT_ALLOC_ALIGNMENT,
579                                          DEFAULT_ALLOC_FLAGS,
580                                          0);
581         if (IS_ERR_OR_NULL(q->mem.ref)) {
582                 nvhost_err(d, "ch %d : failed to allocate"
583                            " priv cmd buffer(size: %d bytes)",
584                            c->hw_chid, size);
585                 goto clean_up;
586         }
587         q->mem.size = size;
588
589         q->base_ptr = (u32 *)nvhost_memmgr_mmap(q->mem.ref);
590         if (IS_ERR_OR_NULL(q->base_ptr)) {
591                 nvhost_err(d, "ch %d : failed to map cpu va"
592                            "for priv cmd buffer", c->hw_chid);
593                 goto clean_up;
594         }
595
596         memset(q->base_ptr, 0, size);
597
598         q->base_gva = ch_vm->map(ch_vm, memmgr,
599                         q->mem.ref,
600                          /*offset_align, flags, kind*/
601                         0, 0, 0, NULL);
602         if (!q->base_gva) {
603                 nvhost_err(d, "ch %d : failed to map gpu va"
604                            "for priv cmd buffer", c->hw_chid);
605                 goto clean_up;
606         }
607
608         q->size = q->mem.size / sizeof(u32);
609
610         INIT_LIST_HEAD(&q->head);
611         INIT_LIST_HEAD(&q->free);
612
613         /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
614         for (i = 0; i < q->size / 4; i++) {
615                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
616                 if (!e) {
617                         nvhost_err(d, "ch %d: fail to pre-alloc cmd entry",
618                                 c->hw_chid);
619                         goto clean_up;
620                 }
621                 e->pre_alloc = true;
622                 list_add(&e->list, &q->free);
623         }
624
625         return 0;
626
627 clean_up:
628         channel_gk20a_free_priv_cmdbuf(c);
629         return -ENOMEM;
630 }
631
632 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
633 {
634         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
635         struct vm_gk20a *ch_vm = c->vm;
636         struct priv_cmd_queue *q = &c->priv_cmd_q;
637         struct priv_cmd_entry *e;
638         struct list_head *pos, *tmp, *head;
639
640         if (q->size == 0)
641                 return;
642
643         ch_vm->unmap(ch_vm, q->base_gva);
644         nvhost_memmgr_munmap(q->mem.ref, q->base_ptr);
645         nvhost_memmgr_put(memmgr, q->mem.ref);
646
647         /* free used list */
648         head = &q->head;
649         list_for_each_safe(pos, tmp, head) {
650                 e = container_of(pos, struct priv_cmd_entry, list);
651                 free_priv_cmdbuf(c, e);
652         }
653
654         /* free free list */
655         head = &q->free;
656         list_for_each_safe(pos, tmp, head) {
657                 e = container_of(pos, struct priv_cmd_entry, list);
658                 e->pre_alloc = false;
659                 free_priv_cmdbuf(c, e);
660         }
661
662         memset(q, 0, sizeof(struct priv_cmd_queue));
663 }
664
665 /* allocate a cmd buffer with given size. size is number of u32 entries */
666 static int alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
667                              struct priv_cmd_entry **entry)
668 {
669         struct priv_cmd_queue *q = &c->priv_cmd_q;
670         struct priv_cmd_entry *e;
671         struct list_head *node;
672         u32 free_count;
673         u32 size = orig_size;
674         bool no_retry = false;
675
676         nvhost_dbg_fn("size %d", orig_size);
677
678         *entry = NULL;
679
680         /* if free space at the end is less than requested, increase the size
681          * so the real allocation starts from the beginning of the buffer. */
682         if (q->put + size > q->size)
683                 size = orig_size + (q->size - q->put);
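        /*
         * Illustrative example (assumed values): with q->size == 8192 and
         * q->put == 8000, a request for 100 words does not fit in the 192
         * words left before the end, so size becomes 100 + 192 = 292; the
         * entry is then placed at the start of the buffer and q->put ends
         * up at 100 (see the size != orig_size case below).
         */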
684
685         nvhost_dbg_info("ch %d: priv cmd queue get:put %d:%d",
686                         c->hw_chid, q->get, q->put);
687
688 TRY_AGAIN:
689         free_count = (q->size - (q->put - q->get) - 1) % q->size;
690
691         if (size > free_count) {
692                 if (!no_retry) {
693                         recycle_priv_cmdbuf(c);
694                         no_retry = true;
695                         goto TRY_AGAIN;
696                 } else
697                         return -EAGAIN;
698         }
699
700         if (unlikely(list_empty(&q->free))) {
701
702                 nvhost_dbg_info("ch %d: run out of pre-alloc entries",
703                         c->hw_chid);
704
705                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
706                 if (!e) {
707                         nvhost_err(dev_from_gk20a(c->g),
708                                 "ch %d: fail to allocate priv cmd entry",
709                                 c->hw_chid);
710                         return -ENOMEM;
711                 }
712         } else  {
713                 node = q->free.next;
714                 list_del(node);
715                 e = container_of(node, struct priv_cmd_entry, list);
716         }
717
718         e->size = orig_size;
719         e->gp_get = c->gpfifo.get;
720         e->gp_put = c->gpfifo.put;
721         e->gp_wrap = c->gpfifo.wrap;
722
723         /* if we have increased the size to skip free space at the end, set put
724            to the beginning of the cmd buffer (0) + size */
725         if (size != orig_size) {
726                 e->ptr = q->base_ptr;
727                 e->gva = q->base_gva;
728                 q->put = orig_size;
729         } else {
730                 e->ptr = q->base_ptr + q->put;
731                 e->gva = q->base_gva + q->put * sizeof(u32);
732                 q->put = (q->put + orig_size) & (q->size - 1);
733         }
734
735         /* we already handled q->put + size > q->size so BUG_ON this */
736         BUG_ON(q->put > q->size);
737
738         /* add new entry to head since we free from head */
739         list_add(&e->list, &q->head);
740
741         *entry = e;
742
743         nvhost_dbg_fn("done");
744
745         return 0;
746 }
747
748 /* Don't call this to free an explicit cmd entry.
749  * It doesn't update priv_cmd_queue get/put */
750 static void free_priv_cmdbuf(struct channel_gk20a *c,
751                              struct priv_cmd_entry *e)
752 {
753         struct priv_cmd_queue *q = &c->priv_cmd_q;
754
755         if (!e)
756                 return;
757
758         list_del(&e->list);
759
760         if (unlikely(!e->pre_alloc))
761                 kfree(e);
762         else {
763                 memset(e, 0, sizeof(struct priv_cmd_entry));
764                 e->pre_alloc = true;
765                 list_add(&e->list, &q->free);
766         }
767 }
768
769 /* free entries if they're no longer being used */
770 static void recycle_priv_cmdbuf(struct channel_gk20a *c)
771 {
772         struct priv_cmd_queue *q = &c->priv_cmd_q;
773         struct priv_cmd_entry *e;
774         struct list_head *pos, *tmp, *head = &q->head;
775         bool wrap_around;
776
777         nvhost_dbg_fn("");
778
779         /* Find the most recent free entry. Free it and everything before it */
780         list_for_each(pos, head) {
781
782                 e = list_entry(pos, struct priv_cmd_entry, list);
783
784                 nvhost_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
785                         "curr get:put:wrap %d:%d:%d",
786                         c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
787                         c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
788
789                 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
790                 if (e->gp_get < e->gp_put) {
791                         if (c->gpfifo.get >= e->gp_put ||
792                             wrap_around)
793                                 break;
794                         else
795                                 e->gp_get = c->gpfifo.get;
796                 } else if (e->gp_get > e->gp_put) {
797                         if (wrap_around &&
798                             c->gpfifo.get >= e->gp_put)
799                                 break;
800                         else
801                                 e->gp_get = c->gpfifo.get;
802                 }
803         }
804
805         if (pos != head) {
806                 q->get = (e->ptr - q->base_ptr) + e->size;
807         } else {
808                 nvhost_dbg_info("no free entry recycled");
809                 return;
810         }
811         head = pos->prev;
812         list_for_each_safe(pos, tmp, head) {
813                 e = container_of(pos, struct priv_cmd_entry, list);
814                 free_priv_cmdbuf(c, e);
815         }
816
817         nvhost_dbg_fn("done");
818 }
819
820
821 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
822                                struct nvhost_alloc_gpfifo_args *args)
823 {
824         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(c);
825         struct gk20a *g = c->g;
826         struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
827         struct device *d = dev_from_gk20a(g);
828         struct vm_gk20a *ch_vm;
829         u32 gpfifo_size;
830         int ret;
831
832         /* Kernel can insert one extra gpfifo entry before the user-submitted gpfifos and
833            another one after, for internal usage; triple the requested size for the worst case. */
834         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
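        /*
         * E.g. (illustrative): a request for 100 entries becomes 300 and is
         * rounded up to a 512-entry gpfifo.
         */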
835
836         if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
837                 c->vpr = true;
838
839         /* an address space needs to have been bound at this point.   */
840         if (!gk20a_channel_as_bound(c)) {
841                 int err;
842                 nvhost_warn(d,
843                             "not bound to an address space at time of gpfifo"
844                             " allocation.  Attempting to create and bind to"
845                             " one...");
846                 /*
847                  * Eventually this will be a fatal error. For now attempt to
848                  * create and bind a share here.  This helps until we change
849                  * clients to use the new address space API.  However doing this
850                  * can mask errors in programming access to the address space
851                  * through the front door...
852                  */
853                 err = nvhost_as_alloc_and_bind_share(c->ch, c->hwctx);
854                 if (err || !gk20a_channel_as_bound(c)) {
855                         nvhost_err(d,
856                                    "not bound to address space at time"
857                                    " of gpfifo allocation");
858                         return err;
859                 }
860         }
861         ch_vm = c->vm;
862
863         c->cmds_pending = false;
864
865         c->last_submit_fence.valid        = false;
866         c->last_submit_fence.syncpt_value = 0;
867         c->last_submit_fence.syncpt_id    = c->hw_chid + pdata->syncpt_base;
868
869         c->ramfc.offset = 0;
870         c->ramfc.size = ram_in_ramfc_s() / 8;
871
872         if (c->gpfifo.mem.ref) {
873                 nvhost_err(d, "channel %d :"
874                            "gpfifo already allocated", c->hw_chid);
875                 return -EEXIST;
876         }
877
878         c->gpfifo.mem.ref =
879                 nvhost_memmgr_alloc(memmgr,
880                                     gpfifo_size * sizeof(struct gpfifo),
881                                     DEFAULT_ALLOC_ALIGNMENT,
882                                     DEFAULT_ALLOC_FLAGS,
883                                     0);
884         if (IS_ERR_OR_NULL(c->gpfifo.mem.ref)) {
885                 nvhost_err(d, "channel %d :"
886                            " failed to allocate gpfifo (size: %d bytes)",
887                            c->hw_chid, gpfifo_size);
888                 c->gpfifo.mem.ref = 0;
889                 return -ENOMEM;
890         }
891         c->gpfifo.entry_num = gpfifo_size;
892
893         c->gpfifo.cpu_va =
894                 (struct gpfifo *)nvhost_memmgr_mmap(c->gpfifo.mem.ref);
895         if (IS_ERR_OR_NULL(c->gpfifo.cpu_va))
896                 goto clean_up;
897
898         c->gpfifo.get = c->gpfifo.put = 0;
899
900         c->gpfifo.gpu_va = ch_vm->map(ch_vm, memmgr,
901                                 c->gpfifo.mem.ref,
902                                 /*offset_align, flags, kind*/
903                                 0, 0, 0, NULL);
904         if (!c->gpfifo.gpu_va) {
905                 nvhost_err(d, "channel %d : failed to map"
906                            " gpu_va for gpfifo", c->hw_chid);
907                 goto clean_up;
908         }
909
910         nvhost_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
911                 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
912
913         channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
914
915         channel_gk20a_setup_userd(c);
916         channel_gk20a_commit_userd(c);
917
918         /* TBD: setup engine contexts */
919
920         ret = channel_gk20a_alloc_priv_cmdbuf(c);
921         if (ret)
922                 goto clean_up;
923
924         ret = channel_gk20a_update_runlist(c, true);
925         if (ret)
926                 goto clean_up;
927
928         nvhost_dbg_fn("done");
929         return 0;
930
931 clean_up:
932         nvhost_dbg(dbg_fn | dbg_err, "fail");
933         ch_vm->unmap(ch_vm, c->gpfifo.gpu_va);
934         nvhost_memmgr_munmap(c->gpfifo.mem.ref, c->gpfifo.cpu_va);
935         nvhost_memmgr_put(memmgr, c->gpfifo.mem.ref);
936         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
937         return -ENOMEM;
938 }
939
940 static inline int wfi_cmd_size(void)
941 {
942         return 2;
943 }
944 void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
945 {
946         /* wfi */
947         cmd->ptr[(*i)++] = 0x2001001E;
948         /* handle, ignored */
949         cmd->ptr[(*i)++] = 0x00000000;
950 }
951
952 static inline bool check_gp_put(struct gk20a *g,
953                                 struct channel_gk20a *c)
954 {
955         u32 put;
956         /* gp_put changed unexpectedly since last update? */
957         put = gk20a_bar1_readl(g,
958                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
959         if (c->gpfifo.put != put) {
960                 /*TBD: BUG_ON/teardown on this*/
961                 nvhost_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
962                            "since last update");
963                 c->gpfifo.put = put;
964                 return false; /* surprise! */
965         }
966         return true; /* checked out ok */
967 }
968
969 /* Update with this periodically to determine how the gpfifo is draining. */
970 static inline u32 update_gp_get(struct gk20a *g,
971                                 struct channel_gk20a *c)
972 {
973         u32 new_get = gk20a_bar1_readl(g,
974                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
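        /*
         * GP_GET only moves forward through the ring, so reading a smaller
         * value than the cached one means the hardware wrapped past the end;
         * flip the wrap flag so entries from different laps can be told
         * apart (see recycle_priv_cmdbuf).
         */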
975         if (new_get < c->gpfifo.get)
976                 c->gpfifo.wrap = !c->gpfifo.wrap;
977         c->gpfifo.get = new_get;
978         return new_get;
979 }
980
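/*
 * Standard ring-buffer free count, keeping one entry unused so that
 * put == get always means "empty" rather than "full". The same idiom is
 * used for the priv cmd queue in alloc_priv_cmdbuf(). E.g. (illustrative):
 * with 512 entries, put == 10 and get == 500, (512 - (10 - 500) - 1) % 512
 * gives 489 free entries.
 */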
981 static inline u32 gp_free_count(struct channel_gk20a *c)
982 {
983         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
984                 c->gpfifo.entry_num;
985 }
986
987 /* Issue a syncpoint increment *preceded* by a wait-for-idle
988  * command.  All commands on the channel will have been
989  * consumed at the time the fence syncpoint increment occurs.
990  */
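/* The private command buffer built below is 6 words: a WFI pair followed
 * by a SYNCPOINT_A payload of 0 and a SYNCPOINT_B word carrying the
 * syncpoint id and the incr bit, matching cmd_size = 4 + wfi_cmd_size().
 */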
991 int gk20a_channel_submit_wfi_fence(struct gk20a *g,
992                                    struct channel_gk20a *c,
993                                    struct nvhost_syncpt *sp,
994                                    struct nvhost_fence *fence)
995 {
996         struct priv_cmd_entry *cmd = NULL;
997         int cmd_size, j = 0;
998         u32 free_count;
999
1000         cmd_size =  4 + wfi_cmd_size();
1001
1002         update_gp_get(g, c);
1003         free_count = gp_free_count(c);
1004         if (unlikely(!free_count)) {
1005                 nvhost_err(dev_from_gk20a(g),
1006                            "not enough gpfifo space");
1007                 return -EAGAIN;
1008         }
1009
1010         alloc_priv_cmdbuf(c, cmd_size, &cmd);
1011         if (unlikely(IS_ERR_OR_NULL(cmd))) {
1012                 nvhost_err(dev_from_gk20a(g),
1013                            "not enough priv cmd buffer space");
1014                 return -EAGAIN;
1015         }
1016
1017         fence->value = nvhost_syncpt_incr_max(sp, fence->syncpt_id, 1);
1018
1019         c->last_submit_fence.valid        = true;
1020         c->last_submit_fence.syncpt_value = fence->value;
1021         c->last_submit_fence.syncpt_id    = fence->syncpt_id;
1022         c->last_submit_fence.wfi          = true;
1023
1024         trace_nvhost_ioctl_ctrl_syncpt_incr(fence->syncpt_id);
1025
1026
1027         add_wfi_cmd(cmd, &j);
1028
1029         /* syncpoint_a */
1030         cmd->ptr[j++] = 0x2001001C;
1031         /* payload, ignored */
1032         cmd->ptr[j++] = 0;
1033         /* syncpoint_b */
1034         cmd->ptr[j++] = 0x2001001D;
1035         /* syncpt_id, incr */
1036         cmd->ptr[j++] = (fence->syncpt_id << 8) | 0x1;
1037
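        /* As packed here (and again in gk20a_submit_channel_gpfifo), a gpfifo
         * entry carries the low 32 bits of the command buffer GPU VA in
         * entry0, and the high VA bits together with the length in words
         * (shifted up by 10) in entry1.
         */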
1038         c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1039         c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
1040                 (cmd->size << 10);
1041
1042         c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
1043
1044         /* save gp_put */
1045         cmd->gp_put = c->gpfifo.put;
1046
1047         gk20a_bar1_writel(g,
1048                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1049                 c->gpfifo.put);
1050
1051         nvhost_dbg_info("post-submit put %d, get %d, size %d",
1052                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1053
1054         return 0;
1055 }
1056
1057 static u32 get_gp_free_count(struct channel_gk20a *c)
1058 {
1059         update_gp_get(c->g, c);
1060         return gp_free_count(c);
1061 }
1062
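/*
 * Submit user gpfifo entries to the channel. Optionally a syncpoint wait
 * is inserted before them, and a syncpoint increment (with an optional WFI
 * for FENCE_GET) is always inserted after them, both built from the priv
 * cmd buffer. Finally GP_PUT is written through BAR1 to kick off the work,
 * and the resulting fence is returned to the caller.
 */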
1063 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1064                                 struct nvhost_gpfifo *gpfifo,
1065                                 u32 num_entries,
1066                                 struct nvhost_fence *fence,
1067                                 u32 flags)
1068 {
1069         struct gk20a *g = c->g;
1070         struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
1071         struct device *d = dev_from_gk20a(g);
1072         struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
1073         u32 i, incr_id = ~0, wait_id = ~0;
1074         int err = 0;
1075         int incr_cmd_size;
1076         bool wfi_cmd;
1077         struct priv_cmd_entry *wait_cmd = NULL;
1078         struct priv_cmd_entry *incr_cmd = NULL;
1079         /* we might need two extra gpfifo entries - one for syncpoint
1080          * wait and one for syncpoint increment */
1081         const int extra_entries = 2;
1082
1083         if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1084                       NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1085             !fence)
1086                 return -EINVAL;
1087
1088         nvhost_dbg_info("channel %d", c->hw_chid);
1089
1090         check_gp_put(g, c);
1091         update_gp_get(g, c);
1092
1093         nvhost_dbg_info("pre-submit put %d, get %d, size %d",
1094                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1095
1096         /* If the caller has requested a fence "get" then we need to be
1097          * sure the fence represents work completion.  In that case
1098          * issue a wait-for-idle before the syncpoint increment.
1099          */
1100         wfi_cmd = !!(flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET);
1101
1102         /* Invalidate tlb if it's dirty...                                   */
1103         /* TBD: this should be done in the cmd stream, not with PRIs.        */
1104         /* We don't know what context is currently running...                */
1105         /* Note also: there can be more than one context associated with the */
1106         /* address space (vm).   */
1107         if (c->vm->tlb_dirty) {
1108                 c->vm->tlb_inval(c->vm);
1109                 c->vm->tlb_dirty = false;
1110         }
1111
1112         /* Make sure we have enough space for gpfifo entries. If not,
1113          * wait for signals from completed submits */
1114         if (gp_free_count(c) < num_entries + extra_entries) {
1115                 err = wait_event_interruptible(c->submit_wq,
1116                         get_gp_free_count(c) >= num_entries + extra_entries);
1117         }
1118
1119         if (err) {
1120                 nvhost_err(d, "not enough gpfifo space");
1121                 err = -EAGAIN;
1122                 goto clean_up;
1123         }
1124
1125         /* optionally insert a syncpt wait at the beginning of the gpfifo
1126            submission when the user requested one and the wait hasn't already expired.
1127         */
1128
1129         /* validate that the id makes sense, elide if not */
1130         /* the only reason this isn't being unceremoniously killed is to
1131          * keep running some tests which trigger this condition */
1132         if ((flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) &&
1133             ((fence->syncpt_id < 0) ||
1134              (fence->syncpt_id >= nvhost_syncpt_nb_pts(sp)))) {
1135                 dev_warn(d, "invalid wait id in gpfifo submit, elided");
1136                 flags &= ~NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT;
1137         }
1138
1139         if ((flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) &&
1140             !nvhost_syncpt_is_expired(sp, fence->syncpt_id, fence->value)) {
1141                 alloc_priv_cmdbuf(c, 4, &wait_cmd);
1142                 if (wait_cmd == NULL) {
1143                         nvhost_err(d, "not enough priv cmd buffer space");
1144                         err = -EAGAIN;
1145                         goto clean_up;
1146                 }
1147         }
1148
1149         /* always insert syncpt increment at end of gpfifo submission
1150            to keep track of method completion for idle railgating */
1151         /* TODO: we need to find a way to get rid of these wfi on every
1152          * submission...
1153          */
1154         incr_cmd_size = 4;
1155         if (wfi_cmd)
1156                 incr_cmd_size += wfi_cmd_size();
1157         alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
1158         if (incr_cmd == NULL) {
1159                 nvhost_err(d, "not enough priv cmd buffer space");
1160                 err = -EAGAIN;
1161                 goto clean_up;
1162         }
1163
1164         if (wait_cmd) {
1165                 wait_id = fence->syncpt_id;
1166                 /* syncpoint_a */
1167                 wait_cmd->ptr[0] = 0x2001001C;
1168                 /* payload */
1169                 wait_cmd->ptr[1] = fence->value;
1170                 /* syncpoint_b */
1171                 wait_cmd->ptr[2] = 0x2001001D;
1172                 /* syncpt_id, switch_en, wait */
1173                 wait_cmd->ptr[3] = (wait_id << 8) | 0x10;
1174
1175                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1176                         u64_lo32(wait_cmd->gva);
1177                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1178                         u64_hi32(wait_cmd->gva) |
1179                         (wait_cmd->size << 10);
1180
1181                 c->gpfifo.put = (c->gpfifo.put + 1) &
1182                         (c->gpfifo.entry_num - 1);
1183
1184                 /* save gp_put */
1185                 wait_cmd->gp_put = c->gpfifo.put;
1186         }
1187
1188         for (i = 0; i < num_entries; i++) {
1189                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1190                         gpfifo[i].entry0; /* cmd buf va low 32 */
1191                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1192                         gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
1193                 c->gpfifo.put = (c->gpfifo.put + 1) &
1194                         (c->gpfifo.entry_num - 1);
1195         }
1196
1197         if (incr_cmd) {
1198                 int j = 0;
1199                 incr_id = c->hw_chid + pdata->syncpt_base;
1200                 fence->syncpt_id = incr_id;
1201                 fence->value     = nvhost_syncpt_incr_max(sp, incr_id, 1);
1202
1203                 c->last_submit_fence.valid        = true;
1204                 c->last_submit_fence.syncpt_value = fence->value;
1205                 c->last_submit_fence.syncpt_id    = fence->syncpt_id;
1206                 c->last_submit_fence.wfi          = wfi_cmd;
1207
1208                 trace_nvhost_ioctl_ctrl_syncpt_incr(fence->syncpt_id);
1209                 if (wfi_cmd)
1210                         add_wfi_cmd(incr_cmd, &j);
1211                 /* syncpoint_a */
1212                 incr_cmd->ptr[j++] = 0x2001001C;
1213                 /* payload, ignored */
1214                 incr_cmd->ptr[j++] = 0;
1215                 /* syncpoint_b */
1216                 incr_cmd->ptr[j++] = 0x2001001D;
1217                 /* syncpt_id, incr */
1218                 incr_cmd->ptr[j++] = (fence->syncpt_id << 8) | 0x1;
1219
1220                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1221                         u64_lo32(incr_cmd->gva);
1222                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1223                         u64_hi32(incr_cmd->gva) |
1224                         (incr_cmd->size << 10);
1225
1226                 c->gpfifo.put = (c->gpfifo.put + 1) &
1227                         (c->gpfifo.entry_num - 1);
1228
1229                 /* save gp_put */
1230                 incr_cmd->gp_put = c->gpfifo.put;
1231         }
1232
1233         /* Invalidate tlb if it's dirty...                                   */
1234         /* TBD: this should be done in the cmd stream, not with PRIs.        */
1235         /* We don't know what context is currently running...                */
1236         /* Note also: there can be more than one context associated with the */
1237         /* address space (vm).   */
1238         if (c->vm->tlb_dirty) {
1239                 c->vm->tlb_inval(c->vm);
1240                 c->vm->tlb_dirty = false;
1241         }
1242
1243         trace_nvhost_channel_submit_gpfifo(c->ch->dev->name,
1244                                            c->hw_chid,
1245                                            num_entries,
1246                                            flags,
1247                                            wait_id, incr_id);
1248
1249
1250         c->cmds_pending = true;
1251         gk20a_bar1_writel(g,
1252                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1253                 c->gpfifo.put);
1254
1255         nvhost_dbg_info("post-submit put %d, get %d, size %d",
1256                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1257
1258         nvhost_dbg_fn("done");
1259         return 0;
1260
1261 clean_up:
1262         nvhost_dbg(dbg_fn | dbg_err, "fail");
1263         free_priv_cmdbuf(c, wait_cmd);
1264         free_priv_cmdbuf(c, incr_cmd);
1265         return err;
1266 }
1267
1268 void gk20a_remove_channel_support(struct channel_gk20a *c)
1269 {
1270
1271 }
1272
1273 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1274 {
1275         struct channel_gk20a *c = g->fifo.channel+chid;
1276         c->g = g;
1277         c->in_use = false;
1278         c->hw_chid = chid;
1279         c->bound = false;
1280         c->remove_support = gk20a_remove_channel_support;
1281 #if defined(CONFIG_TEGRA_GPU_CYCLE_STATS)
1282         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1283 #endif
1284         return 0;
1285 }
1286
1287 int gk20a_channel_init(struct nvhost_channel *ch,
1288                        struct nvhost_master *host, int index)
1289 {
1290         return 0;
1291 }
1292
1293 int gk20a_channel_alloc_obj(struct nvhost_channel *channel,
1294                         u32 class_num,
1295                         u32 *obj_id,
1296                         u32 vaspace_share)
1297 {
1298         nvhost_dbg_fn("");
1299         return 0;
1300 }
1301
1302 int gk20a_channel_free_obj(struct nvhost_channel *channel, u32 obj_id)
1303 {
1304         nvhost_dbg_fn("");
1305         return 0;
1306 }
1307
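/*
 * Wait for everything previously submitted on this channel to complete.
 * If the last submit did not already end with a WFI + syncpoint increment,
 * one is submitted here first; then the syncpoint is waited on for up to
 * 'timeout'. Callers are expected to check the return value (see the
 * WARN_ON in gk20a_disable_channel).
 */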
1308 int gk20a_channel_finish(struct channel_gk20a *ch, long timeout)
1309 {
1310         struct nvhost_syncpt *sp = syncpt_from_gk20a(ch->g);
1311         struct nvhost_device_data *pdata = nvhost_get_devdata(ch->g->dev);
1312         struct nvhost_fence fence;
1313         int err = 0;
1314
1315         if (!ch->cmds_pending)
1316                 return 0;
1317
1318         if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
1319                 nvhost_dbg_fn("issuing wfi, incr to finish the channel");
1320                 fence.syncpt_id = ch->hw_chid + pdata->syncpt_base;
1321                 err = gk20a_channel_submit_wfi_fence(ch->g, ch,
1322                                                      sp, &fence);
1323         }
1324         if (err)
1325                 return err;
1326
1327         BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));
1328
1329         nvhost_dbg_fn("waiting for channel to finish syncpt:%d val:%d",
1330                       ch->last_submit_fence.syncpt_id,
1331                       ch->last_submit_fence.syncpt_value);
1332         err = nvhost_syncpt_wait_timeout(sp,
1333                                          ch->last_submit_fence.syncpt_id,
1334                                          ch->last_submit_fence.syncpt_value,
1335                                          timeout, &fence.value, NULL, false);
1336         if (WARN_ON(err))
1337                 dev_warn(dev_from_gk20a(ch->g),
1338                          "timed out waiting for gk20a channel to finish");
1339         else
1340                 ch->cmds_pending = false;
1341
1342         return err;
1343 }
1344
1345 int gk20a_channel_wait(struct channel_gk20a *ch,
1346                        struct nvhost_wait_args *args)
1347 {
1348         struct device *d = dev_from_gk20a(ch->g);
1349         struct platform_device *dev = ch->ch->dev;
1350         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
1351         struct mem_handle *handle_ref;
1352         struct notification *notif;
1353         struct timespec tv;
1354         u64 jiffies;
1355         ulong id;
1356         u32 offset;
1357         long timeout;
1358         int remain, ret = 0;
1359
1360         nvhost_dbg_fn("");
1361
1362         if (args->timeout == NVHOST_NO_TIMEOUT)
1363                 timeout = MAX_SCHEDULE_TIMEOUT;
1364         else
1365                 timeout = (u32)msecs_to_jiffies(args->timeout);
1366
1367         switch (args->type) {
1368         case NVHOST_WAIT_TYPE_NOTIFIER:
1369                 id = args->condition.notifier.nvmap_handle;
1370                 offset = args->condition.notifier.offset;
1371
1372                 handle_ref = nvhost_memmgr_get(memmgr, id, dev);
1373                 if (!handle_ref) {
1374                         nvhost_err(d, "invalid notifier nvmap handle 0x%lx",
1375                                    id);
1376                         return -EINVAL;
1377                 }
1378
1379                 notif = nvhost_memmgr_mmap(handle_ref);
1380                 if (IS_ERR_OR_NULL(notif)) {
1381                         nvhost_err(d, "failed to map notifier memory");
1382                         return -ENOMEM;
1383                 }
1384
1385                 notif = (struct notification *)((uintptr_t)notif + offset);
1386
1387                 /* user should set status pending before
1388                  * calling this ioctl */
1389                 remain = wait_event_interruptible_timeout(
1390                                 ch->notifier_wq,
1391                                 notif->status == 0,
1392                                 timeout);
1393
1394                 if (remain == 0 && notif->status != 0) {
1395                         ret = -ETIMEDOUT;
1396                         goto notif_clean_up;
1397                 } else if (remain < 0) {
1398                         ret = -EINTR;
1399                         goto notif_clean_up;
1400                 }
1401
1402                 /* TBD: fill in correct information */
1403                 jiffies = get_jiffies_64();
1404                 jiffies_to_timespec(jiffies, &tv);
1405                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
1406                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
1407                 notif->info32 = 0xDEADBEEF; /* should be object name */
1408                 notif->info16 = ch->hw_chid; /* should be method offset */
1409
1410 notif_clean_up:
1411                 nvhost_memmgr_munmap(handle_ref, notif);
1412                 return ret;
1413         case NVHOST_WAIT_TYPE_SEMAPHORE:
1414                 break;
1415         default:
1416                 return -EINVAL;
1417         }
1418
1419         return 0;
1420 }
1421
1422 #if defined(CONFIG_TEGRA_GPU_CYCLE_STATS)
1423 int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
1424                        struct nvhost_cycle_stats_args *args)
1425 {
1426         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
1427         struct mem_handle *handle_ref;
1428         void *virtual_address;
1429         u32 cyclestate_buffer_size;
1430         struct platform_device *dev = ch->ch->dev;
1431
1432         if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
1433                 /* set up new cyclestats buffer */
1434                 handle_ref = nvhost_memmgr_get(memmgr, args->nvmap_handle, dev);
1435                 if (handle_ref == NULL)
1436                         return -ENOMEM;
1437
1438                 virtual_address = nvhost_memmgr_mmap(handle_ref);
1439                 if (IS_ERR(virtual_address))
1440                         return -ENOMEM;
1441
1442                 nvhost_memmgr_get_param(memmgr, handle_ref,
1443                                         NVMAP_HANDLE_PARAM_SIZE,
1444                                         &cyclestate_buffer_size);
1445
1446                 ch->cyclestate.cyclestate_buffer_handler = handle_ref;
1447                 ch->cyclestate.cyclestate_buffer = virtual_address;
1448                 ch->cyclestate.cyclestate_buffer_size = cyclestate_buffer_size;
1449                 return 0;
1450
1451         } else if (!args->nvmap_handle && ch->cyclestate.cyclestate_buffer_handler) {
1452                 /* disable existing cyclestats buffer */
1453                 mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
1454                 nvhost_memmgr_munmap(ch->cyclestate.cyclestate_buffer_handler,
1455                                      ch->cyclestate.cyclestate_buffer);
1456                 nvhost_memmgr_put(memmgr,
1457                                   ch->cyclestate.cyclestate_buffer_handler);
1458                 ch->cyclestate.cyclestate_buffer_handler = NULL;
1459                 ch->cyclestate.cyclestate_buffer = NULL;
1460                 ch->cyclestate.cyclestate_buffer_size = 0;
1461                 mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
1462                 return 0;
1463
1464         } else if (!args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
1465                 /* no request from GL */
1466                 return 0;
1467
1468         } else {
1469                 return -EINVAL;
1470         }
1471 }
1472 #endif
1473
1474 int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
1475                             struct nvhost_zcull_bind_args *args)
1476 {
1477         struct gk20a *g = ch->g;
1478         struct gr_gk20a *gr = &g->gr;
1479
1480         nvhost_dbg_fn("");
1481
1482         return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
1483                                 args->gpu_va, args->mode);
1484 }
1485
1486 /* in this context the "channel" is the host1x channel which
1487  * maps to *all* gk20a channels */
1488 int gk20a_channel_suspend(struct gk20a *g)
1489 {
1490         struct fifo_gk20a *f = &g->fifo;
1491         u32 chid;
1492         bool channels_in_use = false;
1493
1494         nvhost_dbg_fn("");
1495
1496         for (chid = 0; chid < f->num_channels; chid++) {
1497                 if (f->channel[chid].in_use) {
1498
1499                         nvhost_dbg_info("suspend channel %d", chid);
1500
1501                         /* disable channel */
1502                         gk20a_writel(g, ccsr_channel_r(chid),
1503                                 gk20a_readl(g, ccsr_channel_r(chid)) |
1504                                 ccsr_channel_enable_clr_true_f());
1505                         /* preempt the channel */
1506                         gk20a_fifo_preempt_channel(g,
1507                                 ENGINE_GR_GK20A, chid);
1508
1509                         channels_in_use = true;
1510                 }
1511         }
1512
1513         if (channels_in_use) {
1514                 gk20a_fifo_update_runlist(g, ENGINE_GR_GK20A, ~0, false);
1515
1516                 for (chid = 0; chid < f->num_channels; chid++) {
1517                         if (f->channel[chid].in_use)
1518                                 channel_gk20a_unbind(&f->channel[chid]);
1519                 }
1520         }
1521
1522         nvhost_dbg_fn("done");
1523         return 0;
1524 }
1525
1526 /* in this context the "channel" is the host1x channel which
1527  * maps to *all* gk20a channels */
1528 int gk20a_channel_resume(struct gk20a *g)
1529 {
1530         struct fifo_gk20a *f = &g->fifo;
1531         u32 chid;
1532         bool channels_in_use = false;
1533
1534         nvhost_dbg_fn("");
1535
1536         for (chid = 0; chid < f->num_channels; chid++) {
1537                 if (f->channel[chid].in_use) {
1538                         nvhost_dbg_info("resume channel %d", chid);
1539                         channel_gk20a_bind(&f->channel[chid]);
1540                         channels_in_use = true;
1541                 }
1542         }
1543
1544         if (channels_in_use)
1545                 gk20a_fifo_update_runlist(g, ENGINE_GR_GK20A, ~0, true);
1546
1547         nvhost_dbg_fn("done");
1548         return 0;
1549 }