1 /*
2  * drivers/video/tegra/host/gk20a/channel_gk20a.c
3  *
4  * GK20A Graphics channel
5  *
6  * Copyright (c) 2011-2013, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/list.h>
23 #include <linux/delay.h>
24 #include <linux/highmem.h> /* needed for nvmap.h */
25 #include <trace/events/nvhost.h>
26 #include <linux/scatterlist.h>
27
28 #include "dev.h"
29 #include "nvhost_as.h"
30 #include "debug.h"
31 #include "nvhost_sync.h"
32
33 #include "gk20a.h"
34 #include "dbg_gpu_gk20a.h"
35
36 #include "hw_ram_gk20a.h"
37 #include "hw_fifo_gk20a.h"
38 #include "hw_pbdma_gk20a.h"
39 #include "hw_ccsr_gk20a.h"
40 #include "hw_ltc_gk20a.h"
41 #include "chip_support.h"
42
43 #define NVMAP_HANDLE_PARAM_SIZE 1
44
45 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
46 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
47
48 static int alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
49                              struct priv_cmd_entry **entry);
50 static void free_priv_cmdbuf(struct channel_gk20a *c,
51                              struct priv_cmd_entry *e);
52 static void recycle_priv_cmdbuf(struct channel_gk20a *c);
53
54 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
55 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
56
57 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
58 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
59 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
60                         u64 gpfifo_base, u32 gpfifo_entries);
61
62 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
63 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
64
65 static int channel_gk20a_alloc_inst(struct gk20a *g,
66                                 struct channel_gk20a *ch);
67 static void channel_gk20a_free_inst(struct gk20a *g,
68                                 struct channel_gk20a *ch);
69
70 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
71                                         bool add);
72
73 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
74 {
75         struct channel_gk20a *ch = NULL;
76         int chid;
77
78         mutex_lock(&f->ch_inuse_mutex);
79         for (chid = 0; chid < f->num_channels; chid++) {
80                 if (!f->channel[chid].in_use) {
81                         f->channel[chid].in_use = true;
82                         ch = &f->channel[chid];
83                         break;
84                 }
85         }
86         mutex_unlock(&f->ch_inuse_mutex);
87
88         return ch;
89 }
90
91 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
92 {
93         mutex_lock(&f->ch_inuse_mutex);
94         f->channel[c->hw_chid].in_use = false;
95         mutex_unlock(&f->ch_inuse_mutex);
96 }
97
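/*
 * Program this channel's instance block with its address space: the low
 * page-directory-base word takes bits 31:12 of the PDE table address plus the
 * vid_mem target and volatile bits, the high word takes the upper 32 bits,
 * and the adr_limit words record the VM's va_limit.
 */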
98 int channel_gk20a_commit_va(struct channel_gk20a *c)
99 {
100         u64 addr;
101         u32 addr_lo;
102         u32 addr_hi;
103         void *inst_ptr;
104
105         nvhost_dbg_fn("");
106
107         inst_ptr = c->inst_block.cpuva;
108         if (!inst_ptr)
109                 return -ENOMEM;
110
111         addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
112         addr_lo = u64_lo32(addr >> 12);
113         addr_hi = u64_hi32(addr);
114
115         nvhost_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
116                    (u64)addr, addr_lo, addr_hi);
117
118         mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
119                 ram_in_page_dir_base_target_vid_mem_f() |
120                 ram_in_page_dir_base_vol_true_f() |
121                 ram_in_page_dir_base_lo_f(addr_lo));
122
123         mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
124                 ram_in_page_dir_base_hi_f(addr_hi));
125
126         mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
127                  u64_lo32(c->vm->va_limit) | 0xFFF);
128
129         mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
130                 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
131
132         gk20a_mm_l2_invalidate(c->g);
133
134         return 0;
135 }
136
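/*
 * Point the RAMFC area inside the instance block at this channel's USERD.
 * The low word carries the USERD IOVA right-shifted by ram_userd_base_shift_v()
 * together with the vid_mem target; the high word carries the upper 32 bits.
 */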
137 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
138 {
139         u32 addr_lo;
140         u32 addr_hi;
141         void *inst_ptr;
142
143         nvhost_dbg_fn("");
144
145         inst_ptr = c->inst_block.cpuva;
146         if (!inst_ptr)
147                 return -ENOMEM;
148
149         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
150         addr_hi = u64_hi32(c->userd_iova);
151
152         nvhost_dbg_info("channel %d : set ramfc userd 0x%16llx",
153                 c->hw_chid, c->userd_iova);
154
155         mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
156                  pbdma_userd_target_vid_mem_f() |
157                  pbdma_userd_addr_f(addr_lo));
158
159         mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
160                  pbdma_userd_target_vid_mem_f() |
161                  pbdma_userd_hi_addr_f(addr_hi));
162
163         gk20a_mm_l2_invalidate(c->g);
164
165         return 0;
166 }
167
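/*
 * The engine timeslice is stored as an 8-bit mantissa ("value", low byte of
 * the register) and an exponent ("shift", written at bit 12). Shift starts at
 * 3, and the loop below halves value while bumping shift, keeping
 * value << shift roughly equal to timeslice_timeout << 3.
 * For example, timeslice_timeout = 1000 folds down to value = 250, shift = 5.
 */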
168 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
169                                 u32 timeslice_timeout)
170 {
171         void *inst_ptr;
172         int shift = 3;
173         int value = timeslice_timeout;
174
175         inst_ptr = c->inst_block.cpuva;
176         if (!inst_ptr)
177                 return -ENOMEM;
178
179         /* disable channel */
180         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
181                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
182                 ccsr_channel_enable_clr_true_f());
183
184         /* preempt the channel */
185         WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
186
187         /* flush GPU cache */
188         gk20a_mm_l2_flush(c->g, true);
189
190         /* value field is 8 bits long */
191         while (value >= 1 << 8) {
192                 value >>= 1;
193                 shift++;
194         }
195
196         /* time slice register is only 18 bits long */
197         if ((value << shift) >= 1<<19) {
198                 pr_err("Requested timeslice value is clamped to 18 bits\n");
199                 value = 255;
200                 shift = 10;
201         }
202
203         /* set new timeslice */
204         mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
205                 value | (shift << 12) |
206                 fifo_eng_timeslice_enable_true_f());
207
208         /* enable channel */
209         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
210                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
211                 ccsr_channel_enable_set_true_f());
212
213         gk20a_mm_l2_invalidate(c->g);
214
215         return 0;
216 }
217
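/*
 * Build a fresh RAMFC image in the instance block: GPFIFO base address and
 * log2 entry count, PBDMA signature/format/pb_header defaults, acquire
 * timeout settings, engine and PB timeslices, the hardware channel id and
 * HCE priv mode.
 */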
218 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
219                                 u64 gpfifo_base, u32 gpfifo_entries)
220 {
221         void *inst_ptr;
222
223         nvhost_dbg_fn("");
224
225         inst_ptr = c->inst_block.cpuva;
226         if (!inst_ptr)
227                 return -ENOMEM;
228
229         memset(inst_ptr, 0, ram_fc_size_val_v());
230
231         mem_wr32(inst_ptr, ram_fc_gp_base_w(),
232                 pbdma_gp_base_offset_f(
233                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
234
235         mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
236                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
237                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
238
239         mem_wr32(inst_ptr, ram_fc_signature_w(),
240                  pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
241
242         mem_wr32(inst_ptr, ram_fc_formats_w(),
243                 pbdma_formats_gp_fermi0_f() |
244                 pbdma_formats_pb_fermi1_f() |
245                 pbdma_formats_mp_fermi0_f());
246
247         mem_wr32(inst_ptr, ram_fc_pb_header_w(),
248                 pbdma_pb_header_priv_user_f() |
249                 pbdma_pb_header_method_zero_f() |
250                 pbdma_pb_header_subchannel_zero_f() |
251                 pbdma_pb_header_level_main_f() |
252                 pbdma_pb_header_first_true_f() |
253                 pbdma_pb_header_type_inc_f());
254
255         mem_wr32(inst_ptr, ram_fc_subdevice_w(),
256                 pbdma_subdevice_id_f(1) |
257                 pbdma_subdevice_status_active_f() |
258                 pbdma_subdevice_channel_dma_enable_f());
259
260         mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
261
262         mem_wr32(inst_ptr, ram_fc_acquire_w(),
263                 pbdma_acquire_retry_man_2_f() |
264                 pbdma_acquire_retry_exp_2_f() |
265                 pbdma_acquire_timeout_exp_max_f() |
266                 pbdma_acquire_timeout_man_max_f() |
267                 pbdma_acquire_timeout_en_disable_f());
268
269         mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
270                 fifo_eng_timeslice_timeout_128_f() |
271                 fifo_eng_timeslice_timescale_3_f() |
272                 fifo_eng_timeslice_enable_true_f());
273
274         mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
275                 fifo_pb_timeslice_timeout_16_f() |
276                 fifo_pb_timeslice_timescale_0_f() |
277                 fifo_pb_timeslice_enable_true_f());
278
279         mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
280
281         /* TBD: always priv mode? */
282         mem_wr32(inst_ptr, ram_fc_hce_ctrl_w(),
283                  pbdma_hce_ctrl_hce_priv_mode_yes_f());
284
285         gk20a_mm_l2_invalidate(c->g);
286
287         return 0;
288 }
289
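/*
 * Clear the USERD area (GET/PUT, GP_GET/GP_PUT and reference words) before
 * first use; the driver later reads GP_GET and writes GP_PUT through this
 * page over BAR1 when submitting gpfifo entries.
 */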
290 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
291 {
292         BUG_ON(!c->userd_cpu_va);
293
294         nvhost_dbg_fn("");
295
296         mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
297         mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
298         mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
299         mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
300         mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
301         mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
302         mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
303         mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
304         mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
305         mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
306
307         gk20a_mm_l2_invalidate(c->g);
308
309         return 0;
310 }
311
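/*
 * Binding makes the channel known to the host: select the GR engine's runlist
 * in CCSR, write the instance block pointer (physical address shifted by
 * ram_in_base_shift_v(), vid_mem target, bind bit set) and finally set the
 * channel enable bit.
 */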
312 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
313 {
314         struct gk20a *g = get_gk20a(ch_gk20a->ch->dev);
315         struct fifo_gk20a *f = &g->fifo;
316         struct fifo_engine_info_gk20a *engine_info =
317                 f->engine_info + ENGINE_GR_GK20A;
318
319         u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
320                 >> ram_in_base_shift_v();
321
322         nvhost_dbg_info("bind channel %d inst ptr 0x%08x",
323                 ch_gk20a->hw_chid, inst_ptr);
324
325         ch_gk20a->bound = true;
326
327         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
328                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
329                  ~ccsr_channel_runlist_f(~0)) |
330                  ccsr_channel_runlist_f(engine_info->runlist_id));
331
332         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
333                 ccsr_channel_inst_ptr_f(inst_ptr) |
334                 ccsr_channel_inst_target_vid_mem_f() |
335                 ccsr_channel_inst_bind_true_f());
336
337         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
338                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
339                  ~ccsr_channel_enable_set_f(~0)) |
340                  ccsr_channel_enable_set_true_f());
341 }
342
343 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
344 {
345         struct gk20a *g = get_gk20a(ch_gk20a->ch->dev);
346
347         nvhost_dbg_fn("");
348
349         if (ch_gk20a->bound)
350                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
351                         ccsr_channel_inst_ptr_f(0) |
352                         ccsr_channel_inst_bind_false_f());
353
354         ch_gk20a->bound = false;
355 }
356
357 static int channel_gk20a_alloc_inst(struct gk20a *g,
358                                 struct channel_gk20a *ch)
359 {
360         struct device *d = dev_from_gk20a(g);
361         int err = 0;
362
363         nvhost_dbg_fn("");
364
365         ch->inst_block.size = ram_in_alloc_size_v();
366         ch->inst_block.cpuva = dma_alloc_coherent(d,
367                                         ch->inst_block.size,
368                                         &ch->inst_block.iova,
369                                         GFP_KERNEL);
370         if (!ch->inst_block.cpuva) {
371                 nvhost_err(d, "%s: memory allocation failed\n", __func__);
372                 err = -ENOMEM;
373                 goto clean_up;
374         }
375
376         ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
377                                                         ch->inst_block.iova);
378         if (!ch->inst_block.cpu_pa) {
379                 nvhost_err(d, "%s: failed to get physical address\n", __func__);
380                 err = -ENOMEM;
381                 goto clean_up;
382         }
383
384         nvhost_dbg_info("channel %d inst block physical addr: 0x%16llx",
385                 ch->hw_chid, ch->inst_block.cpu_pa);
386
387         nvhost_dbg_fn("done");
388         return 0;
389
390 clean_up:
391         nvhost_err(d, "fail");
392         channel_gk20a_free_inst(g, ch);
393         return err;
394 }
395
396 static void channel_gk20a_free_inst(struct gk20a *g,
397                                 struct channel_gk20a *ch)
398 {
399         struct device *d = dev_from_gk20a(g);
400
401         if (ch->inst_block.cpuva)
402                 dma_free_coherent(d, ch->inst_block.size,
403                                 ch->inst_block.cpuva, ch->inst_block.iova);
404         ch->inst_block.cpuva = NULL;
405         ch->inst_block.iova = 0;
406         memset(&ch->inst_block, 0, sizeof(struct inst_desc));
407 }
408
409 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
410 {
411         return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
412 }
413
414 void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
415 {
416         struct nvhost_device_data *pdata = nvhost_get_devdata(ch->g->dev);
417         struct nvhost_master *host = host_from_gk20a_channel(ch);
418
419         /* ensure no fences are pending */
420         nvhost_syncpt_set_min_eq_max(&host->syncpt,
421                                      ch->hw_chid + pdata->syncpt_base);
422
423         /* disable channel */
424         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
425                      gk20a_readl(ch->g,
426                      ccsr_channel_r(ch->hw_chid)) |
427                      ccsr_channel_enable_clr_true_f());
428 }
429
430 static int gk20a_wait_channel_idle(struct channel_gk20a *ch)
431 {
432         bool channel_idle = false;
433         unsigned long end_jiffies = jiffies +
434                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
435
436         do {
437                 mutex_lock(&ch->jobs_lock);
438                 channel_idle = list_empty(&ch->jobs);
439                 mutex_unlock(&ch->jobs_lock);
440                 if (channel_idle)
441                         break;
442
443                 usleep_range(1000, 3000);
444         } while (time_before(jiffies, end_jiffies));
445
446         if (!channel_idle)
447                 nvhost_err(dev_from_gk20a(ch->g), "channel jobs not freed");
448
449         return 0;
450 }
451
452 void gk20a_disable_channel(struct channel_gk20a *ch,
453                            bool finish,
454                            unsigned long finish_timeout)
455 {
456         if (finish) {
457                 int err = gk20a_channel_finish(ch, finish_timeout);
458                 WARN_ON(err);
459         }
460
461         /* disable the channel from hw and increment syncpoints */
462         gk20a_disable_channel_no_update(ch);
463
464         gk20a_wait_channel_idle(ch);
465
466         /* preempt the channel */
467         gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
468
469         /* remove channel from runlist */
470         channel_gk20a_update_runlist(ch, false);
471 }
472
473 #if defined(CONFIG_TEGRA_GPU_CYCLE_STATS)
474
475 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
476 {
477         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
478         /* disable existing cyclestats buffer */
479         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
480         if (ch->cyclestate.cyclestate_buffer_handler) {
481                 nvhost_memmgr_munmap(ch->cyclestate.cyclestate_buffer_handler,
482                                 ch->cyclestate.cyclestate_buffer);
483                 nvhost_memmgr_put(memmgr,
484                                 ch->cyclestate.cyclestate_buffer_handler);
485                 ch->cyclestate.cyclestate_buffer_handler = NULL;
486                 ch->cyclestate.cyclestate_buffer = NULL;
487                 ch->cyclestate.cyclestate_buffer_size = 0;
488         }
489         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
490 }
491
492 int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
493                        struct nvhost_cycle_stats_args *args)
494 {
495         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
496         struct mem_handle *handle_ref;
497         void *virtual_address;
498         u64 cyclestate_buffer_size;
499         struct platform_device *dev = ch->ch->dev;
500
501         if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
502
503                 /* set up new cyclestats buffer */
504                 handle_ref = nvhost_memmgr_get(memmgr,
505                                 args->nvmap_handle, dev);
506                 if (IS_ERR(handle_ref))
507                         return PTR_ERR(handle_ref);
508                 virtual_address = nvhost_memmgr_mmap(handle_ref);
509                 if (!virtual_address)
510                         return -ENOMEM;
511
512                 nvhost_memmgr_get_param(memmgr, handle_ref,
513                                         NVMAP_HANDLE_PARAM_SIZE,
514                                         &cyclestate_buffer_size);
515
516                 ch->cyclestate.cyclestate_buffer_handler = handle_ref;
517                 ch->cyclestate.cyclestate_buffer = virtual_address;
518                 ch->cyclestate.cyclestate_buffer_size = cyclestate_buffer_size;
519                 return 0;
520
521         } else if (!args->nvmap_handle &&
522                         ch->cyclestate.cyclestate_buffer_handler) {
523                 gk20a_free_cycle_stats_buffer(ch);
524                 return 0;
525
526         } else if (!args->nvmap_handle &&
527                         !ch->cyclestate.cyclestate_buffer_handler) {
528                 /* no request from GL */
529                 return 0;
530
531         } else {
532                 pr_err("channel already has cyclestats buffer\n");
533                 return -EINVAL;
534         }
535 }
536 #endif
537
538 int gk20a_init_error_notifier(struct nvhost_hwctx *ctx,
539                 u32 memhandle, u64 offset) {
540         struct channel_gk20a *ch = ctx->priv;
541         struct platform_device *dev = ch->ch->dev;
542         void *va;
543
544         struct mem_mgr *memmgr;
545         struct mem_handle *handle_ref;
546
547         if (!memhandle) {
548                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
549                 return -EINVAL;
550         }
551
552         memmgr = gk20a_channel_mem_mgr(ch);
553         handle_ref = nvhost_memmgr_get(memmgr, memhandle, dev);
554
555         if (ctx->error_notifier_ref)
556                 gk20a_free_error_notifiers(ctx);
557
558         if (IS_ERR(handle_ref)) {
559                 pr_err("Invalid handle: %d\n", memhandle);
560                 return -EINVAL;
561         }
562         /* map handle */
563         va = nvhost_memmgr_mmap(handle_ref);
564         if (!va) {
565                 nvhost_memmgr_put(memmgr, handle_ref);
566                 pr_err("Cannot map notifier handle\n");
567                 return -ENOMEM;
568         }
569
570         /* set hwctx notifiers pointer */
571         ctx->error_notifier_ref = handle_ref;
572         ctx->error_notifier = va + offset;
573         ctx->error_notifier_va = va;
574         return 0;
575 }
576
577 void gk20a_set_error_notifier(struct nvhost_hwctx *ctx, __u32 error)
578 {
579         if (ctx->error_notifier_ref) {
580                 struct timespec time_data;
581                 u64 nsec;
582                 getnstimeofday(&time_data);
583                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
584                                 (u64)time_data.tv_nsec;
585                 ctx->error_notifier->time_stamp.nanoseconds[0] =
586                                 (u32)nsec;
587                 ctx->error_notifier->time_stamp.nanoseconds[1] =
588                                 (u32)(nsec >> 32);
589                 ctx->error_notifier->info32 = error;
590                 ctx->error_notifier->status = 0xffff;
591                 nvhost_err(&ctx->channel->dev->dev,
592                                 "error notifier set to %d\n", error);
593         }
594 }
595
596 void gk20a_free_error_notifiers(struct nvhost_hwctx *ctx)
597 {
598         if (ctx->error_notifier_ref) {
599                 struct channel_gk20a *ch = ctx->priv;
600                 struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
601                 nvhost_memmgr_munmap(ctx->error_notifier_ref,
602                                 ctx->error_notifier_va);
603                 nvhost_memmgr_put(memmgr, ctx->error_notifier_ref);
604                 ctx->error_notifier_ref = 0;
605         }
606 }
607
608 void gk20a_free_channel(struct nvhost_hwctx *ctx, bool finish)
609 {
610         struct channel_gk20a *ch = ctx->priv;
611         struct gk20a *g = ch->g;
612         struct device *d = dev_from_gk20a(g);
613         struct fifo_gk20a *f = &g->fifo;
614         struct gr_gk20a *gr = &g->gr;
615         struct vm_gk20a *ch_vm = ch->vm;
616         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
617         struct dbg_session_gk20a *dbg_s;
618
619         nvhost_dbg_fn("");
620
621         /* if engine reset was deferred, perform it now */
622         mutex_lock(&f->deferred_reset_mutex);
623         if (g->fifo.deferred_reset_pending) {
624                 nvhost_dbg(dbg_intr | dbg_gpu_dbg, "engine reset was"
625                            " deferred, running now");
626                 fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
627                 g->fifo.deferred_reset_pending = false;
628         }
629         mutex_unlock(&f->deferred_reset_mutex);
630
631         if (!ch->bound)
632                 return;
633
634         if (!gk20a_channel_as_bound(ch))
635                 goto unbind;
636
637         nvhost_dbg_info("freeing bound channel context, timeout=%ld",
638                         timeout);
639
640         gk20a_disable_channel(ch, finish, timeout);
641
642         gk20a_free_error_notifiers(ctx);
643
644         /* release channel ctx */
645         gk20a_free_channel_ctx(ch);
646
647         gk20a_gr_flush_channel_tlb(gr);
648
649         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
650
651         /* free gpfifo */
652         if (ch->gpfifo.gpu_va)
653                 gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
654                         ch->gpfifo.size, mem_flag_none);
655         if (ch->gpfifo.cpu_va)
656                 dma_free_coherent(d, ch->gpfifo.size,
657                         ch->gpfifo.cpu_va, ch->gpfifo.iova);
658         ch->gpfifo.cpu_va = NULL;
659         ch->gpfifo.iova = 0;
660
661         gk20a_mm_l2_invalidate(ch->g);
662
663         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
664
665 #if defined(CONFIG_TEGRA_GPU_CYCLE_STATS)
666         gk20a_free_cycle_stats_buffer(ch);
667 #endif
668
669         ctx->priv = NULL;
670         channel_gk20a_free_priv_cmdbuf(ch);
671
672         /* release hwctx binding to the as_share */
673         nvhost_as_release_share(ch_vm->as_share, ctx);
674
675 unbind:
676         channel_gk20a_unbind(ch);
677         channel_gk20a_free_inst(g, ch);
678
679         ch->vpr = false;
680
681         /* unlink all debug sessions */
682         mutex_lock(&ch->dbg_s_lock);
683
684         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
685                 dbg_s->ch = NULL;
686                 list_del_init(&dbg_s->dbg_s_list_node);
687         }
688
689         mutex_unlock(&ch->dbg_s_lock);
690
691         /* ALWAYS last */
692         release_used_channel(f, ch);
693 }
694
695 struct nvhost_hwctx *gk20a_open_channel(struct nvhost_channel *ch,
696                                          struct nvhost_hwctx *ctx)
697 {
698         struct gk20a *g = get_gk20a(ch->dev);
699         struct fifo_gk20a *f = &g->fifo;
700         struct channel_gk20a *ch_gk20a;
701
702         ch_gk20a = acquire_unused_channel(f);
703         if (ch_gk20a == NULL) {
704                 /* TBD: we want to make this virtualizable */
705                 nvhost_err(dev_from_gk20a(g), "out of hw chids");
706                 return 0;
707         }
708
709         ctx->priv = ch_gk20a;
710         ch_gk20a->g = g;
711         /* note the ch here is the same for *EVERY* gk20a channel */
712         ch_gk20a->ch = ch;
713         /* but there's one hwctx per gk20a channel */
714         ch_gk20a->hwctx = ctx;
715
716         if (channel_gk20a_alloc_inst(g, ch_gk20a)) {
717                 ch_gk20a->in_use = false;
718                 ctx->priv = 0;
719                 nvhost_err(dev_from_gk20a(g),
720                            "failed to open gk20a channel, out of inst mem");
721
722                 return 0;
723         }
724         channel_gk20a_bind(ch_gk20a);
725         ch_gk20a->pid = current->pid;
726
727         /* The channel is *not* runnable at this point. It still needs an
728          * address space bound and a gpfifo and grctx allocated. */
729
730
731         init_waitqueue_head(&ch_gk20a->notifier_wq);
732         init_waitqueue_head(&ch_gk20a->semaphore_wq);
733         init_waitqueue_head(&ch_gk20a->submit_wq);
734
735         return ctx;
736 }
737
738 #if 0
739 /* move to debug_gk20a.c ... */
740 static void dump_gpfifo(struct channel_gk20a *c)
741 {
742         void *inst_ptr;
743         u32 chid = c->hw_chid;
744
745         nvhost_dbg_fn("");
746
747         inst_ptr = nvhost_memmgr_mmap(c->inst_block.mem.ref);
748         if (!inst_ptr)
749                 return;
750
751         nvhost_dbg_info("ramfc for channel %d:\n"
752                 "ramfc: gp_base 0x%08x, gp_base_hi 0x%08x, "
753                 "gp_fetch 0x%08x, gp_get 0x%08x, gp_put 0x%08x, "
754                 "pb_fetch 0x%08x, pb_fetch_hi 0x%08x, "
755                 "pb_get 0x%08x, pb_get_hi 0x%08x, "
756                 "pb_put 0x%08x, pb_put_hi 0x%08x\n"
757                 "userd: gp_put 0x%08x, gp_get 0x%08x, "
758                 "get 0x%08x, get_hi 0x%08x, "
759                 "put 0x%08x, put_hi 0x%08x\n"
760                 "pbdma: status 0x%08x, channel 0x%08x, userd 0x%08x, "
761                 "gp_base 0x%08x, gp_base_hi 0x%08x, "
762                 "gp_fetch 0x%08x, gp_get 0x%08x, gp_put 0x%08x, "
763                 "pb_fetch 0x%08x, pb_fetch_hi 0x%08x, "
764                 "get 0x%08x, get_hi 0x%08x, put 0x%08x, put_hi 0x%08x\n"
765                 "channel: ccsr_channel 0x%08x",
766                 chid,
767                 mem_rd32(inst_ptr, ram_fc_gp_base_w()),
768                 mem_rd32(inst_ptr, ram_fc_gp_base_hi_w()),
769                 mem_rd32(inst_ptr, ram_fc_gp_fetch_w()),
770                 mem_rd32(inst_ptr, ram_fc_gp_get_w()),
771                 mem_rd32(inst_ptr, ram_fc_gp_put_w()),
772                 mem_rd32(inst_ptr, ram_fc_pb_fetch_w()),
773                 mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()),
774                 mem_rd32(inst_ptr, ram_fc_pb_get_w()),
775                 mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()),
776                 mem_rd32(inst_ptr, ram_fc_pb_put_w()),
777                 mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()),
778                 mem_rd32(c->userd_cpu_va, ram_userd_gp_put_w()),
779                 mem_rd32(c->userd_cpu_va, ram_userd_gp_get_w()),
780                 mem_rd32(c->userd_cpu_va, ram_userd_get_w()),
781                 mem_rd32(c->userd_cpu_va, ram_userd_get_hi_w()),
782                 mem_rd32(c->userd_cpu_va, ram_userd_put_w()),
783                 mem_rd32(c->userd_cpu_va, ram_userd_put_hi_w()),
784                 gk20a_readl(c->g, pbdma_status_r(0)),
785                 gk20a_readl(c->g, pbdma_channel_r(0)),
786                 gk20a_readl(c->g, pbdma_userd_r(0)),
787                 gk20a_readl(c->g, pbdma_gp_base_r(0)),
788                 gk20a_readl(c->g, pbdma_gp_base_hi_r(0)),
789                 gk20a_readl(c->g, pbdma_gp_fetch_r(0)),
790                 gk20a_readl(c->g, pbdma_gp_get_r(0)),
791                 gk20a_readl(c->g, pbdma_gp_put_r(0)),
792                 gk20a_readl(c->g, pbdma_pb_fetch_r(0)),
793                 gk20a_readl(c->g, pbdma_pb_fetch_hi_r(0)),
794                 gk20a_readl(c->g, pbdma_get_r(0)),
795                 gk20a_readl(c->g, pbdma_get_hi_r(0)),
796                 gk20a_readl(c->g, pbdma_put_r(0)),
797                 gk20a_readl(c->g, pbdma_put_hi_r(0)),
798                 gk20a_readl(c->g, ccsr_channel_r(chid)));
799
800         nvhost_memmgr_munmap(c->inst_block.mem.ref, inst_ptr);
801         gk20a_mm_l2_invalidate(c->g);
802 }
803 #endif
804
805 /* Allocate the private command buffer,
806    used for inserting commands before/after user-submitted buffers. */
807 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
808 {
809         struct device *d = dev_from_gk20a(c->g);
810         struct vm_gk20a *ch_vm = c->vm;
811         struct priv_cmd_queue *q = &c->priv_cmd_q;
812         struct priv_cmd_entry *e;
813         u32 i = 0, size;
814         int err = 0;
815         struct sg_table *sgt;
816
817         /* Kernel can insert gpfifos before and after user gpfifos.
818            Before user gpfifos, kernel inserts fence_wait, which takes
819            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
820            After user gpfifos, kernel inserts fence_get, which takes
821            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
822            = 6 dwords.
823            Worst case, if the kernel adds both of them for every user gpfifo,
824            the max size of priv_cmdbuf is:
825            gpfifo entry number * 2 / 3 * (4 + 6) * 4 bytes */
826         size = roundup_pow_of_two(
827                 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
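        /* e.g. for 1024 gpfifo entries this is
         * roundup_pow_of_two(1024 * 2 * 10 * 4 / 3) = roundup_pow_of_two(27306)
         * = 32 KB of private command buffer */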
828
829         q->mem.base_cpuva = dma_alloc_coherent(d, size,
830                                         &q->mem.base_iova,
831                                         GFP_KERNEL);
832         if (!q->mem.base_cpuva) {
833                 nvhost_err(d, "%s: memory allocation failed\n", __func__);
834                 err = -ENOMEM;
835                 goto clean_up;
836         }
837
838         q->mem.size = size;
839
840         err = gk20a_get_sgtable(d, &sgt,
841                         q->mem.base_cpuva, q->mem.base_iova, size);
842         if (err) {
843                 nvhost_err(d, "%s: failed to create sg table\n", __func__);
844                 goto clean_up;
845         }
846
847         memset(q->mem.base_cpuva, 0, size);
848
849         q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
850                                         size,
851                                         0, /* flags */
852                                         mem_flag_none);
853         if (!q->base_gpuva) {
854                 nvhost_err(d, "ch %d : failed to map gpu va "
855                            "for priv cmd buffer", c->hw_chid);
856                 err = -ENOMEM;
857                 goto clean_up_sgt;
858         }
859
860         q->size = q->mem.size / sizeof (u32);
861
862         INIT_LIST_HEAD(&q->head);
863         INIT_LIST_HEAD(&q->free);
864
865         /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
866         for (i = 0; i < q->size / 4; i++) {
867                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
868                 if (!e) {
869                         nvhost_err(d, "ch %d: fail to pre-alloc cmd entry",
870                                 c->hw_chid);
871                         err = -ENOMEM;
872                         goto clean_up_sgt;
873                 }
874                 e->pre_alloc = true;
875                 list_add(&e->list, &q->free);
876         }
877
878         gk20a_free_sgtable(&sgt);
879
880         return 0;
881
882 clean_up_sgt:
883         gk20a_free_sgtable(&sgt);
884 clean_up:
885         channel_gk20a_free_priv_cmdbuf(c);
886         return err;
887 }
888
889 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
890 {
891         struct device *d = dev_from_gk20a(c->g);
892         struct vm_gk20a *ch_vm = c->vm;
893         struct priv_cmd_queue *q = &c->priv_cmd_q;
894         struct priv_cmd_entry *e;
895         struct list_head *pos, *tmp, *head;
896
897         if (q->size == 0)
898                 return;
899
900         if (q->base_gpuva)
901                 gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
902                                 q->mem.size, mem_flag_none);
903         if (q->mem.base_cpuva)
904                 dma_free_coherent(d, q->mem.size,
905                         q->mem.base_cpuva, q->mem.base_iova);
906         q->mem.base_cpuva = NULL;
907         q->mem.base_iova = 0;
908
909         /* free used list */
910         head = &q->head;
911         list_for_each_safe(pos, tmp, head) {
912                 e = container_of(pos, struct priv_cmd_entry, list);
913                 free_priv_cmdbuf(c, e);
914         }
915
916         /* free free list */
917         head = &q->free;
918         list_for_each_safe(pos, tmp, head) {
919                 e = container_of(pos, struct priv_cmd_entry, list);
920                 e->pre_alloc = false;
921                 free_priv_cmdbuf(c, e);
922         }
923
924         memset(q, 0, sizeof(struct priv_cmd_queue));
925 }
926
927 /* allocate a cmd buffer with given size. size is number of u32 entries */
928 static int alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
929                              struct priv_cmd_entry **entry)
930 {
931         struct priv_cmd_queue *q = &c->priv_cmd_q;
932         struct priv_cmd_entry *e;
933         struct list_head *node;
934         u32 free_count;
935         u32 size = orig_size;
936         bool no_retry = false;
937
938         nvhost_dbg_fn("size %d", orig_size);
939
940         *entry = NULL;
941
942         /* if free space at the end is less than requested, increase the size
943          * so that the real allocation starts from the beginning. */
944         if (q->put + size > q->size)
945                 size = orig_size + (q->size - q->put);
946
947         nvhost_dbg_info("ch %d: priv cmd queue get:put %d:%d",
948                         c->hw_chid, q->get, q->put);
949
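        /* Ring-buffer free space: one slot is deliberately kept unused so that
         * get == put means "empty" rather than "full"; the unsigned subtraction
         * plus the final modulo handles the wrapped (put < get) case. */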
950 TRY_AGAIN:
951         free_count = (q->size - (q->put - q->get) - 1) % q->size;
952
953         if (size > free_count) {
954                 if (!no_retry) {
955                         recycle_priv_cmdbuf(c);
956                         no_retry = true;
957                         goto TRY_AGAIN;
958                 } else
959                         return -EAGAIN;
960         }
961
962         if (unlikely(list_empty(&q->free))) {
963
964                 nvhost_dbg_info("ch %d: run out of pre-alloc entries",
965                         c->hw_chid);
966
967                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
968                 if (!e) {
969                         nvhost_err(dev_from_gk20a(c->g),
970                                 "ch %d: fail to allocate priv cmd entry",
971                                 c->hw_chid);
972                         return -ENOMEM;
973                 }
974         } else  {
975                 node = q->free.next;
976                 list_del(node);
977                 e = container_of(node, struct priv_cmd_entry, list);
978         }
979
980         e->size = orig_size;
981         e->gp_get = c->gpfifo.get;
982         e->gp_put = c->gpfifo.put;
983         e->gp_wrap = c->gpfifo.wrap;
984
985         /* if we have increased size to skip free space at the end, set put
986            to the beginning of the cmd buffer (0) + size */
987         if (size != orig_size) {
988                 e->ptr = q->mem.base_cpuva;
989                 e->gva = q->base_gpuva;
990                 q->put = orig_size;
991         } else {
992                 e->ptr = q->mem.base_cpuva + q->put;
993                 e->gva = q->base_gpuva + q->put * sizeof(u32);
994                 q->put = (q->put + orig_size) & (q->size - 1);
995         }
996
997         /* we already handled q->put + size > q->size so BUG_ON this */
998         BUG_ON(q->put > q->size);
999
1000         /* add new entry to head since we free from head */
1001         list_add(&e->list, &q->head);
1002
1003         *entry = e;
1004
1005         nvhost_dbg_fn("done");
1006
1007         return 0;
1008 }
1009
1010 /* Don't call this to free an explicit cmd entry.
1011  * It doesn't update priv_cmd_queue get/put */
1012 static void free_priv_cmdbuf(struct channel_gk20a *c,
1013                              struct priv_cmd_entry *e)
1014 {
1015         struct priv_cmd_queue *q = &c->priv_cmd_q;
1016
1017         if (!e)
1018                 return;
1019
1020         list_del(&e->list);
1021
1022         if (unlikely(!e->pre_alloc))
1023                 kfree(e);
1024         else {
1025                 memset(e, 0, sizeof(struct priv_cmd_entry));
1026                 e->pre_alloc = true;
1027                 list_add(&e->list, &q->free);
1028         }
1029 }
1030
1031 /* free entries if they're no longer being used */
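/* A priv cmd entry records the gpfifo put/wrap at the time it was queued;
 * once the channel's current gpfifo get has moved past that point (taking a
 * wrap of the gpfifo into account) the commands have been fetched, so q->get
 * can be advanced past the newest such entry and the older entries behind it
 * returned to the free list. */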
1032 static void recycle_priv_cmdbuf(struct channel_gk20a *c)
1033 {
1034         struct priv_cmd_queue *q = &c->priv_cmd_q;
1035         struct priv_cmd_entry *e, *tmp;
1036         struct list_head *head = &q->head;
1037         bool wrap_around, found = false;
1038
1039         nvhost_dbg_fn("");
1040
1041         /* Find the most recent free entry. Free it and everything before it */
1042         list_for_each_entry(e, head, list) {
1043
1044                 nvhost_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
1045                         "curr get:put:wrap %d:%d:%d",
1046                         c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
1047                         c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
1048
1049                 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
1050                 if (e->gp_get < e->gp_put) {
1051                         if (c->gpfifo.get >= e->gp_put ||
1052                             wrap_around) {
1053                                 found = true;
1054                                 break;
1055                         } else
1056                                 e->gp_get = c->gpfifo.get;
1057                 } else if (e->gp_get > e->gp_put) {
1058                         if (wrap_around &&
1059                             c->gpfifo.get >= e->gp_put) {
1060                                 found = true;
1061                                 break;
1062                         } else
1063                                 e->gp_get = c->gpfifo.get;
1064                 }
1065         }
1066
1067         if (found)
1068                 q->get = (e->ptr - q->mem.base_cpuva) + e->size;
1069         else {
1070                 nvhost_dbg_info("no free entry recycled");
1071                 return;
1072         }
1073
1074         list_for_each_entry_safe_continue(e, tmp, head, list) {
1075                 free_priv_cmdbuf(c, e);
1076         }
1077
1078         nvhost_dbg_fn("done");
1079 }
1080
1081
1082 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1083                                struct nvhost_alloc_gpfifo_args *args)
1084 {
1085         struct gk20a *g = c->g;
1086         struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
1087         struct device *d = dev_from_gk20a(g);
1088         struct vm_gk20a *ch_vm;
1089         u32 gpfifo_size;
1090         int err = 0;
1091         struct sg_table *sgt;
1092
1093         /* Kernel can insert one extra gpfifo entry before user-submitted gpfifos
1094            and another one after, for internal usage. Triple the requested size. */
1095         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
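        /* e.g. a request for 256 entries becomes roundup_pow_of_two(768) = 1024
         * gpfifo slots */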
1096
1097         if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1098                 c->vpr = true;
1099
1100         /* an address space needs to have been bound at this point.   */
1101         if (!gk20a_channel_as_bound(c)) {
1102                 nvhost_err(d,
1103                             "not bound to an address space at time of gpfifo"
1104                             " allocation");
1106                 return -EINVAL;
1107         }
1108         ch_vm = c->vm;
1109
1110         c->cmds_pending = false;
1111
1112         c->last_submit_fence.valid        = false;
1113         c->last_submit_fence.syncpt_value = 0;
1114         c->last_submit_fence.syncpt_id    = c->hw_chid + pdata->syncpt_base;
1115
1116         c->ramfc.offset = 0;
1117         c->ramfc.size = ram_in_ramfc_s() / 8;
1118
1119         if (c->gpfifo.cpu_va) {
1120                 nvhost_err(d, "channel %d : "
1121                            "gpfifo already allocated", c->hw_chid);
1122                 return -EEXIST;
1123         }
1124
1125         c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
1126         c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
1127                                                 c->gpfifo.size,
1128                                                 &c->gpfifo.iova,
1129                                                 GFP_KERNEL);
1130         if (!c->gpfifo.cpu_va) {
1131                 nvhost_err(d, "%s: memory allocation failed\n", __func__);
1132                 err = -ENOMEM;
1133                 goto clean_up;
1134         }
1135
1136         c->gpfifo.entry_num = gpfifo_size;
1137
1138         c->gpfifo.get = c->gpfifo.put = 0;
1139
1140         err = gk20a_get_sgtable(d, &sgt,
1141                         c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
1142         if (err) {
1143                 nvhost_err(d, "%s: failed to allocate sg table\n", __func__);
1144                 goto clean_up;
1145         }
1146
1147         c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
1148                                         &sgt,
1149                                         c->gpfifo.size,
1150                                         0, /* flags */
1151                                         mem_flag_none);
1152         if (!c->gpfifo.gpu_va) {
1153                 nvhost_err(d, "channel %d : failed to map"
1154                            " gpu_va for gpfifo", c->hw_chid);
1155                 err = -ENOMEM;
1156                 goto clean_up_sgt;
1157         }
1158
1159         nvhost_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1160                 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1161
1162         channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1163
1164         channel_gk20a_setup_userd(c);
1165         channel_gk20a_commit_userd(c);
1166
1167         gk20a_mm_l2_invalidate(c->g);
1168
1169         /* TBD: setup engine contexts */
1170
1171         err = channel_gk20a_alloc_priv_cmdbuf(c);
1172         if (err)
1173                 goto clean_up_unmap;
1174
1175         err = channel_gk20a_update_runlist(c, true);
1176         if (err)
1177                 goto clean_up_unmap;
1178
1179         gk20a_free_sgtable(&sgt);
1180
1181         nvhost_dbg_fn("done");
1182         return 0;
1183
1184 clean_up_unmap:
1185         gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
1186                 c->gpfifo.size, mem_flag_none);
1187 clean_up_sgt:
1188         gk20a_free_sgtable(&sgt);
1189 clean_up:
1190         dma_free_coherent(d, c->gpfifo.size,
1191                 c->gpfifo.cpu_va, c->gpfifo.iova);
1192         c->gpfifo.cpu_va = NULL;
1193         c->gpfifo.iova = 0;
1194         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1195         nvhost_err(d, "fail");
1196         return err;
1197 }
1198
1199 static inline int wfi_cmd_size(void)
1200 {
1201         return 2;
1202 }
1203 void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
1204 {
1205         /* wfi */
1206         cmd->ptr[(*i)++] = 0x2001001E;
1207         /* handle, ignored */
1208         cmd->ptr[(*i)++] = 0x00000000;
1209 }
1210
1211 static inline bool check_gp_put(struct gk20a *g,
1212                                 struct channel_gk20a *c)
1213 {
1214         u32 put;
1215         /* gp_put changed unexpectedly since last update? */
1216         put = gk20a_bar1_readl(g,
1217                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1218         if (c->gpfifo.put != put) {
1219                 /*TBD: BUG_ON/teardown on this*/
1220                 nvhost_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1221                            "since last update");
1222                 c->gpfifo.put = put;
1223                 return false; /* surprise! */
1224         }
1225         return true; /* checked out ok */
1226 }
1227
1228 /* Call this periodically to refresh the cached GET pointer and see how the gpfifo is draining. */
1229 static inline u32 update_gp_get(struct gk20a *g,
1230                                 struct channel_gk20a *c)
1231 {
1232         u32 new_get = gk20a_bar1_readl(g,
1233                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
1234         if (new_get < c->gpfifo.get)
1235                 c->gpfifo.wrap = !c->gpfifo.wrap;
1236         c->gpfifo.get = new_get;
1237         return new_get;
1238 }
1239
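/* Free gpfifo slots, using the same ring arithmetic as the priv cmd queue:
 * one entry stays unused so get == put means empty, and the modulo over the
 * (power of two) entry count handles wraparound. */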
1240 static inline u32 gp_free_count(struct channel_gk20a *c)
1241 {
1242         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1243                 c->gpfifo.entry_num;
1244 }
1245
1246 /* Issue a syncpoint increment *preceded* by a wait-for-idle
1247  * command.  All commands on the channel will have been
1248  * consumed at the time the fence syncpoint increment occurs.
1249  */
1250 int gk20a_channel_submit_wfi_fence(struct gk20a *g,
1251                                    struct channel_gk20a *c,
1252                                    struct nvhost_syncpt *sp,
1253                                    struct nvhost_fence *fence)
1254 {
1255         struct priv_cmd_entry *cmd = NULL;
1256         int cmd_size, j = 0;
1257         u32 free_count;
1258         int err;
1259
1260         cmd_size =  4 + wfi_cmd_size();
1261
1262         update_gp_get(g, c);
1263         free_count = gp_free_count(c);
1264         if (unlikely(!free_count)) {
1265                 nvhost_err(dev_from_gk20a(g),
1266                            "not enough gpfifo space");
1267                 return -EAGAIN;
1268         }
1269
1270         err = alloc_priv_cmdbuf(c, cmd_size, &cmd);
1271         if (unlikely(err)) {
1272                 nvhost_err(dev_from_gk20a(g),
1273                            "not enough priv cmd buffer space");
1274                 return err;
1275         }
1276
1277         fence->value = nvhost_syncpt_incr_max(sp, fence->syncpt_id, 1);
1278
1279         c->last_submit_fence.valid        = true;
1280         c->last_submit_fence.syncpt_value = fence->value;
1281         c->last_submit_fence.syncpt_id    = fence->syncpt_id;
1282         c->last_submit_fence.wfi          = true;
1283
1284         trace_nvhost_ioctl_ctrl_syncpt_incr(fence->syncpt_id);
1285
1286         add_wfi_cmd(cmd, &j);
1287
1288         /* syncpoint_a */
1289         cmd->ptr[j++] = 0x2001001C;
1290         /* payload, ignored */
1291         cmd->ptr[j++] = 0;
1292         /* syncpoint_b */
1293         cmd->ptr[j++] = 0x2001001D;
1294         /* syncpt_id, incr */
1295         cmd->ptr[j++] = (fence->syncpt_id << 8) | 0x1;
1296
1297         c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1298         c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
1299                 pbdma_gp_entry1_length_f(cmd->size);
1300
1301         c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
1302
1303         /* save gp_put */
1304         cmd->gp_put = c->gpfifo.put;
1305
1306         gk20a_bar1_writel(g,
1307                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1308                 c->gpfifo.put);
1309
1310         nvhost_dbg_info("post-submit put %d, get %d, size %d",
1311                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1312
1313         return 0;
1314 }
1315
1316 static u32 get_gp_free_count(struct channel_gk20a *c)
1317 {
1318         update_gp_get(c->g, c);
1319         return gp_free_count(c);
1320 }
1321
1322 static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
1323 {
1324         void *mem = NULL;
1325         unsigned int words;
1326         u64 offset;
1327         struct mem_handle *r = NULL;
1328
1329         if (nvhost_debug_trace_cmdbuf) {
1330                 u64 gpu_va = (u64)g->entry0 |
1331                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1332                 struct mem_mgr *memmgr = NULL;
1333                 int err;
1334
1335                 words = pbdma_gp_entry1_length_v(g->entry1);
1336                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &memmgr, &r,
1337                                            &offset);
1338                 if (!err)
1339                         mem = nvhost_memmgr_mmap(r);
1340         }
1341
1342         if (mem) {
1343                 u32 i;
1344                 /*
1345                  * Write in batches of 128 as there seems to be a limit
1346                  * of how much you can output to ftrace at once.
1347                  */
1348                 for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
1349                         trace_nvhost_cdma_push_gather(
1350                                 c->ch->dev->name,
1351                                 0,
1352                                 min(words - i, TRACE_MAX_LENGTH),
1353                                 offset + i * sizeof(u32),
1354                                 mem);
1355                 }
1356                 nvhost_memmgr_munmap(r, mem);
1357         }
1358 }
1359
1360 static int gk20a_channel_add_job(struct channel_gk20a *c,
1361                                  struct nvhost_fence *fence)
1362 {
1363         struct vm_gk20a *vm = c->vm;
1364         struct channel_gk20a_job *job = NULL;
1365         struct mapped_buffer_node **mapped_buffers = NULL;
1366         int err = 0, num_mapped_buffers;
1367
1368         /* job needs reference to this vm */
1369         gk20a_vm_get(vm);
1370
1371         err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
1372         if (err) {
1373                 gk20a_vm_put(vm);
1374                 return err;
1375         }
1376
1377         job = kzalloc(sizeof(*job), GFP_KERNEL);
1378         if (!job) {
1379                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1380                 gk20a_vm_put(vm);
1381                 return -ENOMEM;
1382         }
1383
1384         job->num_mapped_buffers = num_mapped_buffers;
1385         job->mapped_buffers = mapped_buffers;
1386         job->fence = *fence;
1387
1388         mutex_lock(&c->jobs_lock);
1389         list_add_tail(&job->list, &c->jobs);
1390         mutex_unlock(&c->jobs_lock);
1391
1392         return 0;
1393 }
1394
1395 void gk20a_channel_update(struct channel_gk20a *c)
1396 {
1397         struct gk20a *g = c->g;
1398         struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
1399         struct vm_gk20a *vm = c->vm;
1400         struct channel_gk20a_job *job, *n;
1401
1402         mutex_lock(&c->jobs_lock);
1403         list_for_each_entry_safe(job, n, &c->jobs, list) {
1404                 bool completed = nvhost_syncpt_is_expired(sp,
1405                         job->fence.syncpt_id, job->fence.value);
1406                 if (!completed)
1407                         break;
1408
1409                 gk20a_vm_put_buffers(vm, job->mapped_buffers,
1410                                 job->num_mapped_buffers);
1411
1412                 /* job is done. release its reference to vm */
1413                 gk20a_vm_put(vm);
1414
1415                 list_del_init(&job->list);
1416                 kfree(job);
1417                 nvhost_module_idle(g->dev);
1418         }
1419         mutex_unlock(&c->jobs_lock);
1420 }
1421 #ifdef CONFIG_DEBUG_FS
1422 static void gk20a_sync_debugfs(struct gk20a *g)
1423 {
1424         u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f();
1425         spin_lock(&g->debugfs_lock);
1426         if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) {
1427                 u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r());
1428                 if (g->mm.ltc_enabled_debug)
1429                         /* bypass disabled (normal caching ops)*/
1430                         reg &= ~reg_f;
1431                 else
1432                         /* bypass enabled (no caching) */
1433                         reg |= reg_f;
1434
1435                 gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg);
1436                 g->mm.ltc_enabled = g->mm.ltc_enabled_debug;
1437         }
1438         spin_unlock(&g->debugfs_lock);
1439 }
1440 #endif
1441
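/*
 * Emit the 4-word syncpoint wait used before user gpfifo entries: a
 * syncpoint_a method carrying the threshold payload, followed by a
 * syncpoint_b method carrying the syncpt id with the wait and switch_en bits.
 * gk20a_channel_submit_wfi_fence() above emits the matching wfi + increment
 * sequence for the post-fence.
 */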
1442 void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
1443 {
1444         /* syncpoint_a */
1445         ptr[0] = 0x2001001C;
1446         /* payload */
1447         ptr[1] = thresh;
1448         /* syncpoint_b */
1449         ptr[2] = 0x2001001D;
1450         /* syncpt_id, switch_en, wait */
1451         ptr[3] = (id << 8) | 0x10;
1452 }
1453
1454 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1455                                 struct nvhost_gpfifo *gpfifo,
1456                                 u32 num_entries,
1457                                 struct nvhost_fence *fence,
1458                                 u32 flags)
1459 {
1460         struct gk20a *g = c->g;
1461         struct nvhost_device_data *pdata = nvhost_get_devdata(g->dev);
1462         struct device *d = dev_from_gk20a(g);
1463         struct nvhost_syncpt *sp = syncpt_from_gk20a(g);
1464         u32 i, incr_id = ~0, wait_id = ~0, wait_value = 0;
1465         u32 err = 0;
1466         int incr_cmd_size;
1467         bool wfi_cmd;
1468         int num_wait_cmds = 0;
1469         struct priv_cmd_entry *wait_cmd = NULL;
1470         struct priv_cmd_entry *incr_cmd = NULL;
1471         struct sync_fence *sync_fence = NULL;
1472         /* we might need two extra gpfifo entries - one for syncpoint
1473          * wait and one for syncpoint increment */
1474         const int extra_entries = 2;
1475
1476         if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1477                       NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1478             !fence)
1479                 return -EINVAL;
1480 #ifdef CONFIG_DEBUG_FS
1481         /* update debug settings */
1482         gk20a_sync_debugfs(g);
1483 #endif
1484
1485         nvhost_dbg_info("channel %d", c->hw_chid);
1486
1487         nvhost_module_busy(g->dev);
1488         trace_nvhost_channel_submit_gpfifo(c->ch->dev->name,
1489                                            c->hw_chid,
1490                                            num_entries,
1491                                            flags,
1492                                            fence->syncpt_id, fence->value,
1493                                            c->hw_chid + pdata->syncpt_base);
1494         check_gp_put(g, c);
1495         update_gp_get(g, c);
1496
1497         nvhost_dbg_info("pre-submit put %d, get %d, size %d",
1498                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1499
1500         /* If the caller has requested a fence "get" then we need to be
1501          * sure the fence represents work completion.  In that case
1502          * issue a wait-for-idle before the syncpoint increment.
1503          */
1504         wfi_cmd = !!(flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET);
1505
1506         /* Invalidate tlb if it's dirty...                                   */
1507         /* TBD: this should be done in the cmd stream, not with PRIs.        */
1508         /* We don't know what context is currently running...                */
1509         /* Note also: there can be more than one context associated with the */
1510         /* address space (vm).   */
1511         gk20a_mm_tlb_invalidate(c->vm);
1512
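        /*
         * Note: gpfifo.entry_num is a power of two; gp_free_count() and the
         * "put = (put + 1) & (entry_num - 1)" wrap-arounds below rely on it.
         */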
1513         /* Make sure we have enough space for gpfifo entries. If not,
1514          * wait for signals from completed submits */
1515         if (gp_free_count(c) < num_entries + extra_entries) {
1516                 err = wait_event_interruptible(c->submit_wq,
1517                         get_gp_free_count(c) >= num_entries + extra_entries);
1518         }
1519
1520         if (err) {
1521                 nvhost_err(d, "not enough gpfifo space");
1522                 err = -EAGAIN;
1523                 goto clean_up;
1524         }
1525
1526
1527         if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE
1528                         && flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1529                 sync_fence = nvhost_sync_fdget(fence->syncpt_id);
1530                 if (!sync_fence) {
1531                         nvhost_err(d, "invalid fence fd");
1532                         err = -EINVAL;
1533                         goto clean_up;
1534                 }
1535                 num_wait_cmds = nvhost_sync_num_pts(sync_fence);
1536         }
        /*
         * Optionally insert a syncpt wait at the beginning of the gpfifo
         * submission when the user requested one and the wait hasn't expired
         * yet.  Validate that the wait id makes sense and elide the wait if
         * it doesn't; the only reason this isn't rejected outright is to keep
         * running some tests which trigger this condition.
         */
        else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
                if (fence->syncpt_id >= nvhost_syncpt_nb_pts(sp))
                        dev_warn(d,
                                "invalid wait id in gpfifo submit, elided");
                else if (!nvhost_syncpt_is_expired(sp,
                                        fence->syncpt_id, fence->value))
                        num_wait_cmds = 1;
        }
1552
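        /*
         * Each syncpt wait is encoded as four words by add_wait_cmd(), so
         * reserve 4 * num_wait_cmds words of the channel's private command
         * buffer for the wait entry.
         */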
1553         if (num_wait_cmds) {
1554                 alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd);
1555                 if (wait_cmd == NULL) {
1556                         nvhost_err(d, "not enough priv cmd buffer space");
1557                         err = -EAGAIN;
1558                         goto clean_up;
1559                 }
1560         }
1561
1562         /* always insert syncpt increment at end of gpfifo submission
1563            to keep track of method completion for idle railgating */
1564         /* TODO: we need to find a way to get rid of these wfi on every
1565          * submission...
1566          */
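        /*
         * The increment itself takes four priv-cmd words (syncpoint_a,
         * payload, syncpoint_b, syncpt_id | incr, mirroring add_wait_cmd());
         * an optional wait-for-idle of wfi_cmd_size() words is prepended when
         * a fence "get" was requested.
         */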
1567         incr_cmd_size = 4;
1568         if (wfi_cmd)
1569                 incr_cmd_size += wfi_cmd_size();
1570         alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
1571         if (incr_cmd == NULL) {
1572                 nvhost_err(d, "not enough priv cmd buffer space");
1573                 err = -EAGAIN;
1574                 goto clean_up;
1575         }
1576
1577         if (num_wait_cmds) {
1578                 if (sync_fence) {
1579                         struct sync_pt *pos;
1580                         struct nvhost_sync_pt *pt;
1581                         i = 0;
1582
1583                         list_for_each_entry(pos, &sync_fence->pt_list_head,
1584                                         pt_list) {
1585                                 pt = to_nvhost_sync_pt(pos);
1586
1587                                 wait_id = nvhost_sync_pt_id(pt);
1588                                 wait_value = nvhost_sync_pt_thresh(pt);
1589
1590                                 add_wait_cmd(&wait_cmd->ptr[i * 4],
1591                                                 wait_id, wait_value);
1592
1593                                 i++;
1594                         }
1595                         sync_fence_put(sync_fence);
1596                         sync_fence = NULL;
                } else {
                        wait_id = fence->syncpt_id;
                        wait_value = fence->value;
                        add_wait_cmd(&wait_cmd->ptr[0],
                                        wait_id, wait_value);
                }
1603
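                /*
                 * Push the wait buffer as its own gpfifo entry: entry0 holds
                 * the low 32 bits of its GPU VA, entry1 the high bits plus
                 * the length in words.
                 */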
1604                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1605                         u64_lo32(wait_cmd->gva);
1606                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1607                         u64_hi32(wait_cmd->gva) |
1608                         pbdma_gp_entry1_length_f(wait_cmd->size);
1609                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1610
1611                 c->gpfifo.put = (c->gpfifo.put + 1) &
1612                         (c->gpfifo.entry_num - 1);
1613
1614                 /* save gp_put */
1615                 wait_cmd->gp_put = c->gpfifo.put;
1616         }
1617
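        /*
         * Copy the caller's gpfifo entries verbatim; entry0/entry1 are
         * already in hardware layout (command buffer GPU VA plus length in
         * words, see the per-field comments).
         */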
1618         for (i = 0; i < num_entries; i++) {
1619                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1620                         gpfifo[i].entry0; /* cmd buf va low 32 */
1621                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1622                         gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
1623                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1624                 c->gpfifo.put = (c->gpfifo.put + 1) &
1625                         (c->gpfifo.entry_num - 1);
1626         }
1627
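        /*
         * Append the syncpoint increment and publish the resulting
         * (id, value) pair through 'fence'; when SYNC_FENCE was requested the
         * pair is wrapped in a sync fence and the handle is returned via
         * fence->syncpt_id instead.
         */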
1628         if (incr_cmd) {
1629                 int j = 0;
1630                 incr_id = c->hw_chid + pdata->syncpt_base;
1631                 fence->syncpt_id = incr_id;
1632                 fence->value     = nvhost_syncpt_incr_max(sp, incr_id, 1);
1633
1634                 c->last_submit_fence.valid        = true;
1635                 c->last_submit_fence.syncpt_value = fence->value;
1636                 c->last_submit_fence.syncpt_id    = fence->syncpt_id;
1637                 c->last_submit_fence.wfi          = wfi_cmd;
1638
1639                 trace_nvhost_ioctl_ctrl_syncpt_incr(fence->syncpt_id);
1640                 if (wfi_cmd)
1641                         add_wfi_cmd(incr_cmd, &j);
1642                 /* syncpoint_a */
1643                 incr_cmd->ptr[j++] = 0x2001001C;
1644                 /* payload, ignored */
1645                 incr_cmd->ptr[j++] = 0;
1646                 /* syncpoint_b */
1647                 incr_cmd->ptr[j++] = 0x2001001D;
1648                 /* syncpt_id, incr */
1649                 incr_cmd->ptr[j++] = (fence->syncpt_id << 8) | 0x1;
1650
1651                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1652                         u64_lo32(incr_cmd->gva);
1653                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1654                         u64_hi32(incr_cmd->gva) |
1655                         pbdma_gp_entry1_length_f(incr_cmd->size);
1656                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1657
1658                 c->gpfifo.put = (c->gpfifo.put + 1) &
1659                         (c->gpfifo.entry_num - 1);
1660
1661                 /* save gp_put */
1662                 incr_cmd->gp_put = c->gpfifo.put;
1663
1664                 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1665                         struct nvhost_ctrl_sync_fence_info pts;
1666
1667                         pts.id = fence->syncpt_id;
1668                         pts.thresh = fence->value;
1669
1670                         fence->syncpt_id = 0;
1671                         fence->value = 0;
1672                         err = nvhost_sync_create_fence(sp, &pts, 1, "fence",
1673                                         &fence->syncpt_id);
1674                 }
1675         }
1676
1677         /* Invalidate tlb if it's dirty...                                   */
1678         /* TBD: this should be done in the cmd stream, not with PRIs.        */
1679         /* We don't know what context is currently running...                */
1680         /* Note also: there can be more than one context associated with the */
1681         /* address space (vm).   */
1682         gk20a_mm_tlb_invalidate(c->vm);
1683
1684         trace_nvhost_channel_submitted_gpfifo(c->ch->dev->name,
1685                                            c->hw_chid,
1686                                            num_entries,
1687                                            flags,
1688                                            wait_id, wait_value,
1689                                            fence->syncpt_id, fence->value);
1690
1691
1692         /* TODO! Check for errors... */
1693         gk20a_channel_add_job(c, fence);
1694
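        /*
         * Kick the hardware: write the new GP_PUT into the channel's USERD
         * through the BAR1 mapping so the newly queued entries get fetched.
         */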
1695         c->cmds_pending = true;
1696         gk20a_bar1_writel(g,
1697                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1698                 c->gpfifo.put);
1699
1700         nvhost_dbg_info("post-submit put %d, get %d, size %d",
1701                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1702
1703         nvhost_dbg_fn("done");
1704         return err;
1705
1706 clean_up:
1707         if (sync_fence)
1708                 sync_fence_put(sync_fence);
1709         nvhost_err(d, "fail");
1710         free_priv_cmdbuf(c, wait_cmd);
1711         free_priv_cmdbuf(c, incr_cmd);
1712         nvhost_module_idle(g->dev);
1713         return err;
1714 }
1715
1716 void gk20a_remove_channel_support(struct channel_gk20a *c)
1717 {
1718
1719 }
1720
1721 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1722 {
        struct channel_gk20a *c = g->fifo.channel + chid;
1724         c->g = g;
1725         c->in_use = false;
1726         c->hw_chid = chid;
1727         c->bound = false;
1728         c->remove_support = gk20a_remove_channel_support;
1729         mutex_init(&c->jobs_lock);
1730         INIT_LIST_HEAD(&c->jobs);
1731 #if defined(CONFIG_TEGRA_GPU_CYCLE_STATS)
1732         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1733 #endif
1734         INIT_LIST_HEAD(&c->dbg_s_list);
1735         mutex_init(&c->dbg_s_lock);
1736
1737         return 0;
1738 }
1739
1740 int gk20a_channel_init(struct nvhost_channel *ch,
1741                        struct nvhost_master *host, int index)
1742 {
1743         return 0;
1744 }
1745
1746 int gk20a_channel_alloc_obj(struct nvhost_channel *channel,
1747                         u32 class_num,
1748                         u32 *obj_id,
1749                         u32 vaspace_share)
1750 {
1751         nvhost_dbg_fn("");
1752         return 0;
1753 }
1754
1755 int gk20a_channel_free_obj(struct nvhost_channel *channel, u32 obj_id)
1756 {
1757         nvhost_dbg_fn("");
1758         return 0;
1759 }
1760
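/*
 * Drain the channel: if the last submit did not already end with a
 * wait-for-idle + syncpoint increment, submit one now, then wait (up to
 * 'timeout') for that syncpoint value to be reached.
 */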
1761 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
1762 {
1763         struct nvhost_syncpt *sp = syncpt_from_gk20a(ch->g);
1764         struct nvhost_device_data *pdata = nvhost_get_devdata(ch->g->dev);
1765         struct nvhost_fence fence;
1766         int err = 0;
1767
1768         if (!ch->cmds_pending)
1769                 return 0;
1770
1771         if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
1772                 nvhost_dbg_fn("issuing wfi, incr to finish the channel");
1773                 fence.syncpt_id = ch->hw_chid + pdata->syncpt_base;
1774                 err = gk20a_channel_submit_wfi_fence(ch->g, ch,
1775                                                      sp, &fence);
1776         }
1777         if (err)
1778                 return err;
1779
1780         BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));
1781
1782         nvhost_dbg_fn("waiting for channel to finish syncpt:%d val:%d",
1783                       ch->last_submit_fence.syncpt_id,
1784                       ch->last_submit_fence.syncpt_value);
1785
        /* Do not wait for a timed-out channel; just check whether it's done */
1787         if (ch->hwctx && ch->hwctx->has_timedout)
1788                 timeout = 0;
1789
1790         err = nvhost_syncpt_wait_timeout(sp,
1791                                          ch->last_submit_fence.syncpt_id,
1792                                          ch->last_submit_fence.syncpt_value,
1793                                          timeout, &fence.value, NULL, false);
1794         if (WARN_ON(err))
1795                 dev_warn(dev_from_gk20a(ch->g),
1796                          "timed out waiting for gk20a channel to finish");
1797         else
1798                 ch->cmds_pending = false;
1799
1800         return err;
1801 }
1802
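/*
 * Block until the 32-bit word at 'offset' within the given memory handle
 * equals 'payload' or the timeout expires; waiters are woken by
 * gk20a_channel_semaphore_wakeup().
 */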
1803 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
1804                                         ulong id, u32 offset,
1805                                         u32 payload, long timeout)
1806 {
1807         struct platform_device *pdev = ch->ch->dev;
1808         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
1809         struct mem_handle *handle_ref;
1810         void *data;
1811         u32 *semaphore;
1812         int ret = 0;
1813         long remain;
1814
1815         handle_ref = nvhost_memmgr_get(memmgr, id, pdev);
1816         if (IS_ERR(handle_ref)) {
1817                 nvhost_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
1818                            id);
1819                 return -EINVAL;
1820         }
1821
1822         data = nvhost_memmgr_kmap(handle_ref, offset >> PAGE_SHIFT);
1823         if (!data) {
1824                 nvhost_err(&pdev->dev, "failed to map notifier memory");
1825                 ret = -EINVAL;
1826                 goto cleanup_put;
1827         }
1828
1829         semaphore = data + (offset & ~PAGE_MASK);
1830
1831         remain = wait_event_interruptible_timeout(
1832                         ch->semaphore_wq,
1833                         *semaphore == payload,
1834                         timeout);
1835
1836         if (remain == 0 && *semaphore != payload)
1837                 ret = -ETIMEDOUT;
1838         else if (remain < 0)
1839                 ret = remain;
1840
1841         nvhost_memmgr_kunmap(handle_ref, offset >> PAGE_SHIFT, data);
1842 cleanup_put:
1843         nvhost_memmgr_put(memmgr, handle_ref);
1844         return ret;
1845 }
1846
1847 int gk20a_channel_wait(struct channel_gk20a *ch,
1848                        struct nvhost_wait_args *args)
1849 {
1850         struct device *d = dev_from_gk20a(ch->g);
1851         struct platform_device *dev = ch->ch->dev;
1852         struct mem_mgr *memmgr = gk20a_channel_mem_mgr(ch);
1853         struct mem_handle *handle_ref;
1854         struct notification *notif;
1855         struct timespec tv;
        u64 now_jiffies;
1857         ulong id;
1858         u32 offset;
1859         unsigned long timeout;
1860         int remain, ret = 0;
1861
1862         nvhost_dbg_fn("");
1863
1864         if (args->timeout == NVHOST_NO_TIMEOUT)
1865                 timeout = MAX_SCHEDULE_TIMEOUT;
1866         else
                timeout = msecs_to_jiffies(args->timeout);
1868
1869         switch (args->type) {
1870         case NVHOST_WAIT_TYPE_NOTIFIER:
1871                 id = args->condition.notifier.nvmap_handle;
1872                 offset = args->condition.notifier.offset;
1873
1874                 handle_ref = nvhost_memmgr_get(memmgr, id, dev);
1875                 if (IS_ERR(handle_ref)) {
1876                         nvhost_err(d, "invalid notifier nvmap handle 0x%lx",
1877                                    id);
1878                         return -EINVAL;
1879                 }
1880
                notif = nvhost_memmgr_mmap(handle_ref);
                if (!notif) {
                        nvhost_err(d, "failed to map notifier memory");
                        nvhost_memmgr_put(memmgr, handle_ref);
                        return -ENOMEM;
                }
1886
1887                 notif = (struct notification *)((uintptr_t)notif + offset);
1888
1889                 /* user should set status pending before
1890                  * calling this ioctl */
1891                 remain = wait_event_interruptible_timeout(
1892                                 ch->notifier_wq,
1893                                 notif->status == 0,
1894                                 timeout);
1895
1896                 if (remain == 0 && notif->status != 0) {
1897                         ret = -ETIMEDOUT;
1898                         goto notif_clean_up;
1899                 } else if (remain < 0) {
1900                         ret = -EINTR;
1901                         goto notif_clean_up;
1902                 }
1903
1904                 /* TBD: fill in correct information */
                now_jiffies = get_jiffies_64();
                jiffies_to_timespec(now_jiffies, &tv);
1907                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
1908                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
1909                 notif->info32 = 0xDEADBEEF; /* should be object name */
1910                 notif->info16 = ch->hw_chid; /* should be method offset */
1911
1912 notif_clean_up:
1913                 nvhost_memmgr_munmap(handle_ref, notif);
                nvhost_memmgr_put(memmgr, handle_ref);
                return ret;
1915
1916         case NVHOST_WAIT_TYPE_SEMAPHORE:
1917                 ret = gk20a_channel_wait_semaphore(ch,
1918                                 args->condition.semaphore.nvmap_handle,
1919                                 args->condition.semaphore.offset,
1920                                 args->condition.semaphore.payload,
1921                                 timeout);
1922
1923                 break;
1924
1925         default:
1926                 ret = -EINVAL;
1927                 break;
1928         }
1929
1930         return ret;
1931 }
1932
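/*
 * Map an nvhost priority level onto a channel timeslice value; per the
 * per-case comments below, the value is interpreted with a timeslice scale
 * of 3 (i.e. multiplied by 8 to give microseconds).
 */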
1933 int gk20a_channel_set_priority(struct channel_gk20a *ch,
1934                 u32 priority)
1935 {
1936         u32 timeslice_timeout;
1937         /* set priority of graphics channel */
1938         switch (priority) {
1939         case NVHOST_PRIORITY_LOW:
1940                 /* 64 << 3 = 512us */
1941                 timeslice_timeout = 64;
1942                 break;
1943         case NVHOST_PRIORITY_MEDIUM:
1944                 /* 128 << 3 = 1024us */
1945                 timeslice_timeout = 128;
1946                 break;
1947         case NVHOST_PRIORITY_HIGH:
                /* 255 << 3 = 2040us */
1949                 timeslice_timeout = 255;
1950                 break;
1951         default:
                pr_err("Unsupported priority\n");
1953                 return -EINVAL;
1954         }
1955         channel_gk20a_set_schedule_params(ch,
1956                         timeslice_timeout);
1957         return 0;
1958 }
1959
1960 int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
1961                             struct nvhost_zcull_bind_args *args)
1962 {
1963         struct gk20a *g = ch->g;
1964         struct gr_gk20a *gr = &g->gr;
1965
1966         nvhost_dbg_fn("");
1967
1968         return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
1969                                 args->gpu_va, args->mode);
1970 }
1971
1972 /* in this context the "channel" is the host1x channel which
1973  * maps to *all* gk20a channels */
1974 int gk20a_channel_suspend(struct gk20a *g)
1975 {
1976         struct fifo_gk20a *f = &g->fifo;
1977         u32 chid;
1978         bool channels_in_use = false;
1979
1980         nvhost_dbg_fn("");
1981
1982         for (chid = 0; chid < f->num_channels; chid++) {
1983                 if (f->channel[chid].in_use) {
1984
1985                         nvhost_dbg_info("suspend channel %d", chid);
1986
1987                         /* disable channel */
1988                         gk20a_writel(g, ccsr_channel_r(chid),
1989                                 gk20a_readl(g, ccsr_channel_r(chid)) |
1990                                 ccsr_channel_enable_clr_true_f());
1991                         /* preempt the channel */
1992                         gk20a_fifo_preempt_channel(g, chid);
1993
1994                         channels_in_use = true;
1995                 }
1996         }
1997
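        /*
         * With every active channel disabled and preempted, remove them from
         * the runlist in a single update and unbind them from their hardware
         * channel IDs.
         */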
1998         if (channels_in_use) {
1999                 gk20a_fifo_update_runlist(g, 0, ~0, false, true);
2000
2001                 for (chid = 0; chid < f->num_channels; chid++) {
2002                         if (f->channel[chid].in_use)
2003                                 channel_gk20a_unbind(&f->channel[chid]);
2004                 }
2005         }
2006
2007         nvhost_dbg_fn("done");
2008         return 0;
2009 }
2010
2011 /* in this context the "channel" is the host1x channel which
2012  * maps to *all* gk20a channels */
2013 int gk20a_channel_resume(struct gk20a *g)
2014 {
2015         struct fifo_gk20a *f = &g->fifo;
2016         u32 chid;
2017         bool channels_in_use = false;
2018
2019         nvhost_dbg_fn("");
2020
2021         for (chid = 0; chid < f->num_channels; chid++) {
2022                 if (f->channel[chid].in_use) {
2023                         nvhost_dbg_info("resume channel %d", chid);
2024                         channel_gk20a_bind(&f->channel[chid]);
2025                         channels_in_use = true;
2026                 }
2027         }
2028
2029         if (channels_in_use)
2030                 gk20a_fifo_update_runlist(g, 0, ~0, true, true);
2031
2032         nvhost_dbg_fn("done");
2033         return 0;
2034 }
2035
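/*
 * Wake every in-use channel's semaphore waiters; pairs with
 * gk20a_channel_wait_semaphore() above.
 */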
2036 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2037 {
2038         struct fifo_gk20a *f = &g->fifo;
2039         u32 chid;
2040
2041         nvhost_dbg_fn("");
2042
2043         for (chid = 0; chid < f->num_channels; chid++) {
                struct channel_gk20a *c = g->fifo.channel + chid;
2045                 if (c->in_use)
2046                         wake_up_interruptible_all(&c->semaphore_wq);
2047         }
2048 }