gpu: nvgpu: Keep host1x on when GPU on
[linux-3.10.git] drivers/gpu/nvgpu/gk20a/channel_gk20a.c
1 /*
2  * drivers/gpu/nvgpu/gk20a/channel_gk20a.c
3  *
4  * GK20A Graphics channel
5  *
6  * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/nvhost.h>
23 #include <linux/list.h>
24 #include <linux/delay.h>
25 #include <linux/highmem.h> /* needed for nvmap.h */
26 #include <trace/events/gk20a.h>
27 #include <linux/scatterlist.h>
28 #include <linux/file.h>
29 #include <linux/anon_inodes.h>
30 #include <linux/dma-buf.h>
31
32 #include "debug_gk20a.h"
33
34 #include "gk20a.h"
35 #include "dbg_gpu_gk20a.h"
36
37 #include "hw_ram_gk20a.h"
38 #include "hw_fifo_gk20a.h"
39 #include "hw_pbdma_gk20a.h"
40 #include "hw_ccsr_gk20a.h"
41 #include "hw_ltc_gk20a.h"
42
43 #define NVMAP_HANDLE_PARAM_SIZE 1
44
45 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
46 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
47
48 static void free_priv_cmdbuf(struct channel_gk20a *c,
49                              struct priv_cmd_entry *e);
50 static void recycle_priv_cmdbuf(struct channel_gk20a *c);
51
52 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
53 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
54
55 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
56 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
57 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
58                         u64 gpfifo_base, u32 gpfifo_entries);
59
60 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
61 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
62
63 static int channel_gk20a_alloc_inst(struct gk20a *g,
64                                 struct channel_gk20a *ch);
65 static void channel_gk20a_free_inst(struct gk20a *g,
66                                 struct channel_gk20a *ch);
67
68 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
69                                         bool add);
70 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
71
72 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
73 {
74         struct channel_gk20a *ch = NULL;
75         int chid;
76
77         mutex_lock(&f->ch_inuse_mutex);
78         for (chid = 0; chid < f->num_channels; chid++) {
79                 if (!f->channel[chid].in_use) {
80                         f->channel[chid].in_use = true;
81                         ch = &f->channel[chid];
82                         break;
83                 }
84         }
85         mutex_unlock(&f->ch_inuse_mutex);
86
87         return ch;
88 }
89
90 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
91 {
92         mutex_lock(&f->ch_inuse_mutex);
93         f->channel[c->hw_chid].in_use = false;
94         mutex_unlock(&f->ch_inuse_mutex);
95 }
96
97 int channel_gk20a_commit_va(struct channel_gk20a *c)
98 {
99         u64 addr;
100         u32 addr_lo;
101         u32 addr_hi;
102         void *inst_ptr;
103
104         gk20a_dbg_fn("");
105
106         inst_ptr = c->inst_block.cpuva;
107         if (!inst_ptr)
108                 return -ENOMEM;
109
110         addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
111         addr_lo = u64_lo32(addr >> 12);
112         addr_hi = u64_hi32(addr);
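        /*
         * Worked example (illustrative address, not real hardware state):
         * for addr = 0x1_2345_6000, addr >> 12 = 0x123456, so addr_lo =
         * 0x00123456 and addr_hi = 0x1; these values are packed into the
         * instance block by the ram_in_page_dir_base_*_f() helpers below.
         */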
113
114         gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
115                    (u64)addr, addr_lo, addr_hi);
116
117         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
118                 ram_in_page_dir_base_target_vid_mem_f() |
119                 ram_in_page_dir_base_vol_true_f() |
120                 ram_in_page_dir_base_lo_f(addr_lo));
121
122         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
123                 ram_in_page_dir_base_hi_f(addr_hi));
124
125         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
126                  u64_lo32(c->vm->va_limit) | 0xFFF);
127
128         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
129                 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
130
131         gk20a_mm_l2_invalidate(c->g);
132
133         return 0;
134 }
135
136 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
137 {
138         u32 addr_lo;
139         u32 addr_hi;
140         void *inst_ptr;
141
142         gk20a_dbg_fn("");
143
144         inst_ptr = c->inst_block.cpuva;
145         if (!inst_ptr)
146                 return -ENOMEM;
147
148         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
149         addr_hi = u64_hi32(c->userd_iova);
150
151         gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
152                 c->hw_chid, (u64)c->userd_iova);
153
154         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
155                  pbdma_userd_target_vid_mem_f() |
156                  pbdma_userd_addr_f(addr_lo));
157
158         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
159                  pbdma_userd_target_vid_mem_f() |
160                  pbdma_userd_hi_addr_f(addr_hi));
161
162         gk20a_mm_l2_invalidate(c->g);
163
164         return 0;
165 }
166
167 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
168                                 u32 timeslice_timeout)
169 {
170         void *inst_ptr;
171         int shift = 3;
172         int value = timeslice_timeout;
173
174         inst_ptr = c->inst_block.cpuva;
175         if (!inst_ptr)
176                 return -ENOMEM;
177
178         /* disable channel */
179         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
180                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
181                 ccsr_channel_enable_clr_true_f());
182
183         /* preempt the channel */
184         WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
185
186         /* flush GPU cache */
187         gk20a_mm_l2_flush(c->g, true);
188
189         /* value field is 8 bits long */
190         while (value >= 1 << 8) {
191                 value >>= 1;
192                 shift++;
193         }
194
195         /* time slice register is only 18 bits long */
196         if ((value << shift) >= 1<<19) {
197                 pr_err("Requested timeslice value is clamped to 18 bits\n");
198                 value = 255;
199                 shift = 10;
200         }
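        /*
         * Worked example (hypothetical input): timeslice_timeout = 1000 gives
         * value = 250 and shift = 5 after the loop above (1000 -> 500 -> 250),
         * and 250 << 5 = 8000 is well under 1 << 19, so no clamping occurs.
         * The register value written below is then 250 | (5 << 12) | enable.
         */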
201
202         /* set new timeslice */
203         gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
204                 value | (shift << 12) |
205                 fifo_eng_timeslice_enable_true_f());
206
207         /* enable channel */
208         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
209                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
210                 ccsr_channel_enable_set_true_f());
211
212         gk20a_mm_l2_invalidate(c->g);
213
214         return 0;
215 }
216
217 static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
218                                 u64 gpfifo_base, u32 gpfifo_entries)
219 {
220         void *inst_ptr;
221
222         gk20a_dbg_fn("");
223
224         inst_ptr = c->inst_block.cpuva;
225         if (!inst_ptr)
226                 return -ENOMEM;
227
228         memset(inst_ptr, 0, ram_fc_size_val_v());
229
230         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
231                 pbdma_gp_base_offset_f(
232                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
233
234         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
235                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
236                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
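        /*
         * Note: limit2 encodes log2 of the gpfifo size, e.g. 512 entries give
         * ilog2(512) = 9. gk20a_alloc_channel_gpfifo() rounds the entry count
         * up to a power of two, which also lets put/get wrap with a simple
         * "& (entry_num - 1)" mask.
         */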
237
238         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
239                  pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
240
241         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
242                 pbdma_formats_gp_fermi0_f() |
243                 pbdma_formats_pb_fermi1_f() |
244                 pbdma_formats_mp_fermi0_f());
245
246         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
247                 pbdma_pb_header_priv_user_f() |
248                 pbdma_pb_header_method_zero_f() |
249                 pbdma_pb_header_subchannel_zero_f() |
250                 pbdma_pb_header_level_main_f() |
251                 pbdma_pb_header_first_true_f() |
252                 pbdma_pb_header_type_inc_f());
253
254         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
255                 pbdma_subdevice_id_f(1) |
256                 pbdma_subdevice_status_active_f() |
257                 pbdma_subdevice_channel_dma_enable_f());
258
259         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
260
261         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
262                 pbdma_acquire_retry_man_2_f() |
263                 pbdma_acquire_retry_exp_2_f() |
264                 pbdma_acquire_timeout_exp_max_f() |
265                 pbdma_acquire_timeout_man_max_f() |
266                 pbdma_acquire_timeout_en_disable_f());
267
268         gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
269                 fifo_eng_timeslice_timeout_128_f() |
270                 fifo_eng_timeslice_timescale_3_f() |
271                 fifo_eng_timeslice_enable_true_f());
272
273         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
274                 fifo_pb_timeslice_timeout_16_f() |
275                 fifo_pb_timeslice_timescale_0_f() |
276                 fifo_pb_timeslice_enable_true_f());
277
278         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
279
280         gk20a_mm_l2_invalidate(c->g);
281
282         return 0;
283 }
284
285 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
286 {
287         BUG_ON(!c->userd_cpu_va);
288
289         gk20a_dbg_fn("");
290
291         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
292         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
293         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
294         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
295         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
296         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
297         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
298         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
299         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
300         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
301
302         gk20a_mm_l2_invalidate(c->g);
303
304         return 0;
305 }
306
307 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
308 {
309         struct gk20a *g = ch_gk20a->g;
310         struct fifo_gk20a *f = &g->fifo;
311         struct fifo_engine_info_gk20a *engine_info =
312                 f->engine_info + ENGINE_GR_GK20A;
313
314         u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
315                 >> ram_in_base_shift_v();
316
317         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
318                 ch_gk20a->hw_chid, inst_ptr);
319
320         ch_gk20a->bound = true;
321
322         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
323                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
324                  ~ccsr_channel_runlist_f(~0)) |
325                  ccsr_channel_runlist_f(engine_info->runlist_id));
326
327         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
328                 ccsr_channel_inst_ptr_f(inst_ptr) |
329                 ccsr_channel_inst_target_vid_mem_f() |
330                 ccsr_channel_inst_bind_true_f());
331
332         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
333                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
334                  ~ccsr_channel_enable_set_f(~0)) |
335                  ccsr_channel_enable_set_true_f());
336 }
337
338 static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
339 {
340         struct gk20a *g = ch_gk20a->g;
341
342         gk20a_dbg_fn("");
343
344         if (ch_gk20a->bound)
345                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
346                         ccsr_channel_inst_ptr_f(0) |
347                         ccsr_channel_inst_bind_false_f());
348
349         ch_gk20a->bound = false;
350
351         /*
352          * If we are aggressive then we can destroy the syncpt
353          * resource at this point; if not, it will be destroyed
354          * at channel_free().
355          */
356         if (ch_gk20a->sync && ch_gk20a->sync->syncpt_aggressive_destroy) {
357                 ch_gk20a->sync->destroy(ch_gk20a->sync);
358                 ch_gk20a->sync = NULL;
359         }
360 }
361
362 static int channel_gk20a_alloc_inst(struct gk20a *g,
363                                 struct channel_gk20a *ch)
364 {
365         struct device *d = dev_from_gk20a(g);
366         int err = 0;
367         dma_addr_t iova;
368
369         gk20a_dbg_fn("");
370
371         ch->inst_block.size = ram_in_alloc_size_v();
372         ch->inst_block.cpuva = dma_alloc_coherent(d,
373                                         ch->inst_block.size,
374                                         &iova,
375                                         GFP_KERNEL);
376         if (!ch->inst_block.cpuva) {
377                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
378                 err = -ENOMEM;
379                 goto clean_up;
380         }
381
382         ch->inst_block.iova = iova;
383         ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
384                                                         ch->inst_block.iova);
385         if (!ch->inst_block.cpu_pa) {
386                 gk20a_err(d, "%s: failed to get physical address\n", __func__);
387                 err = -ENOMEM;
388                 goto clean_up;
389         }
390
391         gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
392                 ch->hw_chid, (u64)ch->inst_block.cpu_pa);
393
394         gk20a_dbg_fn("done");
395         return 0;
396
397 clean_up:
398         gk20a_err(d, "fail");
399         channel_gk20a_free_inst(g, ch);
400         return err;
401 }
402
403 static void channel_gk20a_free_inst(struct gk20a *g,
404                                 struct channel_gk20a *ch)
405 {
406         struct device *d = dev_from_gk20a(g);
407
408         if (ch->inst_block.cpuva)
409                 dma_free_coherent(d, ch->inst_block.size,
410                                 ch->inst_block.cpuva, ch->inst_block.iova);
411         ch->inst_block.cpuva = NULL;
412         ch->inst_block.iova = 0;
413         memset(&ch->inst_block, 0, sizeof(struct inst_desc));
414 }
415
416 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
417 {
418         return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
419 }
420
421 void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
422 {
423         /* ensure no fences are pending */
424         if (ch->sync)
425                 ch->sync->set_min_eq_max(ch->sync);
426
427         /* disable channel */
428         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
429                      gk20a_readl(ch->g,
430                      ccsr_channel_r(ch->hw_chid)) |
431                      ccsr_channel_enable_clr_true_f());
432 }
433
434 static int gk20a_wait_channel_idle(struct channel_gk20a *ch)
435 {
436         bool channel_idle = false;
437         unsigned long end_jiffies = jiffies +
438                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
439
440         do {
441                 mutex_lock(&ch->jobs_lock);
442                 channel_idle = list_empty(&ch->jobs);
443                 mutex_unlock(&ch->jobs_lock);
444                 if (channel_idle)
445                         break;
446
447                 usleep_range(1000, 3000);
448         } while (time_before(jiffies, end_jiffies)
449                         || !tegra_platform_is_silicon());
450
451         if (!channel_idle)
452                 gk20a_err(dev_from_gk20a(ch->g), "channel jobs not freed");
453
454         return 0;
455 }
456
457 void gk20a_disable_channel(struct channel_gk20a *ch,
458                            bool finish,
459                            unsigned long finish_timeout)
460 {
461         if (finish) {
462                 int err = gk20a_channel_finish(ch, finish_timeout);
463                 WARN_ON(err);
464         }
465
466         /* disable the channel from hw and increment syncpoints */
467         gk20a_disable_channel_no_update(ch);
468
469         gk20a_wait_channel_idle(ch);
470
471         /* preempt the channel */
472         gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
473
474         /* remove channel from runlist */
475         channel_gk20a_update_runlist(ch, false);
476 }
477
478 #if defined(CONFIG_GK20A_CYCLE_STATS)
479
480 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
481 {
482         /* disable existing cyclestats buffer */
483         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
484         if (ch->cyclestate.cyclestate_buffer_handler) {
485                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
486                                 ch->cyclestate.cyclestate_buffer);
487                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
488                 ch->cyclestate.cyclestate_buffer_handler = NULL;
489                 ch->cyclestate.cyclestate_buffer = NULL;
490                 ch->cyclestate.cyclestate_buffer_size = 0;
491         }
492         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
493 }
494
495 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
496                        struct nvhost_cycle_stats_args *args)
497 {
498         struct dma_buf *dmabuf;
499         void *virtual_address;
500
501         if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
502
503                 /* set up new cyclestats buffer */
504                 dmabuf = dma_buf_get(args->nvmap_handle);
505                 if (IS_ERR(dmabuf))
506                         return PTR_ERR(dmabuf);
507                 virtual_address = dma_buf_vmap(dmabuf);
508                 if (!virtual_address)
509                         return -ENOMEM;
510
511                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
512                 ch->cyclestate.cyclestate_buffer = virtual_address;
513                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
514                 return 0;
515
516         } else if (!args->nvmap_handle &&
517                         ch->cyclestate.cyclestate_buffer_handler) {
518                 gk20a_free_cycle_stats_buffer(ch);
519                 return 0;
520
521         } else if (!args->nvmap_handle &&
522                         !ch->cyclestate.cyclestate_buffer_handler) {
523                 /* no request from GL */
524                 return 0;
525
526         } else {
527                 pr_err("channel already has cyclestats buffer\n");
528                 return -EINVAL;
529         }
530 }
531 #endif
532
533 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
534                 struct nvhost_set_error_notifier *args) {
535         void *va;
536
537         struct dma_buf *dmabuf;
538
539         if (!args->mem) {
540                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
541                 return -EINVAL;
542         }
543
544         dmabuf = dma_buf_get(args->mem);
545
546         if (ch->error_notifier_ref)
547                 gk20a_free_error_notifiers(ch);
548
549         if (IS_ERR(dmabuf)) {
550                 pr_err("Invalid handle: %d\n", args->mem);
551                 return -EINVAL;
552         }
553         /* map handle */
554         va = dma_buf_vmap(dmabuf);
555         if (!va) {
556                 dma_buf_put(dmabuf);
557                 pr_err("Cannot map notifier handle\n");
558                 return -ENOMEM;
559         }
560
561         /* set channel notifiers pointer */
562         ch->error_notifier_ref = dmabuf;
563         ch->error_notifier = va + args->offset;
564         ch->error_notifier_va = va;
565         memset(ch->error_notifier, 0, sizeof(struct nvhost_notification));
566         return 0;
567 }
568
569 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
570 {
571         if (ch->error_notifier_ref) {
572                 struct timespec time_data;
573                 u64 nsec;
574                 getnstimeofday(&time_data);
575                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
576                                 (u64)time_data.tv_nsec;
577                 ch->error_notifier->time_stamp.nanoseconds[0] =
578                                 (u32)nsec;
579                 ch->error_notifier->time_stamp.nanoseconds[1] =
580                                 (u32)(nsec >> 32);
581                 ch->error_notifier->info32 = error;
582                 ch->error_notifier->status = 0xffff;
583                 gk20a_err(dev_from_gk20a(ch->g),
584                                 "error notifier set to %d\n", error);
585         }
586 }
587
588 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
589 {
590         if (ch->error_notifier_ref) {
591                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
592                 dma_buf_put(ch->error_notifier_ref);
593                 ch->error_notifier_ref = NULL;
594                 ch->error_notifier = NULL;
595                 ch->error_notifier_va = NULL;
596         }
597 }
598
599 void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
600 {
601         struct gk20a *g = ch->g;
602         struct device *d = dev_from_gk20a(g);
603         struct fifo_gk20a *f = &g->fifo;
604         struct gr_gk20a *gr = &g->gr;
605         struct vm_gk20a *ch_vm = ch->vm;
606         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
607         struct dbg_session_gk20a *dbg_s;
608
609         gk20a_dbg_fn("");
610
611         /* if engine reset was deferred, perform it now */
612         mutex_lock(&f->deferred_reset_mutex);
613         if (g->fifo.deferred_reset_pending) {
614                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
615                            " deferred, running now");
616                 fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
617                 g->fifo.mmu_fault_engines = 0;
618                 g->fifo.deferred_reset_pending = false;
619         }
620         mutex_unlock(&f->deferred_reset_mutex);
621
622         if (!ch->bound)
623                 return;
624
625         if (!gk20a_channel_as_bound(ch))
626                 goto unbind;
627
628         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
629                         timeout);
630
631         gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);
632
633         gk20a_free_error_notifiers(ch);
634
635         /* release channel ctx */
636         gk20a_free_channel_ctx(ch);
637
638         gk20a_gr_flush_channel_tlb(gr);
639
640         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
641
642         /* free gpfifo */
643         if (ch->gpfifo.gpu_va)
644                 gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
645                         ch->gpfifo.size, gk20a_mem_flag_none);
646         if (ch->gpfifo.cpu_va)
647                 dma_free_coherent(d, ch->gpfifo.size,
648                         ch->gpfifo.cpu_va, ch->gpfifo.iova);
649         ch->gpfifo.cpu_va = NULL;
650         ch->gpfifo.iova = 0;
651
652         gk20a_mm_l2_invalidate(ch->g);
653
654         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
655
656 #if defined(CONFIG_GK20A_CYCLE_STATS)
657         gk20a_free_cycle_stats_buffer(ch);
658 #endif
659
660         channel_gk20a_free_priv_cmdbuf(ch);
661
662         /* release channel binding to the as_share */
663         gk20a_as_release_share(ch_vm->as_share);
664
665 unbind:
666         channel_gk20a_unbind(ch);
667         channel_gk20a_free_inst(g, ch);
668
669         ch->vpr = false;
670         ch->vm = NULL;
671
672         if (ch->sync) {
673                 ch->sync->destroy(ch->sync);
674                 ch->sync = NULL;
675         }
676         WARN_ON(ch->sync);
677
678         /* unlink all debug sessions */
679         mutex_lock(&ch->dbg_s_lock);
680
681         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
682                 dbg_s->ch = NULL;
683                 list_del_init(&dbg_s->dbg_s_list_node);
684         }
685
686         mutex_unlock(&ch->dbg_s_lock);
687
688         /* ALWAYS last */
689         release_used_channel(f, ch);
690 }
691
692 int gk20a_channel_release(struct inode *inode, struct file *filp)
693 {
694         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
695         struct gk20a *g = ch->g;
696         int err;
697
698         trace_gk20a_channel_release(dev_name(&g->dev->dev));
699
700         err = gk20a_busy(ch->g->dev);
701         if (err) {
702                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
703                         ch->hw_chid);
704                 return err;
705         }
706         gk20a_free_channel(ch, true);
707         gk20a_idle(ch->g->dev);
708
709         gk20a_put_client(g);
710         filp->private_data = NULL;
711         return 0;
712 }
713
714 static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
715 {
716         struct fifo_gk20a *f = &g->fifo;
717         struct channel_gk20a *ch;
718
719         ch = acquire_unused_channel(f);
720         if (ch == NULL) {
721                 /* TBD: we want to make this virtualizable */
722                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
723                 return NULL;
724         }
725
726         ch->g = g;
727
728         if (channel_gk20a_alloc_inst(g, ch)) {
729                 ch->in_use = false;
730                 gk20a_err(dev_from_gk20a(g),
731                            "failed to open gk20a channel, out of inst mem");
732
733                 return NULL;
734         }
735         g->ops.fifo.bind_channel(ch);
736         ch->pid = current->pid;
737
738         /* reset timeout counter and update timestamp */
739         ch->timeout_accumulated_ms = 0;
740         ch->timeout_gpfifo_get = 0;
741         /* set gr host default timeout */
742         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
743         ch->timeout_debug_dump = true;
744         ch->has_timedout = false;
745
746         /* The channel is *not* runnable at this point. It still needs an
747          * address space bound and a gpfifo and grctx allocated. */
748
749         init_waitqueue_head(&ch->notifier_wq);
750         init_waitqueue_head(&ch->semaphore_wq);
751         init_waitqueue_head(&ch->submit_wq);
752
753         return ch;
754 }
755
756 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
757 {
758         int err;
759         struct channel_gk20a *ch;
760
761         trace_gk20a_channel_open(dev_name(&g->dev->dev));
762
763         err = gk20a_get_client(g);
764         if (err) {
765                 gk20a_err(dev_from_gk20a(g),
766                         "failed to get client ref");
767                 return err;
768         }
769
770         err = gk20a_busy(g->dev);
771         if (err) {
772                 gk20a_put_client(g);
773                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
774                 return err;
775         }
776         ch = gk20a_open_new_channel(g);
777         gk20a_idle(g->dev);
778         if (!ch) {
779                 gk20a_put_client(g);
780                 gk20a_err(dev_from_gk20a(g),
781                         "failed to allocate a new channel");
782                 return -ENOMEM;
783         }
784
785         filp->private_data = ch;
786         return 0;
787 }
788
789 int gk20a_channel_open(struct inode *inode, struct file *filp)
790 {
791         struct gk20a *g = container_of(inode->i_cdev,
792                         struct gk20a, channel.cdev);
793         return __gk20a_channel_open(g, filp);
794 }
795
796 /* Allocate the private command buffer,
797    used for inserting commands before/after user-submitted buffers. */
798 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
799 {
800         struct device *d = dev_from_gk20a(c->g);
801         struct vm_gk20a *ch_vm = c->vm;
802         struct priv_cmd_queue *q = &c->priv_cmd_q;
803         struct priv_cmd_entry *e;
804         u32 i = 0, size;
805         int err = 0;
806         struct sg_table *sgt;
807         dma_addr_t iova;
808
809         /* Kernel can insert gpfifos before and after user gpfifos.
810            Before user gpfifos, kernel inserts fence_wait, which takes
811            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
812            After user gpfifos, kernel inserts fence_get, which takes
813            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
814            = 6 dwords.
815            Worst case, if the kernel adds both of them for every user gpfifo,
816            the max size of priv_cmdbuf is:
817            (gpfifo entry number) * (2 / 3) * (4 + 6) * 4 bytes */
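        /* For example (hypothetical ring size): with gpfifo.entry_num = 1024
           the expression below is 1024 * 2 * 10 * 4 / 3 = 27306 bytes, which
           roundup_pow_of_two() turns into a 32 KiB buffer. */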
818         size = roundup_pow_of_two(
819                 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
820
821         q->mem.base_cpuva = dma_alloc_coherent(d, size,
822                                         &iova,
823                                         GFP_KERNEL);
824         if (!q->mem.base_cpuva) {
825                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
826                 err = -ENOMEM;
827                 goto clean_up;
828         }
829
830         q->mem.base_iova = iova;
831         q->mem.size = size;
832
833         err = gk20a_get_sgtable(d, &sgt,
834                         q->mem.base_cpuva, q->mem.base_iova, size);
835         if (err) {
836                 gk20a_err(d, "%s: failed to create sg table\n", __func__);
837                 goto clean_up;
838         }
839
840         memset(q->mem.base_cpuva, 0, size);
841
842         q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
843                                         size,
844                                         0, /* flags */
845                                         gk20a_mem_flag_none);
846         if (!q->base_gpuva) {
847                 gk20a_err(d, "ch %d : failed to map gpu va "
848                            "for priv cmd buffer", c->hw_chid);
849                 err = -ENOMEM;
850                 goto clean_up_sgt;
851         }
852
853         q->size = q->mem.size / sizeof(u32);
854
855         INIT_LIST_HEAD(&q->head);
856         INIT_LIST_HEAD(&q->free);
857
858         /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
859         for (i = 0; i < q->size / 4; i++) {
860                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
861                 if (!e) {
862                         gk20a_err(d, "ch %d: fail to pre-alloc cmd entry",
863                                 c->hw_chid);
864                         err = -ENOMEM;
865                         goto clean_up_sgt;
866                 }
867                 e->pre_alloc = true;
868                 list_add(&e->list, &q->free);
869         }
870
871         gk20a_free_sgtable(&sgt);
872
873         return 0;
874
875 clean_up_sgt:
876         gk20a_free_sgtable(&sgt);
877 clean_up:
878         channel_gk20a_free_priv_cmdbuf(c);
879         return err;
880 }
881
882 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
883 {
884         struct device *d = dev_from_gk20a(c->g);
885         struct vm_gk20a *ch_vm = c->vm;
886         struct priv_cmd_queue *q = &c->priv_cmd_q;
887         struct priv_cmd_entry *e;
888         struct list_head *pos, *tmp, *head;
889
890         if (q->size == 0)
891                 return;
892
893         if (q->base_gpuva)
894                 gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
895                                 q->mem.size, gk20a_mem_flag_none);
896         if (q->mem.base_cpuva)
897                 dma_free_coherent(d, q->mem.size,
898                         q->mem.base_cpuva, q->mem.base_iova);
899         q->mem.base_cpuva = NULL;
900         q->mem.base_iova = 0;
901
902         /* free used list */
903         head = &q->head;
904         list_for_each_safe(pos, tmp, head) {
905                 e = container_of(pos, struct priv_cmd_entry, list);
906                 free_priv_cmdbuf(c, e);
907         }
908
909         /* free free list */
910         head = &q->free;
911         list_for_each_safe(pos, tmp, head) {
912                 e = container_of(pos, struct priv_cmd_entry, list);
913                 e->pre_alloc = false;
914                 free_priv_cmdbuf(c, e);
915         }
916
917         memset(q, 0, sizeof(struct priv_cmd_queue));
918 }
919
920 /* allocate a cmd buffer with given size. size is number of u32 entries */
921 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
922                              struct priv_cmd_entry **entry)
923 {
924         struct priv_cmd_queue *q = &c->priv_cmd_q;
925         struct priv_cmd_entry *e;
926         struct list_head *node;
927         u32 free_count;
928         u32 size = orig_size;
929         bool no_retry = false;
930
931         gk20a_dbg_fn("size %d", orig_size);
932
933         *entry = NULL;
934
935         /* if free space at the end is less than requested, increase the size
936          * so that the real allocation starts from the beginning. */
937         if (q->put + size > q->size)
938                 size = orig_size + (q->size - q->put);
939
940         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
941                         c->hw_chid, q->get, q->put);
942
943 TRY_AGAIN:
944         free_count = (q->size - (q->put - q->get) - 1) % q->size;
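        /*
         * Example with made-up numbers: for q->size = 8192 words, q->put = 8000
         * and q->get = 100, the formula above gives (8192 - 7900 - 1) % 8192 =
         * 291 free words. One word stays unused so that put == get always means
         * "empty". If the request would run past the end of the buffer, 'size'
         * was already padded earlier so the allocation restarts at offset 0.
         */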
945
946         if (size > free_count) {
947                 if (!no_retry) {
948                         recycle_priv_cmdbuf(c);
949                         no_retry = true;
950                         goto TRY_AGAIN;
951                 } else
952                         return -EAGAIN;
953         }
954
955         if (unlikely(list_empty(&q->free))) {
956
957                 gk20a_dbg_info("ch %d: run out of pre-alloc entries",
958                         c->hw_chid);
959
960                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
961                 if (!e) {
962                         gk20a_err(dev_from_gk20a(c->g),
963                                 "ch %d: fail to allocate priv cmd entry",
964                                 c->hw_chid);
965                         return -ENOMEM;
966                 }
967         } else  {
968                 node = q->free.next;
969                 list_del(node);
970                 e = container_of(node, struct priv_cmd_entry, list);
971         }
972
973         e->size = orig_size;
974         e->gp_get = c->gpfifo.get;
975         e->gp_put = c->gpfifo.put;
976         e->gp_wrap = c->gpfifo.wrap;
977
978         /* if we have increased size to skip free space in the end, set put
979            to beginning of cmd buffer (0) + size */
980         if (size != orig_size) {
981                 e->ptr = q->mem.base_cpuva;
982                 e->gva = q->base_gpuva;
983                 q->put = orig_size;
984         } else {
985                 e->ptr = q->mem.base_cpuva + q->put;
986                 e->gva = q->base_gpuva + q->put * sizeof(u32);
987                 q->put = (q->put + orig_size) & (q->size - 1);
988         }
989
990         /* we already handled q->put + size > q->size so BUG_ON this */
991         BUG_ON(q->put > q->size);
992
993         /* add new entry to head since we free from head */
994         list_add(&e->list, &q->head);
995
996         *entry = e;
997
998         gk20a_dbg_fn("done");
999
1000         return 0;
1001 }
1002
1003 /* Don't call this to free an explicit cmd entry.
1004  * It doesn't update priv_cmd_queue get/put. */
1005 static void free_priv_cmdbuf(struct channel_gk20a *c,
1006                              struct priv_cmd_entry *e)
1007 {
1008         struct priv_cmd_queue *q = &c->priv_cmd_q;
1009
1010         if (!e)
1011                 return;
1012
1013         list_del(&e->list);
1014
1015         if (unlikely(!e->pre_alloc))
1016                 kfree(e);
1017         else {
1018                 memset(e, 0, sizeof(struct priv_cmd_entry));
1019                 e->pre_alloc = true;
1020                 list_add(&e->list, &q->free);
1021         }
1022 }
1023
1024 /* free entries if they're no longer being used */
1025 static void recycle_priv_cmdbuf(struct channel_gk20a *c)
1026 {
1027         struct priv_cmd_queue *q = &c->priv_cmd_q;
1028         struct priv_cmd_entry *e, *tmp;
1029         struct list_head *head = &q->head;
1030         bool wrap_around, found = false;
1031
1032         gk20a_dbg_fn("");
1033
1034         /* Find the most recent free entry. Free it and everything before it */
1035         list_for_each_entry(e, head, list) {
1036
1037                 gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
1038                         "curr get:put:wrap %d:%d:%d",
1039                         c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
1040                         c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
1041
1042                 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
1043                 if (e->gp_get < e->gp_put) {
1044                         if (c->gpfifo.get >= e->gp_put ||
1045                             wrap_around) {
1046                                 found = true;
1047                                 break;
1048                         } else
1049                                 e->gp_get = c->gpfifo.get;
1050                 } else if (e->gp_get > e->gp_put) {
1051                         if (wrap_around &&
1052                             c->gpfifo.get >= e->gp_put) {
1053                                 found = true;
1054                                 break;
1055                         } else
1056                                 e->gp_get = c->gpfifo.get;
1057                 }
1058         }
1059
1060         if (found)
1061                 q->get = (e->ptr - q->mem.base_cpuva) + e->size;
1062         else {
1063                 gk20a_dbg_info("no free entry recycled");
1064                 return;
1065         }
1066
1067         list_for_each_entry_safe_continue(e, tmp, head, list) {
1068                 free_priv_cmdbuf(c, e);
1069         }
1070
1071         gk20a_dbg_fn("done");
1072 }
1073
1074
1075 static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1076                                       struct nvhost_alloc_gpfifo_args *args)
1077 {
1078         struct gk20a *g = c->g;
1079         struct device *d = dev_from_gk20a(g);
1080         struct vm_gk20a *ch_vm;
1081         u32 gpfifo_size;
1082         int err = 0;
1083         struct sg_table *sgt;
1084         dma_addr_t iova;
1085
1086         /* The kernel can insert one extra gpfifo entry before each user-submitted
1087            gpfifo and another one after, for internal use, so triple the requested size. */
1088         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
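        /*
         * E.g. (hypothetical request): args->num_entries = 100 becomes
         * roundup_pow_of_two(300) = 512 entries; at two 32-bit words per
         * entry (entry0/entry1) that is a 4 KiB ring.
         */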
1089
1090         if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1091                 c->vpr = true;
1092
1093         /* an address space needs to have been bound at this point.   */
1094         if (!gk20a_channel_as_bound(c)) {
1095                 gk20a_err(d,
1096                             "not bound to an address space at time of gpfifo"
1097                             " allocation; bind the channel to an"
1098                             " address space first");
1099                 return -EINVAL;
1100         }
1101         ch_vm = c->vm;
1102
1103         c->cmds_pending = false;
1104         c->last_submit_fence.valid = false;
1105
1106         c->ramfc.offset = 0;
1107         c->ramfc.size = ram_in_ramfc_s() / 8;
1108
1109         if (c->gpfifo.cpu_va) {
1110                 gk20a_err(d, "channel %d : "
1111                            "gpfifo already allocated", c->hw_chid);
1112                 return -EEXIST;
1113         }
1114
1115         c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
1116         c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
1117                                                 c->gpfifo.size,
1118                                                 &iova,
1119                                                 GFP_KERNEL);
1120         if (!c->gpfifo.cpu_va) {
1121                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1122                 err = -ENOMEM;
1123                 goto clean_up;
1124         }
1125
1126         c->gpfifo.iova = iova;
1127         c->gpfifo.entry_num = gpfifo_size;
1128
1129         c->gpfifo.get = c->gpfifo.put = 0;
1130
1131         err = gk20a_get_sgtable(d, &sgt,
1132                         c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
1133         if (err) {
1134                 gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
1135                 goto clean_up;
1136         }
1137
1138         c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
1139                                         &sgt,
1140                                         c->gpfifo.size,
1141                                         0, /* flags */
1142                                         gk20a_mem_flag_none);
1143         if (!c->gpfifo.gpu_va) {
1144                 gk20a_err(d, "channel %d : failed to map"
1145                            " gpu_va for gpfifo", c->hw_chid);
1146                 err = -ENOMEM;
1147                 goto clean_up_sgt;
1148         }
1149
1150         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1151                 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1152
1153         channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1154
1155         channel_gk20a_setup_userd(c);
1156         channel_gk20a_commit_userd(c);
1157
1158         gk20a_mm_l2_invalidate(c->g);
1159
1160         /* TBD: setup engine contexts */
1161
1162         err = channel_gk20a_alloc_priv_cmdbuf(c);
1163         if (err)
1164                 goto clean_up_unmap;
1165
1166         err = channel_gk20a_update_runlist(c, true);
1167         if (err)
1168                 goto clean_up_unmap;
1169
1170         gk20a_free_sgtable(&sgt);
1171
1172         gk20a_dbg_fn("done");
1173         return 0;
1174
1175 clean_up_unmap:
1176         gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
1177                 c->gpfifo.size, gk20a_mem_flag_none);
1178 clean_up_sgt:
1179         gk20a_free_sgtable(&sgt);
1180 clean_up:
1181         dma_free_coherent(d, c->gpfifo.size,
1182                 c->gpfifo.cpu_va, c->gpfifo.iova);
1183         c->gpfifo.cpu_va = NULL;
1184         c->gpfifo.iova = 0;
1185         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1186         gk20a_err(d, "fail");
1187         return err;
1188 }
1189
1190 static inline int wfi_cmd_size(void)
1191 {
1192         return 2;
1193 }
1194 void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
1195 {
1196         /* wfi */
1197         cmd->ptr[(*i)++] = 0x2001001E;
1198         /* handle, ignored */
1199         cmd->ptr[(*i)++] = 0x00000000;
1200 }
1201
1202 static inline bool check_gp_put(struct gk20a *g,
1203                                 struct channel_gk20a *c)
1204 {
1205         u32 put;
1206         /* gp_put changed unexpectedly since last update? */
1207         put = gk20a_bar1_readl(g,
1208                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1209         if (c->gpfifo.put != put) {
1210                 /*TBD: BUG_ON/teardown on this*/
1211                 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1212                            "since last update");
1213                 c->gpfifo.put = put;
1214                 return false; /* surprise! */
1215         }
1216         return true; /* checked out ok */
1217 }
1218
1219 /* Call this periodically to determine how the gpfifo is draining. */
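/*
 * Example of the wrap detection in update_gp_get() below (illustrative
 * numbers): with a 512-entry gpfifo, a GET that moves from 510 to 2 between
 * two reads shows up as new_get (2) < c->gpfifo.get (510), so c->gpfifo.wrap
 * is toggled. recycle_priv_cmdbuf() compares this flag against the wrap value
 * saved in each priv cmd entry to disambiguate GET comparisons across the
 * wrap point.
 */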
1220 static inline u32 update_gp_get(struct gk20a *g,
1221                                 struct channel_gk20a *c)
1222 {
1223         u32 new_get = gk20a_bar1_readl(g,
1224                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
1225         if (new_get < c->gpfifo.get)
1226                 c->gpfifo.wrap = !c->gpfifo.wrap;
1227         c->gpfifo.get = new_get;
1228         return new_get;
1229 }
1230
1231 static inline u32 gp_free_count(struct channel_gk20a *c)
1232 {
1233         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1234                 c->gpfifo.entry_num;
1235 }
1236
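/*
 * Rough example for the check below (assumed numbers): if the scheduling
 * timeout interrupt fires about every 100 ms and timeout_ms_max is 3000 ms,
 * roughly 30 consecutive interrupts with no gpfifo GET progress are needed
 * before the channel is reported as timed out.
 */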
1237 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1238                 u32 timeout_delta_ms)
1239 {
1240         u32 gpfifo_get = update_gp_get(ch->g, ch);
1241         /* Count consecutive timeout ISRs */
1242         if (gpfifo_get == ch->timeout_gpfifo_get) {
1243                 /* we didn't advance since previous channel timeout check */
1244                 ch->timeout_accumulated_ms += timeout_delta_ms;
1245         } else {
1246                 /* first timeout isr encountered */
1247                 ch->timeout_accumulated_ms = timeout_delta_ms;
1248         }
1249
1250         ch->timeout_gpfifo_get = gpfifo_get;
1251
1252         return ch->g->timeouts_enabled &&
1253                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1254 }
1255
1256
1257 /* Issue a syncpoint increment *preceded* by a wait-for-idle
1258  * command.  All commands on the channel will have been
1259  * consumed at the time the fence syncpoint increment occurs.
1260  */
1261 static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
1262 {
1263         struct priv_cmd_entry *cmd = NULL;
1264         struct gk20a *g = c->g;
1265         u32 free_count;
1266         int err;
1267
1268         if (c->has_timedout)
1269                 return -ETIMEDOUT;
1270
1271         update_gp_get(g, c);
1272         free_count = gp_free_count(c);
1273         if (unlikely(!free_count)) {
1274                 gk20a_err(dev_from_gk20a(g),
1275                            "not enough gpfifo space");
1276                 return -EAGAIN;
1277         }
1278
1279         mutex_lock(&c->submit_lock);
1280
1281         if (!c->sync) {
1282                 c->sync = gk20a_channel_sync_create(c);
1283                 if (!c->sync) {
1284                         mutex_unlock(&c->submit_lock);
1285                         return -ENOMEM;
1286                 }
1287         }
1288
1289         err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence);
1290         if (unlikely(err)) {
1291                 mutex_unlock(&c->submit_lock);
1292                 return err;
1293         }
1294
1295         WARN_ON(!c->last_submit_fence.wfi);
1296
1297         c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1298         c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
1299                 pbdma_gp_entry1_length_f(cmd->size);
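        /*
         * Example of the packing above (made-up values): for cmd->gva =
         * 0x1_2345_6000 and cmd->size = 6 words, entry0 = 0x23456000 and
         * entry1 = 0x1 | (6 << 10) = 0x00001801; the length sits in the
         * upper bits of entry1, as the "words << 10" comment in
         * gk20a_submit_channel_gpfifo() notes.
         */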
1300
1301         c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
1302
1303         /* save gp_put */
1304         cmd->gp_put = c->gpfifo.put;
1305
1306         gk20a_bar1_writel(g,
1307                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1308                 c->gpfifo.put);
1309
1310         mutex_unlock(&c->submit_lock);
1311
1312         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1313                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1314
1315         return 0;
1316 }
1317
1318 static u32 get_gp_free_count(struct channel_gk20a *c)
1319 {
1320         update_gp_get(c->g, c);
1321         return gp_free_count(c);
1322 }
1323
1324 static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
1325 {
1326         void *mem = NULL;
1327         unsigned int words;
1328         u64 offset;
1329         struct dma_buf *dmabuf = NULL;
1330
1331         if (gk20a_debug_trace_cmdbuf) {
1332                 u64 gpu_va = (u64)g->entry0 |
1333                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1334                 int err;
1335
1336                 words = pbdma_gp_entry1_length_v(g->entry1);
1337                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1338                 if (!err)
1339                         mem = dma_buf_vmap(dmabuf);
1340         }
1341
1342         if (mem) {
1343                 u32 i;
1344                 /*
1345                  * Write in batches of 128 as there seems to be a limit
1346                  * of how much you can output to ftrace at once.
1347                  */
1348                 for (i = 0; i < words; i += 128U) {
1349                         trace_gk20a_push_cmdbuf(
1350                                 c->g->dev->name,
1351                                 0,
1352                                 min(words - i, 128U),
1353                                 offset + i * sizeof(u32),
1354                                 mem);
1355                 }
1356                 dma_buf_vunmap(dmabuf, mem);
1357         }
1358 }
1359
1360 static int gk20a_channel_add_job(struct channel_gk20a *c,
1361                                  struct gk20a_channel_fence *fence)
1362 {
1363         struct vm_gk20a *vm = c->vm;
1364         struct channel_gk20a_job *job = NULL;
1365         struct mapped_buffer_node **mapped_buffers = NULL;
1366         int err = 0, num_mapped_buffers;
1367
1368         /* job needs reference to this vm */
1369         gk20a_vm_get(vm);
1370
1371         err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
1372         if (err) {
1373                 gk20a_vm_put(vm);
1374                 return err;
1375         }
1376
1377         job = kzalloc(sizeof(*job), GFP_KERNEL);
1378         if (!job) {
1379                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1380                 gk20a_vm_put(vm);
1381                 return -ENOMEM;
1382         }
1383
1384         job->num_mapped_buffers = num_mapped_buffers;
1385         job->mapped_buffers = mapped_buffers;
1386         job->fence = *fence;
1387
1388         mutex_lock(&c->jobs_lock);
1389         list_add_tail(&job->list, &c->jobs);
1390         mutex_unlock(&c->jobs_lock);
1391
1392         return 0;
1393 }
1394
1395 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1396 {
1397         struct gk20a *g = c->g;
1398         struct vm_gk20a *vm = c->vm;
1399         struct channel_gk20a_job *job, *n;
1400         int i;
1401
1402         wake_up(&c->submit_wq);
1403
1404         mutex_lock(&c->submit_lock);
1405         mutex_lock(&c->jobs_lock);
1406         list_for_each_entry_safe(job, n, &c->jobs, list) {
1407                 bool completed = WARN_ON(!c->sync) ||
1408                         c->sync->is_expired(c->sync, &job->fence);
1409                 if (!completed)
1410                         break;
1411
1412                 gk20a_vm_put_buffers(vm, job->mapped_buffers,
1413                                 job->num_mapped_buffers);
1414
1415                 /* job is done. release its reference to vm */
1416                 gk20a_vm_put(vm);
1417
1418                 list_del_init(&job->list);
1419                 kfree(job);
1420                 gk20a_idle(g->dev);
1421         }
1422
1423         /*
1424          * If job list is empty then channel is idle and we can free
1425          * the syncpt here (given aggressive_destroy flag is set)
1426          * Note: if WFI is already scheduled on some other path
1427          * then syncpt is still required to check for idle
1428          */
1429         if (list_empty(&c->jobs) && !c->last_submit_fence.wfi) {
1430                 if (c->sync && c->sync->syncpt_aggressive_destroy) {
1431                         c->sync->destroy(c->sync);
1432                         c->sync = NULL;
1433                 }
1434         }
1435         mutex_unlock(&c->jobs_lock);
1436         mutex_unlock(&c->submit_lock);
1437
1438         for (i = 0; i < nr_completed; i++)
1439                 gk20a_idle(c->g->dev);
1440 }
1441
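/*
 * Usage sketch (illustrative only; error handling elided, and 'wait_id' /
 * 'wait_thresh' are hypothetical syncpoint values). A caller would typically
 * reserve a 4-word priv cmd entry and fill it with add_wait_cmd() before
 * pointing a gpfifo entry at it:
 *
 *        struct priv_cmd_entry *wait_cmd;
 *
 *        if (!gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd))
 *                add_wait_cmd(wait_cmd->ptr, wait_id, wait_thresh);
 */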
1442 void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
1443 {
1444         /* syncpoint_a */
1445         ptr[0] = 0x2001001C;
1446         /* payload */
1447         ptr[1] = thresh;
1448         /* syncpoint_b */
1449         ptr[2] = 0x2001001D;
1450         /* syncpt_id, switch_en, wait */
1451         ptr[3] = (id << 8) | 0x10;
1452 }
1453
1454 static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1455                                 struct nvhost_gpfifo *gpfifo,
1456                                 u32 num_entries,
1457                                 struct nvhost_fence *fence,
1458                                 u32 flags)
1459 {
1460         struct gk20a *g = c->g;
1461         struct device *d = dev_from_gk20a(g);
1462         int err = 0;
1463         int i;
1464         struct priv_cmd_entry *wait_cmd = NULL;
1465         struct priv_cmd_entry *incr_cmd = NULL;
1466         /* we might need two extra gpfifo entries - one for pre fence
1467          * and one for post fence. */
1468         const int extra_entries = 2;
1469
1470         if (c->has_timedout)
1471                 return -ETIMEDOUT;
1472
1473         if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1474                       NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1475             !fence)
1476                 return -EINVAL;
1477
1478 #ifdef CONFIG_DEBUG_FS
1479         /* update debug settings */
1480         if (g->ops.ltc.sync_debugfs)
1481                 g->ops.ltc.sync_debugfs(g);
1482 #endif
1483
1484         gk20a_dbg_info("channel %d", c->hw_chid);
1485
1486         /* gk20a_channel_update releases this ref. */
1487         err = gk20a_busy(g->dev);
1488         if (err) {
1489                 gk20a_err(d, "failed to power on gk20a to submit gpfifo");
1490                 return err;
1491         }
1492
1493         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1494                                           c->hw_chid,
1495                                           num_entries,
1496                                           flags,
1497                                           fence ? fence->syncpt_id : 0, fence ? fence->value : 0);
1498         check_gp_put(g, c);
1499         update_gp_get(g, c);
1500
1501         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1502                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1503
1504         /* Invalidate tlb if it's dirty...                                   */
1505         /* TBD: this should be done in the cmd stream, not with PRIs.        */
1506         /* We don't know what context is currently running...                */
1507         /* Note also: there can be more than one context associated with the */
1508         /* address space (vm).   */
1509         gk20a_mm_tlb_invalidate(c->vm);
1510
1511         /* Make sure we have enough space for gpfifo entries. If not,
1512          * wait for signals from completed submits */
1513         if (gp_free_count(c) < num_entries + extra_entries) {
1514                 err = wait_event_interruptible(c->submit_wq,
1515                         get_gp_free_count(c) >= num_entries + extra_entries ||
1516                         c->has_timedout);
1517         }
1518
1519         if (c->has_timedout) {
1520                 err = -ETIMEDOUT;
1521                 goto clean_up;
1522         }
1523
1524         if (err) {
1525                 gk20a_err(d, "not enough gpfifo space");
1526                 err = -EAGAIN;
1527                 goto clean_up;
1528         }
1529
1530         mutex_lock(&c->submit_lock);
1531
1532         if (!c->sync) {
1533                 c->sync = gk20a_channel_sync_create(c);
1534                 if (!c->sync) {
1535                         err = -ENOMEM;
1536                         mutex_unlock(&c->submit_lock);
1537                         goto clean_up;
1538                 }
1539         }
1540
1541         /*
1542          * optionally insert syncpt wait in the beginning of gpfifo submission
1543          * when user requested and the wait hasn't expired.
1544          * validate that the id makes sense, elide if not
1545          * the only reason this isn't being unceremoniously killed is to
1546          * keep running some tests which trigger this condition
1547          */
1548         if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1549                 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
1550                         err = c->sync->wait_fd(c->sync, fence->syncpt_id,
1551                                         &wait_cmd);
1552                 else
1553                         err = c->sync->wait_syncpt(c->sync, fence->syncpt_id,
1554                                         fence->value, &wait_cmd);
1555         }
1556         if (err) {
1557                 mutex_unlock(&c->submit_lock);
1558                 goto clean_up;
1559         }
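             /*
              * With SYNC_FENCE set, fence->syncpt_id presumably carries a
              * sync-fence fd to wait on; otherwise it is a raw syncpoint
              * id with fence->value as the threshold.  Either path hands
              * back a priv_cmd_entry whose methods are pushed ahead of
              * the user's entries below.
              */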
1560
1562         /* always insert syncpt increment at end of gpfifo submission
1563            to keep track of method completion for idle railgating */
1564         if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
1565                         flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
1566                 err = c->sync->incr_user_fd(c->sync, &incr_cmd,
1567                                             &c->last_submit_fence,
1568                                             &fence->syncpt_id);
1569         else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1570                 err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
1571                                                 &c->last_submit_fence,
1572                                                 &fence->syncpt_id,
1573                                                 &fence->value);
1574         else
1575                 err = c->sync->incr(c->sync, &incr_cmd,
1576                                     &c->last_submit_fence);
1577         if (err) {
1578                 mutex_unlock(&c->submit_lock);
1579                 goto clean_up;
1580         }
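             /*
              * The post-fence always exists: incr() emits a kernel-only
              * increment used for job tracking, while the *_user variants
              * additionally hand back either a new sync-fence fd (in
              * fence->syncpt_id) or a raw syncpoint id/value pair.  The
              * resulting c->last_submit_fence is what
              * gk20a_channel_add_job() and gk20a_channel_finish() rely on.
              */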
1581
1582         if (wait_cmd) {
1583                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1584                         u64_lo32(wait_cmd->gva);
1585                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1586                         u64_hi32(wait_cmd->gva) |
1587                         pbdma_gp_entry1_length_f(wait_cmd->size);
1588                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1589
1590                 c->gpfifo.put = (c->gpfifo.put + 1) &
1591                         (c->gpfifo.entry_num - 1);
1592
1593                 /* save gp_put */
1594                 wait_cmd->gp_put = c->gpfifo.put;
1595         }
1596
1597         for (i = 0; i < num_entries; i++) {
1598                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1599                         gpfifo[i].entry0; /* cmd buf va low 32 */
1600                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1601                         gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
1602                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1603                 c->gpfifo.put = (c->gpfifo.put + 1) &
1604                         (c->gpfifo.entry_num - 1);
1605         }
1606
1607         if (incr_cmd) {
1608                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1609                         u64_lo32(incr_cmd->gva);
1610                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1611                         u64_hi32(incr_cmd->gva) |
1612                         pbdma_gp_entry1_length_f(incr_cmd->size);
1613                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1614
1615                 c->gpfifo.put = (c->gpfifo.put + 1) &
1616                         (c->gpfifo.entry_num - 1);
1617
1618                 /* save gp_put */
1619                 incr_cmd->gp_put = c->gpfifo.put;
1620         }
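             /*
              * Each GP entry is two words: entry0 = low 32 bits of the
              * pushbuffer GPU VA, entry1 = high bits OR'd with the length
              * field (pbdma_gp_entry1_length_f(), i.e. the size in words
              * shifted into place; the user entries are expected to be
              * pre-packed the same way, "va high 32 | words << 10").  PUT
              * always advances modulo the power-of-two entry_num.
              */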
1621
1622         /* Invalidate tlb if it's dirty...                                   */
1623         /* TBD: this should be done in the cmd stream, not with PRIs.        */
1624         /* We don't know what context is currently running...                */
1625         /* Note also: there can be more than one context associated with the */
1626         /* address space (vm).   */
1627         gk20a_mm_tlb_invalidate(c->vm);
1628
1629         /* TODO! Check for errors... */
1630         gk20a_channel_add_job(c, &c->last_submit_fence);
1631
1632         c->cmds_pending = true;
1633         gk20a_bar1_writel(g,
1634                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1635                 c->gpfifo.put);
1636
1637         mutex_unlock(&c->submit_lock);
1638
1639         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1640                                              c->hw_chid,
1641                                              num_entries,
1642                                              flags,
1643                                              fence ? fence->syncpt_id : 0, fence ? fence->value : 0);
1644
1645         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1646                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1647
1648         gk20a_dbg_fn("done");
1649         return err;
1650
1651 clean_up:
1652         gk20a_err(d, "fail");
1653         free_priv_cmdbuf(c, wait_cmd);
1654         free_priv_cmdbuf(c, incr_cmd);
1655         gk20a_idle(g->dev);
1656         return err;
1657 }
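     /*
      * A minimal userspace sketch of the submit path above, assuming the
      * channel fd came from NVHOST_IOCTL_CHANNEL_OPEN, the ring was set up
      * with NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO, and .gpfifo is a 64-bit
      * user pointer value (variable names are hypothetical):
      *
      *   struct nvhost_gpfifo entry = {
      *           .entry0 = (__u32)pushbuf_gva,
      *           .entry1 = (__u32)(pushbuf_gva >> 32) | (num_words << 10),
      *   };
      *   struct nvhost_submit_gpfifo_args args = {
      *           .gpfifo      = (__u64)(uintptr_t)&entry,
      *           .num_entries = 1,
      *           .flags       = NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET,
      *   };
      *   ioctl(channel_fd, NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO, &args);
      *   -- args.fence.syncpt_id / args.fence.value now hold the post-fence
      */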
1658
1659 void gk20a_remove_channel_support(struct channel_gk20a *c)
1660 {
1661
1662 }
1663
1664 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1665 {
1666         struct channel_gk20a *c = g->fifo.channel+chid;
1667         c->g = g;
1668         c->in_use = false;
1669         c->hw_chid = chid;
1670         c->bound = false;
1671         c->remove_support = gk20a_remove_channel_support;
1672         mutex_init(&c->jobs_lock);
1673         mutex_init(&c->submit_lock);
1674         INIT_LIST_HEAD(&c->jobs);
1675 #if defined(CONFIG_GK20A_CYCLE_STATS)
1676         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1677 #endif
1678         INIT_LIST_HEAD(&c->dbg_s_list);
1679         mutex_init(&c->dbg_s_lock);
1680
1681         return 0;
1682 }
1683
1684 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
1685 {
1686         int err = 0;
1687
1688         if (!ch->cmds_pending)
1689                 return 0;
1690
1691         /* Do not wait for a timedout channel */
1692         if (ch->has_timedout)
1693                 return -ETIMEDOUT;
1694
1695         if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
1696                 gk20a_dbg_fn("issuing wfi, incr to finish the channel");
1697                 err = gk20a_channel_submit_wfi(ch);
1698         }
1699         if (err)
1700                 return err;
1701
1702         BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));
1703
1704         gk20a_dbg_fn("waiting for channel to finish thresh:%d",
1705                       ch->last_submit_fence.thresh);
1706
1707         if (ch->sync) {
1708                 err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence,
1709                                                                 timeout);
1710                 if (WARN_ON(err))
1711                         dev_warn(dev_from_gk20a(ch->g),
1712                                "timed out waiting for gk20a channel to finish");
1713                 else
1714                         ch->cmds_pending = false;
1715         }
1716
1717         return err;
1718 }
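     /*
      * "WFI" here appears to be a wait-for-idle method followed by a
      * syncpoint increment (see gk20a_channel_submit_wfi() and the debug
      * message above), so waiting on that fence from the CPU guarantees
      * the engine has drained - which is what the suspend path below
      * relies on before railgating.
      */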
1719
1720 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
1721                                         ulong id, u32 offset,
1722                                         u32 payload, long timeout)
1723 {
1724         struct platform_device *pdev = ch->g->dev;
1725         struct dma_buf *dmabuf;
1726         void *data;
1727         u32 *semaphore;
1728         int ret = 0;
1729         long remain;
1730
1731         /* do not wait if channel has timed out */
1732         if (ch->has_timedout)
1733                 return -ETIMEDOUT;
1734
1735         dmabuf = dma_buf_get(id);
1736         if (IS_ERR(dmabuf)) {
1737                 gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
1738                            id);
1739                 return -EINVAL;
1740         }
1741
1742         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
1743         if (!data) {
1744                 gk20a_err(&pdev->dev, "failed to map notifier memory");
1745                 ret = -EINVAL;
1746                 goto cleanup_put;
1747         }
1748
1749         semaphore = data + (offset & ~PAGE_MASK);
1750
1751         remain = wait_event_interruptible_timeout(
1752                         ch->semaphore_wq,
1753                         *semaphore == payload || ch->has_timedout,
1754                         timeout);
1755
1756         if (remain == 0 && *semaphore != payload)
1757                 ret = -ETIMEDOUT;
1758         else if (remain < 0)
1759                 ret = remain;
1760
1761         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
1762 cleanup_put:
1763         dma_buf_put(dmabuf);
1764         return ret;
1765 }
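     /*
      * The semaphore lives in a user-supplied dma-buf: offset >> PAGE_SHIFT
      * selects the page to kmap and offset & ~PAGE_MASK the word within it.
      * The CPU then sleeps on ch->semaphore_wq until the value matches the
      * payload; gk20a_channel_semaphore_wakeup() further down wakes these
      * waiters, presumably from the GPU's non-stalling interrupt path.
      */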
1766
1767 static int gk20a_channel_wait(struct channel_gk20a *ch,
1768                               struct nvhost_wait_args *args)
1769 {
1770         struct device *d = dev_from_gk20a(ch->g);
1771         struct dma_buf *dmabuf;
1772         struct notification *notif;
1773         struct timespec tv;
1774         u64 jiffies;
1775         ulong id;
1776         u32 offset;
1777         unsigned long timeout;
1778         int remain, ret = 0;
1779
1780         gk20a_dbg_fn("");
1781
1782         if (ch->has_timedout)
1783                 return -ETIMEDOUT;
1784
1785         if (args->timeout == NVHOST_NO_TIMEOUT)
1786                 timeout = MAX_SCHEDULE_TIMEOUT;
1787         else
1788                 timeout = (u32)msecs_to_jiffies(args->timeout);
1789
1790         switch (args->type) {
1791         case NVHOST_WAIT_TYPE_NOTIFIER:
1792                 id = args->condition.notifier.nvmap_handle;
1793                 offset = args->condition.notifier.offset;
1794
1795                 dmabuf = dma_buf_get(id);
1796                 if (IS_ERR(dmabuf)) {
1797                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
1798                                    id);
1799                         return -EINVAL;
1800                 }
1801
1802                 notif = dma_buf_vmap(dmabuf);
1803                 if (!notif) {
1804                         gk20a_err(d, "failed to map notifier memory");
1805                         return -ENOMEM;
1806                 }
1807
1808                 notif = (struct notification *)((uintptr_t)notif + offset);
1809
1810                 /* user should set status pending before
1811                  * calling this ioctl */
1812                 remain = wait_event_interruptible_timeout(
1813                                 ch->notifier_wq,
1814                                 notif->status == 0 || ch->has_timedout,
1815                                 timeout);
1816
1817                 if (remain == 0 && notif->status != 0) {
1818                         ret = -ETIMEDOUT;
1819                         goto notif_clean_up;
1820                 } else if (remain < 0) {
1821                         ret = -EINTR;
1822                         goto notif_clean_up;
1823                 }
1824
1825                 /* TBD: fill in correct information */
1826                 jiffies = get_jiffies_64();
1827                 jiffies_to_timespec(jiffies, &tv);
1828                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
1829                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
1830                 notif->info32 = 0xDEADBEEF; /* should be object name */
1831                 notif->info16 = ch->hw_chid; /* should be method offset */
1832
1833 notif_clean_up:
1834                 dma_buf_vunmap(dmabuf, notif);
1835                 return ret;
1836
1837         case NVHOST_WAIT_TYPE_SEMAPHORE:
1838                 ret = gk20a_channel_wait_semaphore(ch,
1839                                 args->condition.semaphore.nvmap_handle,
1840                                 args->condition.semaphore.offset,
1841                                 args->condition.semaphore.payload,
1842                                 timeout);
1843
1844                 break;
1845
1846         default:
1847                 ret = -EINVAL;
1848                 break;
1849         }
1850
1851         return ret;
1852 }
1853
1854 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
1855                 u32 priority)
1856 {
1857         u32 timeslice_timeout;
1858         /* set priority of graphics channel */
1859         switch (priority) {
1860         case NVHOST_PRIORITY_LOW:
1861                 /* 64 << 3 = 512us */
1862                 timeslice_timeout = 64;
1863                 break;
1864         case NVHOST_PRIORITY_MEDIUM:
1865                 /* 128 << 3 = 1024us */
1866                 timeslice_timeout = 128;
1867                 break;
1868         case NVHOST_PRIORITY_HIGH:
1869                 /* 255 << 3 = 2040us */
1870                 timeslice_timeout = 255;
1871                 break;
1872         default:
1873                 pr_err("Unsupported priority %u\n", priority);
1874                 return -EINVAL;
1875         }
1876         channel_gk20a_set_schedule_params(ch,
1877                         timeslice_timeout);
1878         return 0;
1879 }
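     /*
      * The effective timeslice is assumed to be timeslice_timeout << 3
      * microseconds (i.e. channel_gk20a_set_schedule_params() programs a
      * scale of 3), giving roughly 512us / 1024us / 2040us for low /
      * medium / high; 255 is presumably the largest value the field holds.
      */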
1880
1881 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
1882                             struct nvhost_zcull_bind_args *args)
1883 {
1884         struct gk20a *g = ch->g;
1885         struct gr_gk20a *gr = &g->gr;
1886
1887         gk20a_dbg_fn("");
1888
1889         return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
1890                                 args->gpu_va, args->mode);
1891 }
1892
1893 /* in this context the "channel" is the host1x channel which
1894  * maps to *all* gk20a channels */
1895 int gk20a_channel_suspend(struct gk20a *g)
1896 {
1897         struct fifo_gk20a *f = &g->fifo;
1898         u32 chid;
1899         bool channels_in_use = false;
1900         struct device *d = dev_from_gk20a(g);
1901         int err;
1902
1903         gk20a_dbg_fn("");
1904
1905         /* idle the engine by submitting WFI on non-KEPLER_C channel */
1906         for (chid = 0; chid < f->num_channels; chid++) {
1907                 struct channel_gk20a *c = &f->channel[chid];
1908                 if (c->in_use && c->obj_class != KEPLER_C) {
1909                         gk20a_platform_channel_busy(g->dev);
1910                         err = gk20a_channel_submit_wfi(c);
1911                         if (err) {
1912                                 gk20a_err(d, "cannot idle channel %d\n",
1913                                                 chid);
1914                                 return err;
1915                         }
1916
1917                         if (c->sync)
1918                                 c->sync->wait_cpu(c->sync,
1919                                                 &c->last_submit_fence, 500000);
1920                         gk20a_platform_channel_idle(g->dev);
1921                         break;
1922                 }
1923         }
1924
1925         for (chid = 0; chid < f->num_channels; chid++) {
1926                 if (f->channel[chid].in_use) {
1927
1928                         gk20a_dbg_info("suspend channel %d", chid);
1929                         /* disable channel */
1930                         gk20a_writel(g, ccsr_channel_r(chid),
1931                                 gk20a_readl(g, ccsr_channel_r(chid)) |
1932                                 ccsr_channel_enable_clr_true_f());
1933                         /* preempt the channel */
1934                         gk20a_fifo_preempt_channel(g, chid);
1935
1936                         channels_in_use = true;
1937                 }
1938         }
1939
1940         if (channels_in_use) {
1941                 gk20a_fifo_update_runlist(g, 0, ~0, false, true);
1942
1943                 for (chid = 0; chid < f->num_channels; chid++) {
1944                         if (f->channel[chid].in_use)
1945                                 channel_gk20a_unbind(&f->channel[chid]);
1946                 }
1947         }
1948
1949         gk20a_dbg_fn("done");
1950         return 0;
1951 }
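     /*
      * Suspend sequence in short: idle the engine with a WFI on one
      * non-KEPLER_C channel, disable and preempt every in-use channel,
      * drop them from the runlist and unbind them from their hardware
      * channel IDs; gk20a_channel_resume() below rebinds the channels and
      * rebuilds the runlist.
      */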
1952
1953 /* in this context the "channel" is the host1x channel which
1954  * maps to *all* gk20a channels */
1955 int gk20a_channel_resume(struct gk20a *g)
1956 {
1957         struct fifo_gk20a *f = &g->fifo;
1958         u32 chid;
1959         bool channels_in_use = false;
1960
1961         gk20a_dbg_fn("");
1962
1963         for (chid = 0; chid < f->num_channels; chid++) {
1964                 if (f->channel[chid].in_use) {
1965                         gk20a_dbg_info("resume channel %d", chid);
1966                         g->ops.fifo.bind_channel(&f->channel[chid]);
1967                         channels_in_use = true;
1968                 }
1969         }
1970
1971         if (channels_in_use)
1972                 gk20a_fifo_update_runlist(g, 0, ~0, true, true);
1973
1974         gk20a_dbg_fn("done");
1975         return 0;
1976 }
1977
1978 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
1979 {
1980         struct fifo_gk20a *f = &g->fifo;
1981         u32 chid;
1982
1983         gk20a_dbg_fn("");
1984
1985         for (chid = 0; chid < f->num_channels; chid++) {
1986                 struct channel_gk20a *c = g->fifo.channel+chid;
1987                 if (c->in_use)
1988                         wake_up_interruptible_all(&c->semaphore_wq);
1989         }
1990 }
1991
1992 static int gk20a_ioctl_channel_submit_gpfifo(
1993         struct channel_gk20a *ch,
1994         struct nvhost_submit_gpfifo_args *args)
1995 {
1996         void *gpfifo;
1997         u32 size;
1998         int ret = 0;
1999
2000         gk20a_dbg_fn("");
2001
2002         if (ch->has_timedout)
2003                 return -ETIMEDOUT;
2004
             /* num_entries comes from userspace and size is only a u32, so
              * reject counts whose byte size would overflow before allocating */
             if (args->num_entries > UINT_MAX / sizeof(struct nvhost_gpfifo))
                     return -EINVAL;
2005         size = args->num_entries * sizeof(struct nvhost_gpfifo);
2006
2007         gpfifo = kzalloc(size, GFP_KERNEL);
2008         if (!gpfifo)
2009                 return -ENOMEM;
2010
2011         if (copy_from_user(gpfifo,
2012                            (void __user *)(uintptr_t)args->gpfifo, size)) {
2013                 ret = -EINVAL;
2014                 goto clean_up;
2015         }
2016
2017         ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
2018                                         &args->fence, args->flags);
2019
2020 clean_up:
2021         kfree(gpfifo);
2022         return ret;
2023 }
2024
2025 void gk20a_init_channel(struct gpu_ops *gops)
2026 {
2027         gops->fifo.bind_channel = channel_gk20a_bind;
2028 }
2029
2030 long gk20a_channel_ioctl(struct file *filp,
2031         unsigned int cmd, unsigned long arg)
2032 {
2033         struct channel_gk20a *ch = filp->private_data;
2034         struct platform_device *dev = ch->g->dev;
2035         u8 buf[NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE];
2036         int err = 0;
2037
2038         if ((_IOC_TYPE(cmd) != NVHOST_IOCTL_MAGIC) ||
2039                 (_IOC_NR(cmd) == 0) ||
2040                 (_IOC_NR(cmd) > NVHOST_IOCTL_CHANNEL_LAST) ||
2041                 (_IOC_SIZE(cmd) > NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE))
2042                 return -EFAULT;
2043
2044         if (_IOC_DIR(cmd) & _IOC_WRITE) {
2045                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2046                         return -EFAULT;
2047         }
2048
2049         switch (cmd) {
2050         case NVHOST_IOCTL_CHANNEL_OPEN:
2051         {
2052                 int fd;
2053                 struct file *file;
2054                 char *name;
2055
2056                 err = get_unused_fd_flags(O_RDWR);
2057                 if (err < 0)
2058                         break;
2059                 fd = err;
2060
2061                 name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
2062                                 dev_name(&dev->dev), fd);
2063                 if (!name) {
2064                         err = -ENOMEM;
2065                         put_unused_fd(fd);
2066                         break;
2067                 }
2068
2069                 file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR);
2070                 kfree(name);
2071                 if (IS_ERR(file)) {
2072                         err = PTR_ERR(file);
2073                         put_unused_fd(fd);
2074                         break;
2075                 }
2076                 err = __gk20a_channel_open(ch->g, file);
2077                 if (err) {
2078                         put_unused_fd(fd);
2079                         fput(file);
2080                         break;
2081                 }
2082                 /* publish the fd only once the channel is fully open */
2083                 fd_install(fd, file);
2084
2085                 ((struct nvhost_channel_open_args *)buf)->channel_fd = fd;
2086                 break;
2087         }
2088         case NVHOST_IOCTL_CHANNEL_SET_NVMAP_FD:
2089                 break;
2090         case NVHOST_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2091                 err = gk20a_busy(dev);
2092                 if (err) {
2093                         dev_err(&dev->dev,
2094                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2095                                 __func__, cmd);
2096                         return err;
2097                 }
2098                 err = gk20a_alloc_obj_ctx(ch,
2099                                 (struct nvhost_alloc_obj_ctx_args *)buf);
2100                 gk20a_idle(dev);
2101                 break;
2102         case NVHOST_IOCTL_CHANNEL_FREE_OBJ_CTX:
2103                 err = gk20a_busy(dev);
2104                 if (err) {
2105                         dev_err(&dev->dev,
2106                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2107                                 __func__, cmd);
2108                         return err;
2109                 }
2110                 err = gk20a_free_obj_ctx(ch,
2111                                 (struct nvhost_free_obj_ctx_args *)buf);
2112                 gk20a_idle(dev);
2113                 break;
2114         case NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO:
2115                 err = gk20a_busy(dev);
2116                 if (err) {
2117                         dev_err(&dev->dev,
2118                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2119                                 __func__, cmd);
2120                         return err;
2121                 }
2122                 err = gk20a_alloc_channel_gpfifo(ch,
2123                                 (struct nvhost_alloc_gpfifo_args *)buf);
2124                 gk20a_idle(dev);
2125                 break;
2126         case NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2127                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2128                                 (struct nvhost_submit_gpfifo_args *)buf);
2129                 break;
2130         case NVHOST_IOCTL_CHANNEL_WAIT:
2131                 err = gk20a_busy(dev);
2132                 if (err) {
2133                         dev_err(&dev->dev,
2134                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2135                                 __func__, cmd);
2136                         return err;
2137                 }
2138                 err = gk20a_channel_wait(ch,
2139                                 (struct nvhost_wait_args *)buf);
2140                 gk20a_idle(dev);
2141                 break;
2142         case NVHOST_IOCTL_CHANNEL_ZCULL_BIND:
2143                 err = gk20a_busy(dev);
2144                 if (err) {
2145                         dev_err(&dev->dev,
2146                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2147                                 __func__, cmd);
2148                         return err;
2149                 }
2150                 err = gk20a_channel_zcull_bind(ch,
2151                                 (struct nvhost_zcull_bind_args *)buf);
2152                 gk20a_idle(dev);
2153                 break;
2154         case NVHOST_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2155                 err = gk20a_busy(dev);
2156                 if (err) {
2157                         dev_err(&dev->dev,
2158                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2159                                 __func__, cmd);
2160                         return err;
2161                 }
2162                 err = gk20a_init_error_notifier(ch,
2163                                 (struct nvhost_set_error_notifier *)buf);
2164                 gk20a_idle(dev);
2165                 break;
2166 #ifdef CONFIG_GK20A_CYCLE_STATS
2167         case NVHOST_IOCTL_CHANNEL_CYCLE_STATS:
2168                 err = gk20a_busy(dev);
2169                 if (err) {
2170                         dev_err(&dev->dev,
2171                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2172                                 __func__, cmd);
2173                         return err;
2174                 }
2175                 err = gk20a_channel_cycle_stats(ch,
2176                                 (struct nvhost_cycle_stats_args *)buf);
2177                 gk20a_idle(dev);
2178                 break;
2179 #endif
2180         case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT:
2181         {
2182                 u32 timeout =
2183                         (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
2184                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2185                            timeout, ch->hw_chid);
2186                 ch->timeout_ms_max = timeout;
2187                 break;
2188         }
2189         case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2190         {
2191                 u32 timeout =
2192                         (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
2193                 bool timeout_debug_dump = !((u32)
2194                         ((struct nvhost_set_timeout_ex_args *)buf)->flags &
2195                         (1 << NVHOST_TIMEOUT_FLAG_DISABLE_DUMP));
2196                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2197                            timeout, ch->hw_chid);
2198                 ch->timeout_ms_max = timeout;
2199                 ch->timeout_debug_dump = timeout_debug_dump;
2200                 break;
2201         }
2202         case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT:
2203                 ((struct nvhost_get_param_args *)buf)->value =
2204                         ch->has_timedout;
2205                 break;
2206         case NVHOST_IOCTL_CHANNEL_SET_PRIORITY:
2207                 err = gk20a_busy(dev);
2208                 if (err) {
2209                         dev_err(&dev->dev,
2210                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2211                                 __func__, cmd);
2212                         return err;
2213                 }
2214                 err = gk20a_channel_set_priority(ch,
2215                         ((struct nvhost_set_priority_args *)buf)->priority);
2216                 gk20a_idle(dev);
2217                 break;
2218         default:
2219                 dev_err(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2220                 err = -ENOTTY;
2221                 break;
2222         }
2223
2224         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2225                 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
2226
2227         return err;
2228 }