gpu: nvgpu: Protect sync by an own lock
linux-3.10.git: drivers/gpu/nvgpu/gk20a/channel_gk20a.c
1 /*
2  * GK20A Graphics channel
3  *
4  * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/nvhost.h>
20 #include <linux/list.h>
21 #include <linux/delay.h>
22 #include <linux/highmem.h> /* needed for nvmap.h */
23 #include <trace/events/gk20a.h>
24 #include <linux/scatterlist.h>
25 #include <linux/file.h>
26 #include <linux/anon_inodes.h>
27 #include <linux/dma-buf.h>
28 #include <linux/vmalloc.h>
29
30 #include "debug_gk20a.h"
31
32 #include "gk20a.h"
33 #include "dbg_gpu_gk20a.h"
34 #include "fence_gk20a.h"
35 #include "semaphore_gk20a.h"
36
37 #include "hw_ram_gk20a.h"
38 #include "hw_fifo_gk20a.h"
39 #include "hw_pbdma_gk20a.h"
40 #include "hw_ccsr_gk20a.h"
41 #include "hw_ltc_gk20a.h"
42
43 #define NVMAP_HANDLE_PARAM_SIZE 1
44
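/*
 * Above this many in-use channels, per-channel sync objects are torn down
 * eagerly at channel unbind time instead of being kept until the channel is
 * freed; see allocate_channel(), free_channel() and channel_gk20a_unbind().
 */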
45 #define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT       64      /* channels */
46
47 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
48 static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
49
50 static void free_priv_cmdbuf(struct channel_gk20a *c,
51                              struct priv_cmd_entry *e);
52
53 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
54 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
55
56 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
57 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
58
59 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
60
61 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
62                                         bool add);
63 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
64
65 static void gk20a_channel_clean_up_jobs(struct work_struct *work);
66
67 /* allocate GPU channel */
68 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
69 {
70         struct channel_gk20a *ch = NULL;
71         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
72
73         mutex_lock(&f->free_chs_mutex);
74         if (!list_empty(&f->free_chs)) {
75                 ch = list_first_entry(&f->free_chs, struct channel_gk20a,
76                                 free_chs);
77                 list_del(&ch->free_chs);
78                 WARN_ON(atomic_read(&ch->ref_count));
79                 WARN_ON(ch->referenceable);
80                 f->used_channels++;
81         }
82         mutex_unlock(&f->free_chs_mutex);
83
84         if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
85                 platform->aggressive_sync_destroy = true;
86
87         return ch;
88 }
89
90 static void free_channel(struct fifo_gk20a *f,
91                 struct channel_gk20a *ch)
92 {
93         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
94
95         trace_gk20a_release_used_channel(ch->hw_chid);
96         /* refcount is zero here and channel is in a freed/dead state */
97         mutex_lock(&f->free_chs_mutex);
98         /* add to head to increase visibility of timing-related bugs */
99         list_add(&ch->free_chs, &f->free_chs);
100         f->used_channels--;
101         mutex_unlock(&f->free_chs_mutex);
102
103         if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
104                 platform->aggressive_sync_destroy = false;
105 }
106
107 int channel_gk20a_commit_va(struct channel_gk20a *c)
108 {
109         gk20a_dbg_fn("");
110
111         if (!c->inst_block.cpu_va)
112                 return -ENOMEM;
113
114         gk20a_init_inst_block(&c->inst_block, c->vm,
115                         c->vm->gmmu_page_sizes[gmmu_page_size_big]);
116
117         return 0;
118 }
119
120 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
121 {
122         u32 addr_lo;
123         u32 addr_hi;
124         void *inst_ptr;
125
126         gk20a_dbg_fn("");
127
128         inst_ptr = c->inst_block.cpu_va;
129         if (!inst_ptr)
130                 return -ENOMEM;
131
132         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
133         addr_hi = u64_hi32(c->userd_iova);
134
135         gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
136                 c->hw_chid, (u64)c->userd_iova);
137
138         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
139                  pbdma_userd_target_vid_mem_f() |
140                  pbdma_userd_addr_f(addr_lo));
141
142         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
143                  pbdma_userd_target_vid_mem_f() |
144                  pbdma_userd_hi_addr_f(addr_hi));
145
146         return 0;
147 }
148
149 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
150                                 u32 timeslice_timeout)
151 {
152         void *inst_ptr;
153         int shift = 3;
154         int value = timeslice_timeout;
155
156         inst_ptr = c->inst_block.cpu_va;
157         if (!inst_ptr)
158                 return -ENOMEM;
159
160         /* disable channel */
161         c->g->ops.fifo.disable_channel(c);
162
163         /* preempt the channel */
164         WARN_ON(gk20a_fifo_preempt(c->g, c));
165
166         /* value field is 8 bits long */
167         while (value >= 1 << 8) {
168                 value >>= 1;
169                 shift++;
170         }
171
172         /* time slice register is only 18 bits long */
173         if ((value << shift) >= 1<<19) {
174                 pr_err("Requested timeslice value is clamped to 18 bits\n");
175                 value = 255;
176                 shift = 10;
177         }
178
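        /*
         * The register packs an 8-bit mantissa (value) and a scale exponent
         * (shift, written at bit 12 below), so the effective timeslice is
         * roughly value << shift; the clamp above caps it at 255 << 10
         * (about 2^18).
         */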
179         /* set new timeslice */
180         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
181                 value | (shift << 12) |
182                 fifo_runlist_timeslice_enable_true_f());
183
184         /* enable channel */
185         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
186                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
187                 ccsr_channel_enable_set_true_f());
188
189         return 0;
190 }
191
192 int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
193                         u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
194 {
195         void *inst_ptr;
196
197         gk20a_dbg_fn("");
198
199         inst_ptr = c->inst_block.cpu_va;
200         if (!inst_ptr)
201                 return -ENOMEM;
202
203         memset(inst_ptr, 0, ram_fc_size_val_v());
204
205         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
206                 pbdma_gp_base_offset_f(
207                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
208
209         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
210                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
211                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
212
213         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
214                  c->g->ops.fifo.get_pbdma_signature(c->g));
215
216         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
217                 pbdma_formats_gp_fermi0_f() |
218                 pbdma_formats_pb_fermi1_f() |
219                 pbdma_formats_mp_fermi0_f());
220
221         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
222                 pbdma_pb_header_priv_user_f() |
223                 pbdma_pb_header_method_zero_f() |
224                 pbdma_pb_header_subchannel_zero_f() |
225                 pbdma_pb_header_level_main_f() |
226                 pbdma_pb_header_first_true_f() |
227                 pbdma_pb_header_type_inc_f());
228
229         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
230                 pbdma_subdevice_id_f(1) |
231                 pbdma_subdevice_status_active_f() |
232                 pbdma_subdevice_channel_dma_enable_f());
233
234         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
235
236         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
237                 pbdma_acquire_retry_man_2_f() |
238                 pbdma_acquire_retry_exp_2_f() |
239                 pbdma_acquire_timeout_exp_max_f() |
240                 pbdma_acquire_timeout_man_max_f() |
241                 pbdma_acquire_timeout_en_disable_f());
242
243         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
244                 fifo_runlist_timeslice_timeout_128_f() |
245                 fifo_runlist_timeslice_timescale_3_f() |
246                 fifo_runlist_timeslice_enable_true_f());
247
248         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
249                 fifo_pb_timeslice_timeout_16_f() |
250                 fifo_pb_timeslice_timescale_0_f() |
251                 fifo_pb_timeslice_enable_true_f());
252
253         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
254
255         return channel_gk20a_commit_userd(c);
256 }
257
258 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
259 {
260         BUG_ON(!c->userd_cpu_va);
261
262         gk20a_dbg_fn("");
263
264         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
265         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
266         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
267         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
268         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
269         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
270         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
271         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
272         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
273         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
274
275         return 0;
276 }
277
278 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
279 {
280         struct gk20a *g = ch_gk20a->g;
281         struct fifo_gk20a *f = &g->fifo;
282         struct fifo_engine_info_gk20a *engine_info =
283                 f->engine_info + ENGINE_GR_GK20A;
284
285         u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block)
286                 >> ram_in_base_shift_v();
287
288         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
289                 ch_gk20a->hw_chid, inst_ptr);
290
291         ch_gk20a->bound = true;
292
293         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
294                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
295                  ~ccsr_channel_runlist_f(~0)) |
296                  ccsr_channel_runlist_f(engine_info->runlist_id));
297
298         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
299                 ccsr_channel_inst_ptr_f(inst_ptr) |
300                 ccsr_channel_inst_target_vid_mem_f() |
301                 ccsr_channel_inst_bind_true_f());
302
303         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
304                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
305                  ~ccsr_channel_enable_set_f(~0)) |
306                  ccsr_channel_enable_set_true_f());
307 }
308
309 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
310 {
311         struct gk20a *g = ch_gk20a->g;
312         struct gk20a_platform *platform = gk20a_get_platform(g->dev);
313
314         gk20a_dbg_fn("");
315
316         if (ch_gk20a->bound)
317                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
318                         ccsr_channel_inst_ptr_f(0) |
319                         ccsr_channel_inst_bind_false_f());
320
321         ch_gk20a->bound = false;
322
 323         /*
 324          * If aggressive sync destroy is enabled, we can destroy the
 325          * syncpoint resource at this point;
 326          * if not, it will be destroyed at channel_free().
 327          */
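        /* accesses to ch->sync are protected by ch->sync_lock */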
328         mutex_lock(&ch_gk20a->sync_lock);
329         if (ch_gk20a->sync && platform->aggressive_sync_destroy) {
330
331                 ch_gk20a->sync->destroy(ch_gk20a->sync);
332                 ch_gk20a->sync = NULL;
333         }
334         mutex_unlock(&ch_gk20a->sync_lock);
335 }
336
337 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
338 {
339         int err;
340
341         gk20a_dbg_fn("");
342
343         err = gk20a_alloc_inst_block(g, &ch->inst_block);
344         if (err)
345                 return err;
346
347         gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
348                 ch->hw_chid, (u64)gk20a_mem_phys(&ch->inst_block));
349
350         gk20a_dbg_fn("done");
351         return 0;
352 }
353
354 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
355 {
356         gk20a_free_inst_block(g, &ch->inst_block);
357 }
358
359 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
360 {
361         return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
362 }
363
364 void channel_gk20a_enable(struct channel_gk20a *ch)
365 {
366         /* enable channel */
367         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
368                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
369                 ccsr_channel_enable_set_true_f());
370 }
371
372 void channel_gk20a_disable(struct channel_gk20a *ch)
373 {
374         /* disable channel */
375         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
376                 gk20a_readl(ch->g,
377                         ccsr_channel_r(ch->hw_chid)) |
378                         ccsr_channel_enable_clr_true_f());
379 }
380
381 void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
382 {
383         struct channel_gk20a_job *job, *n;
384         bool released_job_semaphore = false;
385
386         gk20a_dbg_fn("");
387
388         /* make sure new kickoffs are prevented */
389         ch->has_timedout = true;
390
391         ch->g->ops.fifo.disable_channel(ch);
392
393         if (channel_preempt)
394                 gk20a_fifo_preempt(ch->g, ch);
395
396         /* ensure no fences are pending */
397         mutex_lock(&ch->sync_lock);
398         if (ch->sync)
399                 ch->sync->set_min_eq_max(ch->sync);
400         mutex_unlock(&ch->sync_lock);
401
402         /* release all job semaphores (applies only to jobs that use
403            semaphore synchronization) */
404         mutex_lock(&ch->jobs_lock);
405         list_for_each_entry_safe(job, n, &ch->jobs, list) {
406                 if (job->post_fence->semaphore) {
407                         gk20a_semaphore_release(job->post_fence->semaphore);
408                         released_job_semaphore = true;
409                 }
410         }
411         mutex_unlock(&ch->jobs_lock);
412
413         if (released_job_semaphore)
414                 wake_up_interruptible_all(&ch->semaphore_wq);
415
416         gk20a_channel_update(ch, 0);
417 }
418
419 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
420 {
421         bool channel_idle = false;
422         unsigned long end_jiffies = jiffies +
423                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
424
425         do {
426                 mutex_lock(&ch->jobs_lock);
427                 channel_idle = list_empty(&ch->jobs);
428                 mutex_unlock(&ch->jobs_lock);
429                 if (channel_idle)
430                         break;
431
432                 usleep_range(1000, 3000);
433         } while (time_before(jiffies, end_jiffies)
434                         || !tegra_platform_is_silicon());
435
436         if (!channel_idle) {
437                 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
438                                 ch->hw_chid);
439                 return -EBUSY;
440         }
441
442         return 0;
443 }
444
445 void gk20a_disable_channel(struct channel_gk20a *ch)
446 {
447         gk20a_channel_abort(ch, true);
448         channel_gk20a_update_runlist(ch, false);
449 }
450
451 #if defined(CONFIG_GK20A_CYCLE_STATS)
452
453 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
454 {
455         /* disable existing cyclestats buffer */
456         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
457         if (ch->cyclestate.cyclestate_buffer_handler) {
458                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
459                                 ch->cyclestate.cyclestate_buffer);
460                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
461                 ch->cyclestate.cyclestate_buffer_handler = NULL;
462                 ch->cyclestate.cyclestate_buffer = NULL;
463                 ch->cyclestate.cyclestate_buffer_size = 0;
464         }
465         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
466 }
467
468 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
469                        struct nvgpu_cycle_stats_args *args)
470 {
471         struct dma_buf *dmabuf;
472         void *virtual_address;
473
474         /* is it allowed to handle calls for current GPU? */
475         if (0 == (ch->g->gpu_characteristics.flags &
476                         NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS))
477                 return -ENOSYS;
478
479         if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {
480
481                 /* set up new cyclestats buffer */
482                 dmabuf = dma_buf_get(args->dmabuf_fd);
483                 if (IS_ERR(dmabuf))
484                         return PTR_ERR(dmabuf);
485                 virtual_address = dma_buf_vmap(dmabuf);
486                 if (!virtual_address)
487                         return -ENOMEM;
488
489                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
490                 ch->cyclestate.cyclestate_buffer = virtual_address;
491                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
492                 return 0;
493
494         } else if (!args->dmabuf_fd &&
495                         ch->cyclestate.cyclestate_buffer_handler) {
496                 gk20a_free_cycle_stats_buffer(ch);
497                 return 0;
498
499         } else if (!args->dmabuf_fd &&
500                         !ch->cyclestate.cyclestate_buffer_handler) {
 501                 /* no request from GL */
502                 return 0;
503
504         } else {
505                 pr_err("channel already has cyclestats buffer\n");
506                 return -EINVAL;
507         }
508 }
509
510
511 static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
512 {
513         int ret;
514
515         mutex_lock(&ch->cs_client_mutex);
516         if (ch->cs_client)
517                 ret = gr_gk20a_css_flush(ch->g, ch->cs_client);
518         else
519                 ret = -EBADF;
520         mutex_unlock(&ch->cs_client_mutex);
521
522         return ret;
523 }
524
525 static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
526                                 u32 dmabuf_fd,
527                                 u32 perfmon_id_count,
528                                 u32 *perfmon_id_start)
529 {
530         int ret;
531
532         mutex_lock(&ch->cs_client_mutex);
533         if (ch->cs_client) {
534                 ret = -EEXIST;
535         } else {
536                 ret = gr_gk20a_css_attach(ch->g,
537                                         dmabuf_fd,
538                                         perfmon_id_count,
539                                         perfmon_id_start,
540                                         &ch->cs_client);
541         }
542         mutex_unlock(&ch->cs_client_mutex);
543
544         return ret;
545 }
546
547 static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
548 {
549         int ret;
550
551         mutex_lock(&ch->cs_client_mutex);
552         if (ch->cs_client) {
553                 ret = gr_gk20a_css_detach(ch->g, ch->cs_client);
554                 ch->cs_client = NULL;
555         } else {
556                 ret = 0;
557         }
558         mutex_unlock(&ch->cs_client_mutex);
559
560         return ret;
561 }
562
563 static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
564                         struct nvgpu_cycle_stats_snapshot_args *args)
565 {
566         int ret;
567
568         /* is it allowed to handle calls for current GPU? */
569         if (0 == (ch->g->gpu_characteristics.flags &
570                         NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT))
571                 return -ENOSYS;
572
573         if (!args->dmabuf_fd)
574                 return -EINVAL;
575
576         /* handle the command (most frequent cases first) */
577         switch (args->cmd) {
578         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
579                 ret = gk20a_flush_cycle_stats_snapshot(ch);
580                 args->extra = 0;
581                 break;
582
583         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
584                 ret = gk20a_attach_cycle_stats_snapshot(ch,
585                                                 args->dmabuf_fd,
586                                                 args->extra,
587                                                 &args->extra);
588                 break;
589
590         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
591                 ret = gk20a_free_cycle_stats_snapshot(ch);
592                 args->extra = 0;
593                 break;
594
595         default:
596                 pr_err("cyclestats: unknown command %u\n", args->cmd);
597                 ret = -EINVAL;
598                 break;
599         }
600
601         return ret;
602 }
603 #endif
604
605 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
606                 struct nvgpu_set_error_notifier *args) {
607         void *va;
608
609         struct dma_buf *dmabuf;
610
611         if (!args->mem) {
612                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
613                 return -EINVAL;
614         }
615
616         dmabuf = dma_buf_get(args->mem);
617
618         if (ch->error_notifier_ref)
619                 gk20a_free_error_notifiers(ch);
620
621         if (IS_ERR(dmabuf)) {
622                 pr_err("Invalid handle: %d\n", args->mem);
623                 return -EINVAL;
624         }
625         /* map handle */
626         va = dma_buf_vmap(dmabuf);
627         if (!va) {
628                 dma_buf_put(dmabuf);
629                 pr_err("Cannot map notifier handle\n");
630                 return -ENOMEM;
631         }
632
633         /* set channel notifiers pointer */
634         ch->error_notifier_ref = dmabuf;
635         ch->error_notifier = va + args->offset;
636         ch->error_notifier_va = va;
637         memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
638         return 0;
639 }
640
641 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
642 {
643         if (ch->error_notifier_ref) {
644                 struct timespec time_data;
645                 u64 nsec;
646                 getnstimeofday(&time_data);
647                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
648                                 (u64)time_data.tv_nsec;
649                 ch->error_notifier->time_stamp.nanoseconds[0] =
650                                 (u32)nsec;
651                 ch->error_notifier->time_stamp.nanoseconds[1] =
652                                 (u32)(nsec >> 32);
653                 ch->error_notifier->info32 = error;
654                 ch->error_notifier->status = 0xffff;
655
656                 gk20a_err(dev_from_gk20a(ch->g),
657                     "error notifier set to %d for ch %d", error, ch->hw_chid);
658         }
659 }
660
661 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
662 {
663         if (ch->error_notifier_ref) {
664                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
665                 dma_buf_put(ch->error_notifier_ref);
666                 ch->error_notifier_ref = NULL;
667                 ch->error_notifier = NULL;
668                 ch->error_notifier_va = NULL;
669         }
670 }
671
672 /* Returns delta of cyclic integers a and b. If a is ahead of b, delta
673  * is positive */
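/*
 * The bare subtraction relies on the kernel's wrapping signed arithmetic
 * (-fno-strict-overflow), so callers can compare counter snapshots correctly
 * even after the atomic interrupt counters wrap around;
 * e.g. cyclic_delta(INT_MIN, INT_MAX) == 1.
 */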
674 static int cyclic_delta(int a, int b)
675 {
676         return a - b;
677 }
678
679 static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
680 {
681         int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
682         int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);
683
684         /* wait until all stalling irqs are handled */
685         wait_event(g->sw_irq_stall_last_handled_wq,
686                    cyclic_delta(stall_irq_threshold,
687                                 atomic_read(&g->sw_irq_stall_last_handled))
688                    <= 0);
689
690         /* wait until all non-stalling irqs are handled */
691         wait_event(g->sw_irq_nonstall_last_handled_wq,
692                    cyclic_delta(nonstall_irq_threshold,
693                                 atomic_read(&g->sw_irq_nonstall_last_handled))
694                    <= 0);
695 }
696
697 static void gk20a_wait_until_counter_is_N(
698         struct channel_gk20a *ch, atomic_t *counter, int wait_value,
699         wait_queue_head_t *wq, const char *caller, const char *counter_name)
700 {
701         while (true) {
702                 if (wait_event_timeout(
703                             *wq,
704                             atomic_read(counter) == wait_value,
705                             msecs_to_jiffies(5000)) > 0)
706                         break;
707
708                 gk20a_warn(dev_from_gk20a(ch->g),
709                            "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
710                            caller, ch->hw_chid, counter_name,
711                            atomic_read(counter), wait_value);
712         }
713 }
714
715
716
717 /* call ONLY when no references to the channel exist: after the last put */
718 static void gk20a_free_channel(struct channel_gk20a *ch)
719 {
720         struct gk20a *g = ch->g;
721         struct fifo_gk20a *f = &g->fifo;
722         struct gr_gk20a *gr = &g->gr;
723         struct vm_gk20a *ch_vm = ch->vm;
724         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
725         struct dbg_session_gk20a *dbg_s;
726         bool was_reset;
727         gk20a_dbg_fn("");
728
729         WARN_ON(ch->g == NULL);
730
731         trace_gk20a_free_channel(ch->hw_chid);
732
733         /* abort channel and remove from runlist */
734         gk20a_disable_channel(ch);
735
736         /* wait until there's only our ref to the channel */
737         gk20a_wait_until_counter_is_N(
738                 ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
739                 __func__, "references");
740
741         /* wait until all pending interrupts for recently completed
742          * jobs are handled */
743         gk20a_wait_for_deferred_interrupts(g);
744
745         /* prevent new refs */
746         spin_lock(&ch->ref_obtain_lock);
747         if (!ch->referenceable) {
748                 spin_unlock(&ch->ref_obtain_lock);
749                 gk20a_err(dev_from_gk20a(ch->g),
750                           "Extra %s() called to channel %u",
751                           __func__, ch->hw_chid);
752                 return;
753         }
754         ch->referenceable = false;
755         spin_unlock(&ch->ref_obtain_lock);
756
757         /* matches with the initial reference in gk20a_open_new_channel() */
758         atomic_dec(&ch->ref_count);
759
760         /* wait until no more refs to the channel */
761         gk20a_wait_until_counter_is_N(
762                 ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
763                 __func__, "references");
764
765         /* if engine reset was deferred, perform it now */
766         mutex_lock(&f->deferred_reset_mutex);
767         if (g->fifo.deferred_reset_pending) {
768                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
769                            " deferred, running now");
770                 was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex);
771                 mutex_lock(&g->fifo.gr_reset_mutex);
772                 /* if lock is already taken, a reset is taking place
773                 so no need to repeat */
774                 if (!was_reset) {
775                         gk20a_fifo_reset_engine(g,
776                                 g->fifo.deferred_fault_engines);
777                 }
778                 mutex_unlock(&g->fifo.gr_reset_mutex);
779                 g->fifo.deferred_fault_engines = 0;
780                 g->fifo.deferred_reset_pending = false;
781         }
782         mutex_unlock(&f->deferred_reset_mutex);
783
784         if (!ch->bound)
785                 goto release;
786
787         if (!gk20a_channel_as_bound(ch))
788                 goto unbind;
789
790         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
791                         timeout);
792
793         gk20a_free_error_notifiers(ch);
794
795         /* release channel ctx */
796         g->ops.gr.free_channel_ctx(ch);
797
798         gk20a_gr_flush_channel_tlb(gr);
799
800         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
801
802         gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
803
804         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
805
806 #if defined(CONFIG_GK20A_CYCLE_STATS)
807         gk20a_free_cycle_stats_buffer(ch);
808         gk20a_free_cycle_stats_snapshot(ch);
809 #endif
810
811         channel_gk20a_free_priv_cmdbuf(ch);
812
813         /* sync must be destroyed before releasing channel vm */
814         mutex_lock(&ch->sync_lock);
815         if (ch->sync) {
816                 ch->sync->destroy(ch->sync);
817                 ch->sync = NULL;
818         }
819         mutex_unlock(&ch->sync_lock);
820
821         /* release channel binding to the as_share */
822         if (ch_vm->as_share)
823                 gk20a_as_release_share(ch_vm->as_share);
824         else
825                 gk20a_vm_put(ch_vm);
826
827         spin_lock(&ch->update_fn_lock);
828         ch->update_fn = NULL;
829         ch->update_fn_data = NULL;
830         spin_unlock(&ch->update_fn_lock);
831         cancel_work_sync(&ch->update_fn_work);
832
833         /* make sure we don't have deferred interrupts pending that
834          * could still touch the channel */
835         gk20a_wait_for_deferred_interrupts(g);
836
837 unbind:
838         if (gk20a_is_channel_marked_as_tsg(ch))
839                 gk20a_tsg_unbind_channel(ch);
840
841         g->ops.fifo.unbind_channel(ch);
842         g->ops.fifo.free_inst(g, ch);
843
844         ch->vpr = false;
845         ch->vm = NULL;
846
847         mutex_lock(&ch->submit_lock);
848         gk20a_fence_put(ch->last_submit.pre_fence);
849         gk20a_fence_put(ch->last_submit.post_fence);
850         ch->last_submit.pre_fence = NULL;
851         ch->last_submit.post_fence = NULL;
852         mutex_unlock(&ch->submit_lock);
853         WARN_ON(ch->sync);
854
855         /* unlink all debug sessions */
856         mutex_lock(&ch->dbg_s_lock);
857
858         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
859                 dbg_s->ch = NULL;
860                 list_del_init(&dbg_s->dbg_s_list_node);
861         }
862
863         mutex_unlock(&ch->dbg_s_lock);
864
865 release:
866         /* make sure we catch accesses of unopened channels in case
 867          * there are non-refcounted channel pointers hanging around */
868         ch->g = NULL;
869         wmb();
870
871         /* ALWAYS last */
872         free_channel(f, ch);
873 }
874
 875 /* Try to get a reference to the channel. Returns NULL if it fails, in which case
876  * the channel is dead or being freed elsewhere and you must not touch it.
877  *
 878  * Whenever a channel_gk20a pointer is seen and about to be used, a
879  * reference must be held to it - either by you or the caller, which should be
880  * documented well or otherwise clearly seen. This usually boils down to the
881  * file from ioctls directly, or an explicit get in exception handlers when the
882  * channel is found by a hw_chid.
883  *
884  * Most global functions in this file require a reference to be held by the
885  * caller.
886  */
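/*
 * A minimal usage sketch, using the gk20a_channel_get()/gk20a_channel_put()
 * wrappers used elsewhere in this file:
 *
 *	ch = gk20a_channel_get(ch);
 *	if (!ch)
 *		return;		(channel is dead or being freed)
 *	... use the channel ...
 *	gk20a_channel_put(ch);
 */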
887 struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
888                                          const char *caller) {
889         struct channel_gk20a *ret;
890
891         spin_lock(&ch->ref_obtain_lock);
892
893         if (likely(ch->referenceable)) {
894                 atomic_inc(&ch->ref_count);
895                 ret = ch;
896         } else
897                 ret = NULL;
898
899         spin_unlock(&ch->ref_obtain_lock);
900
901         if (ret)
902                 trace_gk20a_channel_get(ch->hw_chid, caller);
903
904         return ret;
905 }
906
907 void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
908 {
909         trace_gk20a_channel_put(ch->hw_chid, caller);
910         atomic_dec(&ch->ref_count);
911         wake_up_all(&ch->ref_count_dec_wq);
912
913         /* More puts than gets. Channel is probably going to get
914          * stuck. */
915         WARN_ON(atomic_read(&ch->ref_count) < 0);
916
917         /* Also, more puts than gets. ref_count can go to 0 only if
918          * the channel is closing. Channel is probably going to get
919          * stuck. */
920         WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
921 }
922
923 void gk20a_channel_close(struct channel_gk20a *ch)
924 {
925         gk20a_free_channel(ch);
926 }
927
928 int gk20a_channel_release(struct inode *inode, struct file *filp)
929 {
930         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
931         struct gk20a *g = ch ? ch->g : NULL;
932         int err;
933
934         if (!ch)
935                 return 0;
936
937         trace_gk20a_channel_release(dev_name(&g->dev->dev));
938
939         err = gk20a_busy(g->dev);
940         if (err) {
941                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
942                         ch->hw_chid);
943                 return err;
944         }
945         gk20a_channel_close(ch);
946         gk20a_idle(g->dev);
947
948         filp->private_data = NULL;
949         return 0;
950 }
951
952 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
953 {
954         struct channel_gk20a *ch =
955                 container_of(work, struct channel_gk20a, update_fn_work);
956         void (*update_fn)(struct channel_gk20a *, void *);
957         void *update_fn_data;
958
959         spin_lock(&ch->update_fn_lock);
960         update_fn = ch->update_fn;
961         update_fn_data = ch->update_fn_data;
962         spin_unlock(&ch->update_fn_lock);
963
964         if (update_fn)
965                 update_fn(ch, update_fn_data);
966 }
967
968 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
969                 void (*update_fn)(struct channel_gk20a *, void *),
970                 void *update_fn_data)
971 {
972         struct channel_gk20a *ch = gk20a_open_new_channel(g);
973
974         if (ch) {
975                 spin_lock(&ch->update_fn_lock);
976                 ch->update_fn = update_fn;
977                 ch->update_fn_data = update_fn_data;
978                 spin_unlock(&ch->update_fn_lock);
979         }
980
981         return ch;
982 }
983
984 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
985 {
986         struct fifo_gk20a *f = &g->fifo;
987         struct channel_gk20a *ch;
988
989         gk20a_dbg_fn("");
990
991         ch = allocate_channel(f);
992         if (ch == NULL) {
993                 /* TBD: we want to make this virtualizable */
994                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
995                 return NULL;
996         }
997
998         trace_gk20a_open_new_channel(ch->hw_chid);
999
1000         BUG_ON(ch->g);
1001         ch->g = g;
1002
1003         if (g->ops.fifo.alloc_inst(g, ch)) {
1004                 ch->g = NULL;
1005                 free_channel(f, ch);
1006                 gk20a_err(dev_from_gk20a(g),
1007                            "failed to open gk20a channel, out of inst mem");
1008                 return NULL;
1009         }
1010
 1011         /* now the channel is in limbo: off the free list but not yet marked as
 1012          * alive and used (i.e. get-able) */
1013
1014         ch->pid = current->pid;
1015
1016         /* By default, channel is regular (non-TSG) channel */
1017         ch->tsgid = NVGPU_INVALID_TSG_ID;
1018
1019         /* reset timeout counter and update timestamp */
1020         ch->timeout_accumulated_ms = 0;
1021         ch->timeout_gpfifo_get = 0;
1022         /* set gr host default timeout */
1023         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
1024         ch->timeout_debug_dump = true;
1025         ch->has_timedout = false;
1026         ch->obj_class = 0;
1027         ch->clean_up.scheduled = false;
1028
1029         /* The channel is *not* runnable at this point. It still needs to have
1030          * an address space bound and allocate a gpfifo and grctx. */
1031
1032         init_waitqueue_head(&ch->notifier_wq);
1033         init_waitqueue_head(&ch->semaphore_wq);
1034         init_waitqueue_head(&ch->submit_wq);
1035
1036         mutex_init(&ch->poll_events.lock);
1037         ch->poll_events.events_enabled = false;
1038         ch->poll_events.num_pending_events = 0;
1039
1040         ch->update_fn = NULL;
1041         ch->update_fn_data = NULL;
1042         spin_lock_init(&ch->update_fn_lock);
1043         INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
1044
 1045         /* Mark the channel alive and get-able, with one initial
 1046          * reference. The initial reference will be dropped in
1047          * gk20a_free_channel() */
1048         ch->referenceable = true;
1049         atomic_set(&ch->ref_count, 1);
1050         wmb();
1051
1052         return ch;
1053 }
1054
1055 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
1056 {
1057         int err;
1058         struct channel_gk20a *ch;
1059
1060         trace_gk20a_channel_open(dev_name(&g->dev->dev));
1061
1062         err = gk20a_busy(g->dev);
1063         if (err) {
1064                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
1065                 return err;
1066         }
1067         ch = gk20a_open_new_channel(g);
1068         gk20a_idle(g->dev);
1069         if (!ch) {
1070                 gk20a_err(dev_from_gk20a(g),
 1071                         "failed to allocate a channel");
1072                 return -ENOMEM;
1073         }
1074
1075         filp->private_data = ch;
1076         return 0;
1077 }
1078
1079 int gk20a_channel_open(struct inode *inode, struct file *filp)
1080 {
1081         struct gk20a *g = container_of(inode->i_cdev,
1082                         struct gk20a, channel.cdev);
1083         int ret;
1084
1085         gk20a_dbg_fn("start");
1086         ret = __gk20a_channel_open(g, filp);
1087
1088         gk20a_dbg_fn("end");
1089         return ret;
1090 }
1091
1092 int gk20a_channel_open_ioctl(struct gk20a *g,
1093                 struct nvgpu_channel_open_args *args)
1094 {
1095         int err;
1096         int fd;
1097         struct file *file;
1098         char *name;
1099
1100         err = get_unused_fd_flags(O_RDWR);
1101         if (err < 0)
1102                 return err;
1103         fd = err;
1104
1105         name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
1106                         dev_name(&g->dev->dev), fd);
1107         if (!name) {
1108                 err = -ENOMEM;
1109                 goto clean_up;
1110         }
1111
1112         file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
1113         kfree(name);
1114         if (IS_ERR(file)) {
1115                 err = PTR_ERR(file);
1116                 goto clean_up;
1117         }
1118
1119         err = __gk20a_channel_open(g, file);
1120         if (err)
1121                 goto clean_up_file;
1122
1123         fd_install(fd, file);
1124         args->channel_fd = fd;
1125         return 0;
1126
1127 clean_up_file:
1128         fput(file);
1129 clean_up:
1130         put_unused_fd(fd);
1131         return err;
1132 }
1133
1134 /* allocate private cmd buffer.
1135    used for inserting commands before/after user submitted buffers. */
1136 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
1137 {
1138         struct device *d = dev_from_gk20a(c->g);
1139         struct vm_gk20a *ch_vm = c->vm;
1140         struct priv_cmd_queue *q = &c->priv_cmd_q;
1141         u32 size;
1142         int err = 0;
1143
1144         /* Kernel can insert gpfifos before and after user gpfifos.
1145            Before user gpfifos, kernel inserts fence_wait, which takes
1146            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
1147            After user gpfifos, kernel inserts fence_get, which takes
1148            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
1149            = 6 dwords.
 1150            Worst case, if the kernel adds both of them for every user gpfifo,
 1151            the max size of priv_cmdbuf is:
 1152            gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */
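        /* For example, with a 1024-entry gpfifo the allocation below is
           1024 * 2 * 12 * sizeof(u32) / 3 = 32 KB (already a power of two);
           note the code reserves 12 words per wait/incr pair, slightly more
           than the 10 dwords described above. */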
1153         size = roundup_pow_of_two(
1154                 c->gpfifo.entry_num * 2 * 12 * sizeof(u32) / 3);
1155
1156         err = gk20a_gmmu_alloc_map(ch_vm, size, &q->mem);
1157         if (err) {
1158                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1159                 goto clean_up;
1160         }
1161
1162         q->size = q->mem.size / sizeof (u32);
1163
1164         return 0;
1165
1166 clean_up:
1167         channel_gk20a_free_priv_cmdbuf(c);
1168         return err;
1169 }
1170
1171 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
1172 {
1173         struct vm_gk20a *ch_vm = c->vm;
1174         struct priv_cmd_queue *q = &c->priv_cmd_q;
1175
1176         if (q->size == 0)
1177                 return;
1178
1179         gk20a_gmmu_unmap_free(ch_vm, &q->mem);
1180
1181         memset(q, 0, sizeof(struct priv_cmd_queue));
1182 }
1183
1184 /* allocate a cmd buffer with given size. size is number of u32 entries */
1185 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1186                              struct priv_cmd_entry **entry)
1187 {
1188         struct priv_cmd_queue *q = &c->priv_cmd_q;
1189         struct priv_cmd_entry *e;
1190         u32 free_count;
1191         u32 size = orig_size;
1192
1193         gk20a_dbg_fn("size %d", orig_size);
1194
1195         *entry = NULL;
1196
1197         /* if free space in the end is less than requested, increase the size
1198          * to make the real allocated space start from beginning. */
1199         if (q->put + size > q->size)
1200                 size = orig_size + (q->size - q->put);
1201
1202         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
1203                         c->hw_chid, q->get, q->put);
1204
1205         free_count = (q->size - (q->put - q->get) - 1) % q->size;
1206
1207         if (size > free_count)
1208                 return -EAGAIN;
1209
1210         e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1211         if (!e) {
1212                 gk20a_err(dev_from_gk20a(c->g),
1213                         "ch %d: fail to allocate priv cmd entry",
1214                         c->hw_chid);
1215                 return -ENOMEM;
1216         }
1217
1218         e->size = orig_size;
1219         e->gp_get = c->gpfifo.get;
1220         e->gp_put = c->gpfifo.put;
1221         e->gp_wrap = c->gpfifo.wrap;
1222
1223         /* if we have increased size to skip free space in the end, set put
1224            to beginning of cmd buffer (0) + size */
1225         if (size != orig_size) {
1226                 e->ptr = (u32 *)q->mem.cpu_va;
1227                 e->gva = q->mem.gpu_va;
1228                 q->put = orig_size;
1229         } else {
1230                 e->ptr = (u32 *)q->mem.cpu_va + q->put;
1231                 e->gva = q->mem.gpu_va + q->put * sizeof(u32);
1232                 q->put = (q->put + orig_size) & (q->size - 1);
1233         }
1234
1235         /* we already handled q->put + size > q->size so BUG_ON this */
1236         BUG_ON(q->put > q->size);
1237
1238         *entry = e;
1239
1240         gk20a_dbg_fn("done");
1241
1242         return 0;
1243 }
1244
 1245 /* Don't call this to free an explicit cmd entry.
1246  * It doesn't update priv_cmd_queue get/put */
1247 static void free_priv_cmdbuf(struct channel_gk20a *c,
1248                              struct priv_cmd_entry *e)
1249 {
1250         kfree(e);
1251 }
1252
1253 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1254                 struct nvgpu_alloc_gpfifo_args *args)
1255 {
1256         struct gk20a *g = c->g;
1257         struct device *d = dev_from_gk20a(g);
1258         struct vm_gk20a *ch_vm;
1259         u32 gpfifo_size;
1260         int err = 0;
1261
1262         /* Kernel can insert one extra gpfifo entry before user submitted gpfifos
1263            and another one after, for internal usage. Triple the requested size. */
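        /* e.g. a request for 1024 entries becomes 3072, rounded up to 4096 */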
1264         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
1265
1266         if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1267                 c->vpr = true;
1268
1269         /* an address space needs to have been bound at this point. */
1270         if (!gk20a_channel_as_bound(c)) {
1271                 gk20a_err(d,
1272                             "not bound to an address space at time of gpfifo"
1273                             " allocation.");
1274                 return -EINVAL;
1275         }
1276         ch_vm = c->vm;
1277
1278         c->cmds_pending = false;
1279         mutex_lock(&c->submit_lock);
1280         gk20a_fence_put(c->last_submit.pre_fence);
1281         gk20a_fence_put(c->last_submit.post_fence);
1282         c->last_submit.pre_fence = NULL;
1283         c->last_submit.post_fence = NULL;
1284         mutex_unlock(&c->submit_lock);
1285
1286         c->ramfc.offset = 0;
1287         c->ramfc.size = ram_in_ramfc_s() / 8;
1288
1289         if (c->gpfifo.mem.cpu_va) {
1290                 gk20a_err(d, "channel %d :"
1291                            "gpfifo already allocated", c->hw_chid);
1292                 return -EEXIST;
1293         }
1294
1295         err = gk20a_gmmu_alloc_map(ch_vm, gpfifo_size * sizeof(struct gpfifo),
1296                         &c->gpfifo.mem);
1297         if (err) {
1298                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1299                 goto clean_up;
1300         }
1301
1302         c->gpfifo.entry_num = gpfifo_size;
1303         c->gpfifo.get = c->gpfifo.put = 0;
1304
1305         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1306                 c->hw_chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num);
1307
1308         channel_gk20a_setup_userd(c);
1309
1310         err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
1311                                         c->gpfifo.entry_num, args->flags);
1312         if (err)
1313                 goto clean_up_unmap;
1314
1315         /* TBD: setup engine contexts */
1316
1317         err = channel_gk20a_alloc_priv_cmdbuf(c);
1318         if (err)
1319                 goto clean_up_unmap;
1320
1321         err = channel_gk20a_update_runlist(c, true);
1322         if (err)
1323                 goto clean_up_unmap;
1324
1325         g->ops.fifo.bind_channel(c);
1326
1327         gk20a_dbg_fn("done");
1328         return 0;
1329
1330 clean_up_unmap:
1331         gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
1332 clean_up:
1333         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1334         gk20a_err(d, "fail");
1335         return err;
1336 }
1337
1338 static inline bool check_gp_put(struct gk20a *g,
1339                                 struct channel_gk20a *c)
1340 {
1341         u32 put;
1342         /* gp_put changed unexpectedly since last update? */
1343         put = gk20a_bar1_readl(g,
1344                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1345         if (c->gpfifo.put != put) {
1346                 /*TBD: BUG_ON/teardown on this*/
1347                 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1348                           "since last update, channel put = %u, ram put = %u\n",
1349                           c->gpfifo.put, put);
1350                 c->gpfifo.put = put;
1351                 return false; /* surprise! */
1352         }
1353         return true; /* checked out ok */
1354 }
1355
1356 /* Update with this periodically to determine how the gpfifo is draining. */
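/* gp_get only moves forward, so reading a value smaller than the cached one
 * means the hardware has wrapped around the end of the gpfifo. */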
1357 static inline u32 update_gp_get(struct gk20a *g,
1358                                 struct channel_gk20a *c)
1359 {
1360         u32 new_get = gk20a_bar1_readl(g,
1361                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
1362         if (new_get < c->gpfifo.get)
1363                 c->gpfifo.wrap = !c->gpfifo.wrap;
1364         c->gpfifo.get = new_get;
1365         return new_get;
1366 }
1367
1368 static inline u32 gp_free_count(struct channel_gk20a *c)
1369 {
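        /* one entry is always left unused so that put == get unambiguously
         * means "empty" rather than "full" */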
1370         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1371                 c->gpfifo.entry_num;
1372 }
1373
1374 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1375                 u32 timeout_delta_ms)
1376 {
1377         u32 gpfifo_get = update_gp_get(ch->g, ch);
 1378         /* Count consecutive timeout isrs */
1379         if (gpfifo_get == ch->timeout_gpfifo_get) {
1380                 /* we didn't advance since previous channel timeout check */
1381                 ch->timeout_accumulated_ms += timeout_delta_ms;
1382         } else {
1383                 /* first timeout isr encountered */
1384                 ch->timeout_accumulated_ms = timeout_delta_ms;
1385         }
1386
1387         ch->timeout_gpfifo_get = gpfifo_get;
1388
1389         return ch->g->timeouts_enabled &&
1390                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1391 }
1392
1393 static u32 get_gp_free_count(struct channel_gk20a *c)
1394 {
1395         update_gp_get(c->g, c);
1396         return gp_free_count(c);
1397 }
1398
1399 static void trace_write_pushbuffer(struct channel_gk20a *c,
1400                                    struct nvgpu_gpfifo *g)
1401 {
1402         void *mem = NULL;
1403         unsigned int words;
1404         u64 offset;
1405         struct dma_buf *dmabuf = NULL;
1406
1407         if (gk20a_debug_trace_cmdbuf) {
1408                 u64 gpu_va = (u64)g->entry0 |
1409                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1410                 int err;
1411
1412                 words = pbdma_gp_entry1_length_v(g->entry1);
1413                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1414                 if (!err)
1415                         mem = dma_buf_vmap(dmabuf);
1416         }
1417
1418         if (mem) {
1419                 u32 i;
1420                 /*
1421                  * Write in batches of 128 as there seems to be a limit
1422                  * of how much you can output to ftrace at once.
1423                  */
1424                 for (i = 0; i < words; i += 128U) {
1425                         trace_gk20a_push_cmdbuf(
1426                                 c->g->dev->name,
1427                                 0,
1428                                 min(words - i, 128U),
1429                                 offset + i * sizeof(u32),
1430                                 mem);
1431                 }
1432                 dma_buf_vunmap(dmabuf, mem);
1433         }
1434 }
1435
1436 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
1437                                          struct nvgpu_gpfifo *g,
1438                                          struct nvgpu_submit_gpfifo_args *args,
1439                                          int offset,
1440                                          int count)
1441 {
1442         u32 size;
1443         int i;
1444         struct nvgpu_gpfifo *gp;
1445         bool gpfifo_allocated = false;
1446
1447         if (!gk20a_debug_trace_cmdbuf)
1448                 return;
1449
1450         if (!g && !args)
1451                 return;
1452
1453         if (!g) {
1454                 size = args->num_entries * sizeof(struct nvgpu_gpfifo);
1455                 if (size) {
1456                         g = nvgpu_alloc(size, false);
1457                         if (!g)
1458                                 return;
1459
 1460                         if (copy_from_user(g,
 1461                                 (void __user *)(uintptr_t)args->gpfifo, size)) {
                                      nvgpu_free(g); /* don't leak the temporary copy */
 1462                                 return;
 1463                         }
1464                 }
1465                 gpfifo_allocated = true;
1466         }
1467
1468         gp = g + offset;
1469         for (i = 0; i < count; i++, gp++)
1470                 trace_write_pushbuffer(c, gp);
1471
1472         if (gpfifo_allocated)
1473                 nvgpu_free(g);
1474 }
1475
1476 static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
1477                                         struct priv_cmd_entry *e)
1478 {
1479         struct priv_cmd_queue *q = &c->priv_cmd_q;
1480         u32 cmd_entry_start;
1481         struct device *d = dev_from_gk20a(c->g);
1482
1483         if (!e)
1484                 return 0;
1485
1486         cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);
1487         if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
1488                 gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
1489
1490         q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
1491         free_priv_cmdbuf(c, e);
1492
1493         return 0;
1494 }
1495
1496 static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
1497 {
1498         mutex_lock(&c->clean_up.lock);
1499
1500         if (c->clean_up.scheduled) {
1501                 mutex_unlock(&c->clean_up.lock);
1502                 return;
1503         }
1504
1505         c->clean_up.scheduled = true;
1506         schedule_delayed_work(&c->clean_up.wq, 1);
1507
1508         mutex_unlock(&c->clean_up.lock);
1509 }
1510
1511 static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
1512                                 bool wait_for_completion)
1513 {
1514         if (wait_for_completion)
1515                 cancel_delayed_work_sync(&c->clean_up.wq);
1516
1517         mutex_lock(&c->clean_up.lock);
1518         c->clean_up.scheduled = false;
1519         mutex_unlock(&c->clean_up.lock);
1520 }
1521
1522 static int gk20a_channel_add_job(struct channel_gk20a *c,
1523                                  struct gk20a_fence *pre_fence,
1524                                  struct gk20a_fence *post_fence,
1525                                  struct priv_cmd_entry *wait_cmd,
1526                                  struct priv_cmd_entry *incr_cmd,
1527                                  bool skip_buffer_refcounting)
1528 {
1529         struct vm_gk20a *vm = c->vm;
1530         struct channel_gk20a_job *job = NULL;
1531         struct mapped_buffer_node **mapped_buffers = NULL;
1532         int err = 0, num_mapped_buffers = 0;
1533
1534         /* job needs reference to this vm (released in channel_update) */
1535         gk20a_vm_get(vm);
1536
1537         if (!skip_buffer_refcounting) {
1538                 err = gk20a_vm_get_buffers(vm, &mapped_buffers,
1539                                         &num_mapped_buffers);
1540                 if (err) {
1541                         gk20a_vm_put(vm);
1542                         return err;
1543                 }
1544         }
1545
1546         job = kzalloc(sizeof(*job), GFP_KERNEL);
1547         if (!job) {
1548                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1549                 gk20a_vm_put(vm);
1550                 return -ENOMEM;
1551         }
1552
1553         /* put() is done in gk20a_channel_clean_up_jobs() when the job is done */
1554         c = gk20a_channel_get(c);
1555
1556         if (c) {
1557                 job->num_mapped_buffers = num_mapped_buffers;
1558                 job->mapped_buffers = mapped_buffers;
1559                 job->pre_fence = gk20a_fence_get(pre_fence);
1560                 job->post_fence = gk20a_fence_get(post_fence);
1561                 job->wait_cmd = wait_cmd;
1562                 job->incr_cmd = incr_cmd;
1563
1564                 mutex_lock(&c->jobs_lock);
1565                 list_add_tail(&job->list, &c->jobs);
1566                 mutex_unlock(&c->jobs_lock);
1567         } else {
                     /* channel is being torn down; undo the refs taken above */
                     gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
                     gk20a_vm_put(vm);
                     kfree(job);
1568                 return -ETIMEDOUT;
1569         }
1570
1571         return 0;
1572 }
1573
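     /*
      * Delayed-work handler that retires completed jobs: it signals the sync
      * timeline, releases buffer/vm/channel/fence references and the private
      * command buffers, and tears down the sync object when the channel goes
      * idle and aggressive sync destruction is enabled.
      */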
1574 static void gk20a_channel_clean_up_jobs(struct work_struct *work)
1575 {
1576         struct channel_gk20a *c = container_of(to_delayed_work(work),
1577                         struct channel_gk20a, clean_up.wq);
1578         struct vm_gk20a *vm;
1579         struct channel_gk20a_job *job, *n;
1580         struct gk20a_platform *platform;
1581
1582         c = gk20a_channel_get(c);
1583         if (!c)
1584                 return;
1585
1586         vm = c->vm;
1587         platform = gk20a_get_platform(c->g->dev);
1588
1589         mutex_lock(&c->submit_lock);
1590
1591         if (c->g->power_on) {
1592                 /* gp_put check needs to be done inside submit lock */
1593                 update_gp_get(c->g, c);
1594                 check_gp_put(c->g, c);
1595         }
1596
1597         gk20a_channel_cancel_job_clean_up(c, false);
1598
1599         mutex_lock(&c->jobs_lock);
1600         list_for_each_entry_safe(job, n, &c->jobs, list) {
1601                 struct gk20a *g = c->g;
1602
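                     /* jobs complete in submission order, so stop at the first
                      * job whose post fence has not expired yet */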
1603                 bool completed = gk20a_fence_is_expired(job->post_fence);
1604                 if (!completed)
1605                         break;
1606
1607                 if (c->sync)
1608                         c->sync->signal_timeline(c->sync);
1609
1610                 if (job->num_mapped_buffers)
1611                         gk20a_vm_put_buffers(vm, job->mapped_buffers,
1612                                 job->num_mapped_buffers);
1613
1614                 /* Close the fences (this will unref the semaphores and release
1615                  * them to the pool). */
1616                 gk20a_fence_put(job->pre_fence);
1617                 gk20a_fence_put(job->post_fence);
1618
1619                 /* Free the private command buffers (wait_cmd first and
1620                  * then incr_cmd i.e. order of allocation) */
1621                 gk20a_free_priv_cmdbuf(c, job->wait_cmd);
1622                 gk20a_free_priv_cmdbuf(c, job->incr_cmd);
1623
1624                 /* job is done. release its vm reference (taken in add_job) */
1625                 gk20a_vm_put(vm);
1626                 /* drop the channel ref taken in add_job; the caller holds its
1627                  * own ref, so the channel cannot be freed from under us here. */
1628                 gk20a_channel_put(c);
1629
1630                 list_del_init(&job->list);
1631                 kfree(job);
1632                 gk20a_idle(g->dev);
1633         }
1634
1635         /*
1636          * If the job list is empty, the channel is idle and we can free the
1637          * sync resource here (provided the aggressive_sync_destroy flag is
1638          * set). Note: check that the last submit has completed before
1639          * destroying the sync resource.
1640          */
1641         if (list_empty(&c->jobs)) {
1642                 mutex_lock(&c->sync_lock);
1643                 if (c->sync && platform->aggressive_sync_destroy &&
1644                           gk20a_fence_is_expired(c->last_submit.post_fence)) {
1645                         c->sync->destroy(c->sync);
1646                         c->sync = NULL;
1647                 }
1648                 mutex_unlock(&c->sync_lock);
1649         }
1650         mutex_unlock(&c->jobs_lock);
1651         mutex_unlock(&c->submit_lock);
1652
1653         if (c->update_fn)
1654                 schedule_work(&c->update_fn_work);
1655
1656         gk20a_channel_put(c);
1657 }
1658
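     /*
      * Called when work previously submitted to the channel may have completed;
      * wakes submitters waiting for gpfifo space and defers per-job clean-up to
      * the delayed worker.
      */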
1659 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1660 {
1661         c = gk20a_channel_get(c);
1662         if (!c)
1663                 return;
1664
1665         update_gp_get(c->g, c);
1666         wake_up(&c->submit_wq);
1667
1668         trace_gk20a_channel_update(c->hw_chid);
1669         gk20a_channel_schedule_job_clean_up(c);
1670
1671         gk20a_channel_put(c);
1672 }
1673
1674 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1675                                 struct nvgpu_gpfifo *gpfifo,
1676                                 struct nvgpu_submit_gpfifo_args *args,
1677                                 u32 num_entries,
1678                                 u32 flags,
1679                                 struct nvgpu_fence *fence,
1680                                 struct gk20a_fence **fence_out,
1681                                 bool force_need_sync_fence)
1682 {
1683         struct gk20a *g = c->g;
1684         struct device *d = dev_from_gk20a(g);
1685         int err = 0;
1686         int start, end;
1687         int wait_fence_fd = -1;
1688         struct priv_cmd_entry *wait_cmd = NULL;
1689         struct priv_cmd_entry *incr_cmd = NULL;
1690         struct gk20a_fence *pre_fence = NULL;
1691         struct gk20a_fence *post_fence = NULL;
1692         /* we might need two extra gpfifo entries - one for pre fence
1693          * and one for post fence. */
1694         const int extra_entries = 2;
1695         bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1696         bool skip_buffer_refcounting = (flags &
1697                         NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
1698         bool need_sync_fence = false;
1699
1700         /*
1701          * If user wants to allocate sync_fence_fd always, then respect that;
1702          * otherwise, allocate sync_fence_fd based on user flags only
1703          */
1704         if (force_need_sync_fence)
1705                 need_sync_fence = true;
1706
1707         if (c->has_timedout)
1708                 return -ETIMEDOUT;
1709
1710         /* The fifo is not large enough for this request, so return an error
1711          * immediately. The kernel can insert gpfifo entries before and after
1712          * the user's gpfifos, hence the extra_entries term. Also, HW with a
1713          * fifo of size N can accept only N-1 entries, hence the -1 below. */
1714         if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
1715                 gk20a_err(d, "not enough gpfifo space allocated");
1716                 return -ENOMEM;
1717         }
1718
1719         if (!gpfifo && !args)
1720                 return -EINVAL;
1721
1722         if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1723                       NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1724             !fence)
1725                 return -EINVAL;
1726
1727         /* an address space needs to have been bound at this point. */
1728         if (!gk20a_channel_as_bound(c)) {
1729                 gk20a_err(d,
1730                             "not bound to an address space at time of gpfifo"
1731                             " submission.");
1732                 return -EINVAL;
1733         }
1734
1735 #ifdef CONFIG_DEBUG_FS
1736         /* update debug settings */
1737         if (g->ops.ltc.sync_debugfs)
1738                 g->ops.ltc.sync_debugfs(g);
1739 #endif
1740
1741         gk20a_dbg_info("channel %d", c->hw_chid);
1742
1743         /* released per-job by gk20a_channel_clean_up_jobs (or in clean_up below). */
1744         err = gk20a_busy(g->dev);
1745         if (err) {
1746                 gk20a_err(d, "failed to host gk20a to submit gpfifo");
1747                 return err;
1748         }
1749
1750         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1751                                           c->hw_chid,
1752                                           num_entries,
1753                                           flags,
1754                                           fence ? fence->id : 0,
1755                                           fence ? fence->value : 0);
1756
1757         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1758                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1759
1760         /* Make sure we have enough space for gpfifo entries. If not,
1761          * wait for signals from completed submits */
1762         if (gp_free_count(c) < num_entries + extra_entries) {
1763                 /* we can get here via locked ioctl and other paths too */
1764                 int locked_path = mutex_is_locked(&c->ioctl_lock);
1765                 if (locked_path)
1766                         mutex_unlock(&c->ioctl_lock);
1767
1768                 trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
1769                 err = wait_event_interruptible(c->submit_wq,
1770                         get_gp_free_count(c) >= num_entries + extra_entries ||
1771                         c->has_timedout);
1772                 trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);
1773
1774                 if (locked_path)
1775                         mutex_lock(&c->ioctl_lock);
1776         }
1777
1778         if (c->has_timedout) {
1779                 err = -ETIMEDOUT;
1780                 goto clean_up;
1781         }
1782
1783         if (err) {
1784                 gk20a_err(d, "timeout waiting for gpfifo space");
1785                 err = -EAGAIN;
1786                 goto clean_up;
1787         }
1788
1789         mutex_lock(&c->submit_lock);
1790
1791         mutex_lock(&c->sync_lock);
1792         if (!c->sync) {
1793                 c->sync = gk20a_channel_sync_create(c);
1794                 if (!c->sync) {
1795                         err = -ENOMEM;
1796                         mutex_unlock(&c->submit_lock);
1797                         goto clean_up;
1798                 }
1799                 if (g->ops.fifo.resetup_ramfc)
1800                         err = g->ops.fifo.resetup_ramfc(c);
1801                 if (err) {
                             mutex_unlock(&c->sync_lock);
                             mutex_unlock(&c->submit_lock);
                             goto clean_up;
1802                 }
1803         }
1804         mutex_unlock(&c->sync_lock);
1805
1806         /*
1807          * Optionally insert a syncpt wait at the beginning of the gpfifo
1808          * submission when the user requested one and the wait has not yet
1809          * expired. Validate that the wait id makes sense and elide the wait
1810          * if it does not; the only reason this case is tolerated at all is
1811          * to keep running some tests which trigger it.
1812          */
1813         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1814                 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1815                         wait_fence_fd = fence->id;
1816                         err = c->sync->wait_fd(c->sync, wait_fence_fd,
1817                                         &wait_cmd, &pre_fence);
1818                 } else {
1819                         err = c->sync->wait_syncpt(c->sync, fence->id,
1820                                         fence->value, &wait_cmd, &pre_fence);
1821                 }
1822         }
1823         if (err) {
1824                 mutex_unlock(&c->submit_lock);
1825                 goto clean_up;
1826         }
1827
1828         if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
1829                         (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
1830                 need_sync_fence = true;
1831
1832         /* always insert syncpt increment at end of gpfifo submission
1833            to keep track of method completion for idle railgating */
1834         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1835                 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1836                                  &post_fence, need_wfi, need_sync_fence);
1837         else
1838                 err = c->sync->incr(c->sync, &incr_cmd,
1839                                     &post_fence, need_sync_fence);
1840         if (err) {
1841                 mutex_unlock(&c->submit_lock);
1842                 goto clean_up;
1843         }
1844
1845         if (wait_cmd) {
1846                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
1847                         u64_lo32(wait_cmd->gva);
1848                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
1849                         u64_hi32(wait_cmd->gva) |
1850                         pbdma_gp_entry1_length_f(wait_cmd->size);
1851                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1852                         0, wait_cmd->size, 0, wait_cmd->ptr);
1853
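                     /* entry_num is expected to be a power of two; the mask below
                      * wraps the put pointer around the gpfifo ring */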
1854                 c->gpfifo.put = (c->gpfifo.put + 1) &
1855                         (c->gpfifo.entry_num - 1);
1856
1857                 /* save gp_put */
1858                 wait_cmd->gp_put = c->gpfifo.put;
1859         }
1860
1861         /*
1862          * Copy source gpfifo entries into the gpfifo ring buffer,
1863          * potentially splitting into two memcpies to handle the
1864          * ring buffer wrap-around case.
1865          */
1866         start = c->gpfifo.put;
1867         end = start + num_entries;
1868
1869         if (gpfifo) {
1870                 if (end > c->gpfifo.entry_num) {
1871                         int length0 = c->gpfifo.entry_num - start;
1872                         int length1 = num_entries - length0;
1873
1874                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1875                                 gpfifo,
1876                                 length0 * sizeof(*gpfifo));
1877
1878                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va,
1879                                 gpfifo + length0,
1880                                 length1 * sizeof(*gpfifo));
1881
1882                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1883                                         0, length0);
1884                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1885                                         length0, length1);
1886                 } else {
1887                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1888                                 gpfifo,
1889                                 num_entries * sizeof(*gpfifo));
1890
1891                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1892                                         0, num_entries);
1893                 }
1894         } else {
1895                 struct nvgpu_gpfifo __user *user_gpfifo =
1896                         (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
1897                 if (end > c->gpfifo.entry_num) {
1898                         int length0 = c->gpfifo.entry_num - start;
1899                         int length1 = num_entries - length0;
1900
1901                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1902                                 user_gpfifo,
1903                                 length0 * sizeof(*user_gpfifo));
1904                         if (err) {
                                     err = -EFAULT;
1905                                 mutex_unlock(&c->submit_lock);
1906                                 goto clean_up;
1907                         }
1908
1909                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va,
1910                                 user_gpfifo + length0,
1911                                 length1 * sizeof(*user_gpfifo));
1912                         if (err) {
                                     err = -EFAULT;
1913                                 mutex_unlock(&c->submit_lock);
1914                                 goto clean_up;
1915                         }
1916
1917                         trace_write_pushbuffer_range(c, NULL, args,
1918                                         0, length0);
1919                         trace_write_pushbuffer_range(c, NULL, args,
1920                                         length0, length1);
1921                 } else {
1922                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1923                                 user_gpfifo,
1924                                 num_entries * sizeof(*user_gpfifo));
1925                         if (err) {
                                     err = -EFAULT;
1926                                 mutex_unlock(&c->submit_lock);
1927                                 goto clean_up;
1928                         }
1929
1930                         trace_write_pushbuffer_range(c, NULL, args,
1931                                         0, num_entries);
1932                 }
1933         }
1934
1935         c->gpfifo.put = (c->gpfifo.put + num_entries) &
1936                 (c->gpfifo.entry_num - 1);
1937
1938         if (incr_cmd) {
1939                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
1940                         u64_lo32(incr_cmd->gva);
1941                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
1942                         u64_hi32(incr_cmd->gva) |
1943                         pbdma_gp_entry1_length_f(incr_cmd->size);
1944                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1945                         0, incr_cmd->size, 0, incr_cmd->ptr);
1946
1947                 c->gpfifo.put = (c->gpfifo.put + 1) &
1948                         (c->gpfifo.entry_num - 1);
1949
1950                 /* save gp_put */
1951                 incr_cmd->gp_put = c->gpfifo.put;
1952         }
1953
1954         gk20a_fence_put(c->last_submit.pre_fence);
1955         gk20a_fence_put(c->last_submit.post_fence);
1956         c->last_submit.pre_fence = pre_fence;
1957         c->last_submit.post_fence = post_fence;
1958         if (fence_out)
1959                 *fence_out = gk20a_fence_get(post_fence);
1960
1961         /* TODO! Check for errors... */
1962         gk20a_channel_add_job(c, pre_fence, post_fence,
1963                                 wait_cmd, incr_cmd,
1964                                 skip_buffer_refcounting);
1965
1966         c->cmds_pending = true;
1967         gk20a_bar1_writel(g,
1968                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1969                 c->gpfifo.put);
1970
1971         mutex_unlock(&c->submit_lock);
1972
1973         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1974                                              c->hw_chid,
1975                                              num_entries,
1976                                              flags,
1977                                              post_fence->syncpt_id,
1978                                              post_fence->syncpt_value);
1979
1980         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1981                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1982
1983         gk20a_dbg_fn("done");
1984         return err;
1985
1986 clean_up:
1987         gk20a_err(d, "fail");
1988         free_priv_cmdbuf(c, wait_cmd);
1989         free_priv_cmdbuf(c, incr_cmd);
1990         gk20a_fence_put(pre_fence);
1991         gk20a_fence_put(post_fence);
1992         gk20a_idle(g->dev);
1993         return err;
1994 }
1995
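     /*
      * One-time software initialisation of a single hw channel: set up its
      * locks, lists and clean-up worker and place it on the FIFO's free-channel
      * list. c->g is left NULL here and is assigned when the channel is put to
      * use.
      */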
1996 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1997 {
1998         struct channel_gk20a *c = g->fifo.channel+chid;
1999         c->g = NULL;
2000         c->hw_chid = chid;
2001         c->bound = false;
2002         spin_lock_init(&c->ref_obtain_lock);
2003         atomic_set(&c->ref_count, 0);
2004         c->referenceable = false;
2005         init_waitqueue_head(&c->ref_count_dec_wq);
2006         mutex_init(&c->ioctl_lock);
2007         mutex_init(&c->jobs_lock);
2008         mutex_init(&c->submit_lock);
2009         INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
2010         mutex_init(&c->clean_up.lock);
2011         mutex_init(&c->sync_lock);
2012         INIT_LIST_HEAD(&c->jobs);
2013 #if defined(CONFIG_GK20A_CYCLE_STATS)
2014         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
2015         mutex_init(&c->cs_client_mutex);
2016 #endif
2017         INIT_LIST_HEAD(&c->dbg_s_list);
2018         mutex_init(&c->dbg_s_lock);
2019         list_add(&c->free_chs, &g->fifo.free_chs);
2020
2021         return 0;
2022 }
2023
2024 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
2025 {
2026         int err = 0;
2027         struct gk20a_fence *fence = ch->last_submit.post_fence;
2028
2029         if (!ch->cmds_pending)
2030                 return 0;
2031
2032         /* Do not wait for a timedout channel */
2033         if (ch->has_timedout)
2034                 return -ETIMEDOUT;
2035
2036         gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
2037                      fence->syncpt_value, fence->semaphore);
2038
2039         err = gk20a_fence_wait(fence, timeout);
2040         if (WARN_ON(err))
2041                 dev_warn(dev_from_gk20a(ch->g),
2042                        "timed out waiting for gk20a channel to finish");
2043         else
2044                 ch->cmds_pending = false;
2045
2046         return err;
2047 }
2048
2049 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
2050                                         ulong id, u32 offset,
2051                                         u32 payload, long timeout)
2052 {
2053         struct platform_device *pdev = ch->g->dev;
2054         struct dma_buf *dmabuf;
2055         void *data;
2056         u32 *semaphore;
2057         int ret = 0;
2058         long remain;
2059
2060         /* do not wait if channel has timed out */
2061         if (ch->has_timedout)
2062                 return -ETIMEDOUT;
2063
2064         dmabuf = dma_buf_get(id);
2065         if (IS_ERR(dmabuf)) {
2066                 gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
2067                            id);
2068                 return -EINVAL;
2069         }
2070
2071         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
2072         if (!data) {
2073                 gk20a_err(&pdev->dev, "failed to map notifier memory");
2074                 ret = -EINVAL;
2075                 goto cleanup_put;
2076         }
2077
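             /* only the page containing the semaphore is kmapped; index into it
              * with the in-page part of the offset */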
2078         semaphore = data + (offset & ~PAGE_MASK);
2079
2080         remain = wait_event_interruptible_timeout(
2081                         ch->semaphore_wq,
2082                         *semaphore == payload || ch->has_timedout,
2083                         timeout);
2084
2085         if (remain == 0 && *semaphore != payload)
2086                 ret = -ETIMEDOUT;
2087         else if (remain < 0)
2088                 ret = remain;
2089
2090         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
2091 cleanup_put:
2092         dma_buf_put(dmabuf);
2093         return ret;
2094 }
2095
2096 static int gk20a_channel_wait(struct channel_gk20a *ch,
2097                               struct nvgpu_wait_args *args)
2098 {
2099         struct device *d = dev_from_gk20a(ch->g);
2100         struct dma_buf *dmabuf;
2101         struct notification *notif;
2102         struct timespec tv;
2103         u64 jiffies;
2104         ulong id;
2105         u32 offset;
2106         unsigned long timeout;
2107         int remain, ret = 0;
2108
2109         gk20a_dbg_fn("");
2110
2111         if (ch->has_timedout)
2112                 return -ETIMEDOUT;
2113
2114         if (args->timeout == NVGPU_NO_TIMEOUT)
2115                 timeout = MAX_SCHEDULE_TIMEOUT;
2116         else
2117                 timeout = (u32)msecs_to_jiffies(args->timeout);
2118
2119         switch (args->type) {
2120         case NVGPU_WAIT_TYPE_NOTIFIER:
2121                 id = args->condition.notifier.dmabuf_fd;
2122                 offset = args->condition.notifier.offset;
2123
2124                 dmabuf = dma_buf_get(id);
2125                 if (IS_ERR(dmabuf)) {
2126                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
2127                                    id);
2128                         return -EINVAL;
2129                 }
2130
2131                 notif = dma_buf_vmap(dmabuf);
2132                 if (!notif) {
2133                         gk20a_err(d, "failed to map notifier memory");
                             dma_buf_put(dmabuf);
2134                         return -ENOMEM;
2135                 }
2136
2137                 notif = (struct notification *)((uintptr_t)notif + offset);
2138
2139                 /* user should set status pending before
2140                  * calling this ioctl */
2141                 remain = wait_event_interruptible_timeout(
2142                                 ch->notifier_wq,
2143                                 notif->status == 0 || ch->has_timedout,
2144                                 timeout);
2145
2146                 if (remain == 0 && notif->status != 0) {
2147                         ret = -ETIMEDOUT;
2148                         goto notif_clean_up;
2149                 } else if (remain < 0) {
2150                         ret = -EINTR;
2151                         goto notif_clean_up;
2152                 }
2153
2154                 /* TBD: fill in correct information */
2155                 jiffies = get_jiffies_64();
2156                 jiffies_to_timespec(jiffies, &tv);
2157                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
2158                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
2159                 notif->info32 = 0xDEADBEEF; /* should be object name */
2160                 notif->info16 = ch->hw_chid; /* should be method offset */
2161
2162 notif_clean_up:
2163                 dma_buf_vunmap(dmabuf, notif);
                     dma_buf_put(dmabuf);
2164                 return ret;
2165
2166         case NVGPU_WAIT_TYPE_SEMAPHORE:
2167                 ret = gk20a_channel_wait_semaphore(ch,
2168                                 args->condition.semaphore.dmabuf_fd,
2169                                 args->condition.semaphore.offset,
2170                                 args->condition.semaphore.payload,
2171                                 timeout);
2172
2173                 break;
2174
2175         default:
2176                 ret = -EINVAL;
2177                 break;
2178         }
2179
2180         return ret;
2181 }
2182
2183 /* poll events for semaphores */
2184
2185 static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
2186 {
2187         gk20a_dbg_fn("");
2188
2189         mutex_lock(&ev->lock);
2190
2191         ev->events_enabled = true;
2192         ev->num_pending_events = 0;
2193
2194         mutex_unlock(&ev->lock);
2195 }
2196
2197 static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
2198 {
2199         gk20a_dbg_fn("");
2200
2201         mutex_lock(&ev->lock);
2202
2203         ev->events_enabled = false;
2204         ev->num_pending_events = 0;
2205
2206         mutex_unlock(&ev->lock);
2207 }
2208
2209 static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
2210 {
2211         gk20a_dbg_fn("");
2212
2213         mutex_lock(&ev->lock);
2214
2215         if (ev->events_enabled &&
2216                         ev->num_pending_events > 0)
2217                 ev->num_pending_events--;
2218
2219         mutex_unlock(&ev->lock);
2220 }
2221
2222 static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
2223                           struct nvgpu_channel_events_ctrl_args *args)
2224 {
2225         int ret = 0;
2226
2227         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
2228                         "channel events ctrl cmd %d", args->cmd);
2229
2230         switch (args->cmd) {
2231         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
2232                 gk20a_channel_events_enable(&ch->poll_events);
2233                 break;
2234
2235         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
2236                 gk20a_channel_events_disable(&ch->poll_events);
2237                 break;
2238
2239         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
2240                 gk20a_channel_events_clear(&ch->poll_events);
2241                 break;
2242
2243         default:
2244                 gk20a_err(dev_from_gk20a(ch->g),
2245                            "unrecognized channel events ctrl cmd: 0x%x",
2246                            args->cmd);
2247                 ret = -EINVAL;
2248                 break;
2249         }
2250
2251         return ret;
2252 }
2253
2254 void gk20a_channel_event(struct channel_gk20a *ch)
2255 {
2256         mutex_lock(&ch->poll_events.lock);
2257
2258         if (ch->poll_events.events_enabled) {
2259                 gk20a_dbg_info("posting event on channel id %d",
2260                                 ch->hw_chid);
2261                 gk20a_dbg_info("%d channel events pending",
2262                                 ch->poll_events.num_pending_events);
2263
2264                 ch->poll_events.num_pending_events++;
2265                 /* not waking up here, caller does that */
2266         }
2267
2268         mutex_unlock(&ch->poll_events.lock);
2269 }
2270
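     /*
      * poll() backend: report POLLPRI | POLLIN while enabled channel events are
      * pending. gk20a_channel_event() posts events and the semaphore wakeup
      * path wakes the wait queue polled here.
      */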
2271 unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
2272 {
2273         unsigned int mask = 0;
2274         struct channel_gk20a *ch = filep->private_data;
2275
2276         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");
2277
2278         poll_wait(filep, &ch->semaphore_wq, wait);
2279
2280         mutex_lock(&ch->poll_events.lock);
2281
2282         if (ch->poll_events.events_enabled &&
2283                         ch->poll_events.num_pending_events > 0) {
2284                 gk20a_dbg_info("found pending event on channel id %d",
2285                                 ch->hw_chid);
2286                 gk20a_dbg_info("%d channel events pending",
2287                                 ch->poll_events.num_pending_events);
2288                 mask = (POLLPRI | POLLIN);
2289         }
2290
2291         mutex_unlock(&ch->poll_events.lock);
2292
2293         return mask;
2294 }
2295
2296 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2297                 u32 priority)
2298 {
2299         u32 timeslice_timeout;
2300         /* set priority of graphics channel */
2301         switch (priority) {
2302         case NVGPU_PRIORITY_LOW:
2303                 /* 64 << 3 = 512us */
2304                 timeslice_timeout = 64;
2305                 break;
2306         case NVGPU_PRIORITY_MEDIUM:
2307                 /* 128 << 3 = 1024us */
2308                 timeslice_timeout = 128;
2309                 break;
2310         case NVGPU_PRIORITY_HIGH:
2311                 /* 255 << 3 = 2048us */
2312                 timeslice_timeout = 255;
2313                 break;
2314         default:
2315                 pr_err("Unsupported priority");
2316                 return -EINVAL;
2317         }
2318         channel_gk20a_set_schedule_params(ch,
2319                         timeslice_timeout);
2320         return 0;
2321 }
2322
2323 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
2324                             struct nvgpu_zcull_bind_args *args)
2325 {
2326         struct gk20a *g = ch->g;
2327         struct gr_gk20a *gr = &g->gr;
2328
2329         gk20a_dbg_fn("");
2330
2331         return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
2332                                 args->gpu_va, args->mode);
2333 }
2334
2335 /* in this context the "channel" is the host1x channel which
2336  * maps to *all* gk20a channels */
2337 int gk20a_channel_suspend(struct gk20a *g)
2338 {
2339         struct fifo_gk20a *f = &g->fifo;
2340         u32 chid;
2341         bool channels_in_use = false;
2342         int err;
2343
2344         gk20a_dbg_fn("");
2345
2346         /* wait for engine idle */
2347         err = g->ops.fifo.wait_engine_idle(g);
2348         if (err)
2349                 return err;
2350
2351         for (chid = 0; chid < f->num_channels; chid++) {
2352                 struct channel_gk20a *ch = &f->channel[chid];
2353                 if (gk20a_channel_get(ch)) {
2354                         gk20a_dbg_info("suspend channel %d", chid);
2355                         /* disable channel */
2356                         g->ops.fifo.disable_channel(ch);
2357                         /* preempt the channel */
2358                         gk20a_fifo_preempt(ch->g, ch);
2359                         /* wait for channel update notifiers */
2360                         if (ch->update_fn &&
2361                                         work_pending(&ch->update_fn_work))
2362                                 flush_work(&ch->update_fn_work);
2363                         gk20a_channel_cancel_job_clean_up(ch, true);
2364
2365                         channels_in_use = true;
2366
2367                         gk20a_channel_put(ch);
2368                 }
2369         }
2370
2371         if (channels_in_use) {
2372                 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
2373
2374                 for (chid = 0; chid < f->num_channels; chid++) {
2375                         if (gk20a_channel_get(&f->channel[chid])) {
2376                                 g->ops.fifo.unbind_channel(&f->channel[chid]);
2377                                 gk20a_channel_put(&f->channel[chid]);
2378                         }
2379                 }
2380         }
2381
2382         gk20a_dbg_fn("done");
2383         return 0;
2384 }
2385
2386 int gk20a_channel_resume(struct gk20a *g)
2387 {
2388         struct fifo_gk20a *f = &g->fifo;
2389         u32 chid;
2390         bool channels_in_use = false;
2391
2392         gk20a_dbg_fn("");
2393
2394         for (chid = 0; chid < f->num_channels; chid++) {
2395                 if (gk20a_channel_get(&f->channel[chid])) {
2396                         gk20a_dbg_info("resume channel %d", chid);
2397                         g->ops.fifo.bind_channel(&f->channel[chid]);
2398                         channels_in_use = true;
2399                         gk20a_channel_put(&f->channel[chid]);
2400                 }
2401         }
2402
2403         if (channels_in_use)
2404                 g->ops.fifo.update_runlist(g, 0, ~0, true, true);
2405
2406         gk20a_dbg_fn("done");
2407         return 0;
2408 }
2409
2410 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2411 {
2412         struct fifo_gk20a *f = &g->fifo;
2413         u32 chid;
2414
2415         gk20a_dbg_fn("");
2416
2417         for (chid = 0; chid < f->num_channels; chid++) {
2418                 struct channel_gk20a *c = g->fifo.channel+chid;
2419                 if (gk20a_channel_get(c)) {
2420                         gk20a_channel_event(c);
2421                         wake_up_interruptible_all(&c->semaphore_wq);
2422                         gk20a_channel_update(c, 0);
2423                         gk20a_channel_put(c);
2424                 }
2425         }
2426 }
2427
2428 static int gk20a_ioctl_channel_submit_gpfifo(
2429         struct channel_gk20a *ch,
2430         struct nvgpu_submit_gpfifo_args *args)
2431 {
2432         struct gk20a_fence *fence_out;
2433         int ret = 0;
2434
2435         gk20a_dbg_fn("");
2436
2437         if (ch->has_timedout)
2438                 return -ETIMEDOUT;
2439
2440         ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
2441                                           args->flags, &args->fence,
2442                                           &fence_out, false);
2443
2444         if (ret)
2445                 goto clean_up;
2446
2447         /* Convert fence_out to something we can pass back to user space. */
2448         if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
2449                 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2450                         int fd = gk20a_fence_install_fd(fence_out);
2451                         if (fd < 0)
2452                                 ret = fd;
2453                         else
2454                                 args->fence.id = fd;
2455                 } else {
2456                         args->fence.id = fence_out->syncpt_id;
2457                         args->fence.value = fence_out->syncpt_value;
2458                 }
2459         }
2460         gk20a_fence_put(fence_out);
2461
2462 clean_up:
2463         return ret;
2464 }
2465
2466 void gk20a_init_channel(struct gpu_ops *gops)
2467 {
2468         gops->fifo.bind_channel = channel_gk20a_bind;
2469         gops->fifo.unbind_channel = channel_gk20a_unbind;
2470         gops->fifo.disable_channel = channel_gk20a_disable;
2471         gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
2472         gops->fifo.free_inst = channel_gk20a_free_inst;
2473         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
2474 }
2475
2476 long gk20a_channel_ioctl(struct file *filp,
2477         unsigned int cmd, unsigned long arg)
2478 {
2479         struct channel_gk20a *ch = filp->private_data;
2480         struct platform_device *dev = ch->g->dev;
2481         u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
2482         int err = 0;
2483
2484         gk20a_dbg_fn("start %d", _IOC_NR(cmd));
2485
2486         if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
2487                 (_IOC_NR(cmd) == 0) ||
2488                 (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
2489                 (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
2490                 return -EINVAL;
2491
2492         if (_IOC_DIR(cmd) & _IOC_WRITE) {
2493                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2494                         return -EFAULT;
2495         }
2496
2497         /* take a ref or return timeout if channel refs can't be taken */
2498         ch = gk20a_channel_get(ch);
2499         if (!ch)
2500                 return -ETIMEDOUT;
2501
2502         /* protect our sanity for threaded userspace - most of the channel is
2503          * not thread safe */
2504         mutex_lock(&ch->ioctl_lock);
2505
2506         /* this ioctl call keeps a ref to the file which keeps a ref to the
2507          * channel */
2508
2509         switch (cmd) {
2510         case NVGPU_IOCTL_CHANNEL_OPEN:
2511                 err = gk20a_channel_open_ioctl(ch->g,
2512                         (struct nvgpu_channel_open_args *)buf);
2513                 break;
2514         case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
2515                 break;
2516         case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2517                 err = gk20a_busy(dev);
2518                 if (err) {
2519                         dev_err(&dev->dev,
2520                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2521                                 __func__, cmd);
2522                         break;
2523                 }
2524                 err = ch->g->ops.gr.alloc_obj_ctx(ch,
2525                                 (struct nvgpu_alloc_obj_ctx_args *)buf);
2526                 gk20a_idle(dev);
2527                 break;
2528         case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
2529                 err = gk20a_busy(dev);
2530                 if (err) {
2531                         dev_err(&dev->dev,
2532                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2533                                 __func__, cmd);
2534                         break;
2535                 }
2536                 err = ch->g->ops.gr.free_obj_ctx(ch,
2537                                 (struct nvgpu_free_obj_ctx_args *)buf);
2538                 gk20a_idle(dev);
2539                 break;
2540         case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
2541                 err = gk20a_busy(dev);
2542                 if (err) {
2543                         dev_err(&dev->dev,
2544                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2545                                 __func__, cmd);
2546                         break;
2547                 }
2548                 err = gk20a_alloc_channel_gpfifo(ch,
2549                                 (struct nvgpu_alloc_gpfifo_args *)buf);
2550                 gk20a_idle(dev);
2551                 break;
2552         case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2553                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2554                                 (struct nvgpu_submit_gpfifo_args *)buf);
2555                 break;
2556         case NVGPU_IOCTL_CHANNEL_WAIT:
2557                 err = gk20a_busy(dev);
2558                 if (err) {
2559                         dev_err(&dev->dev,
2560                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2561                                 __func__, cmd);
2562                         break;
2563                 }
2564
2565                 /* waiting is thread-safe; holding this mutex across the wait
2566                  * could deadlock under certain conditions */
2567                 mutex_unlock(&ch->ioctl_lock);
2568
2569                 err = gk20a_channel_wait(ch,
2570                                 (struct nvgpu_wait_args *)buf);
2571
2572                 mutex_lock(&ch->ioctl_lock);
2573
2574                 gk20a_idle(dev);
2575                 break;
2576         case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
2577                 err = gk20a_busy(dev);
2578                 if (err) {
2579                         dev_err(&dev->dev,
2580                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2581                                 __func__, cmd);
2582                         break;
2583                 }
2584                 err = gk20a_channel_zcull_bind(ch,
2585                                 (struct nvgpu_zcull_bind_args *)buf);
2586                 gk20a_idle(dev);
2587                 break;
2588         case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2589                 err = gk20a_busy(dev);
2590                 if (err) {
2591                         dev_err(&dev->dev,
2592                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2593                                 __func__, cmd);
2594                         break;
2595                 }
2596                 err = gk20a_init_error_notifier(ch,
2597                                 (struct nvgpu_set_error_notifier *)buf);
2598                 gk20a_idle(dev);
2599                 break;
2600 #ifdef CONFIG_GK20A_CYCLE_STATS
2601         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
2602                 err = gk20a_busy(dev);
2603                 if (err) {
2604                         dev_err(&dev->dev,
2605                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2606                                 __func__, cmd);
2607                         break;
2608                 }
2609                 err = gk20a_channel_cycle_stats(ch,
2610                                 (struct nvgpu_cycle_stats_args *)buf);
2611                 gk20a_idle(dev);
2612                 break;
2613 #endif
2614         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
2615         {
2616                 u32 timeout =
2617                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2618                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2619                            timeout, ch->hw_chid);
2620                 ch->timeout_ms_max = timeout;
2621                 break;
2622         }
2623         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2624         {
2625                 u32 timeout =
2626                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2627                 bool timeout_debug_dump = !((u32)
2628                         ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
2629                         (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
2630                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2631                            timeout, ch->hw_chid);
2632                 ch->timeout_ms_max = timeout;
2633                 ch->timeout_debug_dump = timeout_debug_dump;
2634                 break;
2635         }
2636         case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
2637                 ((struct nvgpu_get_param_args *)buf)->value =
2638                         ch->has_timedout;
2639                 break;
2640         case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
2641                 err = gk20a_busy(dev);
2642                 if (err) {
2643                         dev_err(&dev->dev,
2644                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2645                                 __func__, cmd);
2646                         break;
2647                 }
2648                 gk20a_channel_set_priority(ch,
2649                         ((struct nvgpu_set_priority_args *)buf)->priority);
2650                 gk20a_idle(dev);
2651                 break;
2652         case NVGPU_IOCTL_CHANNEL_ENABLE:
2653                 err = gk20a_busy(dev);
2654                 if (err) {
2655                         dev_err(&dev->dev,
2656                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2657                                 __func__, cmd);
2658                         break;
2659                 }
2660                 /* enable channel */
2661                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2662                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2663                         ccsr_channel_enable_set_true_f());
2664                 gk20a_idle(dev);
2665                 break;
2666         case NVGPU_IOCTL_CHANNEL_DISABLE:
2667                 err = gk20a_busy(dev);
2668                 if (err) {
2669                         dev_err(&dev->dev,
2670                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2671                                 __func__, cmd);
2672                         break;
2673                 }
2674                 /* disable channel */
2675                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2676                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2677                         ccsr_channel_enable_clr_true_f());
2678                 gk20a_idle(dev);
2679                 break;
2680         case NVGPU_IOCTL_CHANNEL_PREEMPT:
2681                 err = gk20a_busy(dev);
2682                 if (err) {
2683                         dev_err(&dev->dev,
2684                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2685                                 __func__, cmd);
2686                         break;
2687                 }
2688                 err = gk20a_fifo_preempt(ch->g, ch);
2689                 gk20a_idle(dev);
2690                 break;
2691         case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
2692                 err = gk20a_busy(dev);
2693                 if (err) {
2694                         dev_err(&dev->dev,
2695                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2696                                 __func__, cmd);
2697                         break;
2698                 }
2699                 err = gk20a_fifo_force_reset_ch(ch, true);
2700                 gk20a_idle(dev);
2701                 break;
2702         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
2703                 err = gk20a_channel_events_ctrl(ch,
2704                            (struct nvgpu_channel_events_ctrl_args *)buf);
2705                 break;
2706 #ifdef CONFIG_GK20A_CYCLE_STATS
2707         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
2708                 err = gk20a_busy(dev);
2709                 if (err) {
2710                         dev_err(&dev->dev,
2711                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2712                                 __func__, cmd);
2713                         break;
2714                 }
2715                 err = gk20a_channel_cycle_stats_snapshot(ch,
2716                                 (struct nvgpu_cycle_stats_snapshot_args *)buf);
2717                 gk20a_idle(dev);
2718                 break;
2719 #endif
2720         default:
2721                 dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2722                 err = -ENOTTY;
2723                 break;
2724         }
2725
2726         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2727                 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
2728
2729         mutex_unlock(&ch->ioctl_lock);
2730
2731         gk20a_channel_put(ch);
2732
2733         gk20a_dbg_fn("end");
2734
2735         return err;
2736 }