linux-3.10.git: drivers/gpu/nvgpu/gk20a/channel_gk20a.c
1 /*
2  * GK20A Graphics channel
3  *
4  * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/nvhost.h>
20 #include <linux/list.h>
21 #include <linux/delay.h>
22 #include <linux/highmem.h> /* needed for nvmap.h */
23 #include <trace/events/gk20a.h>
24 #include <linux/scatterlist.h>
25 #include <linux/file.h>
26 #include <linux/anon_inodes.h>
27 #include <linux/dma-buf.h>
28 #include <linux/vmalloc.h>
29
30 #include "debug_gk20a.h"
31
32 #include "gk20a.h"
33 #include "dbg_gpu_gk20a.h"
34 #include "fence_gk20a.h"
35 #include "semaphore_gk20a.h"
36
37 #include "hw_ram_gk20a.h"
38 #include "hw_fifo_gk20a.h"
39 #include "hw_pbdma_gk20a.h"
40 #include "hw_ccsr_gk20a.h"
41 #include "hw_ltc_gk20a.h"
42
43 #define NVMAP_HANDLE_PARAM_SIZE 1
44
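/*
 * Once more than this many channels are in use, channel sync objects are
 * destroyed eagerly when a channel is unbound (see allocate_channel()/
 * free_channel() toggling platform->aggressive_sync_destroy, and
 * channel_gk20a_unbind()) instead of waiting for channel free.
 */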
45 #define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT       64      /* channels */
46
47 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
48 static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
49
50 static void free_priv_cmdbuf(struct channel_gk20a *c,
51                              struct priv_cmd_entry *e);
52
53 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
54 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
55
56 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
57 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
58
59 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
60
61 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
62                                         bool add);
63 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
64
65 /* allocate GPU channel */
66 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
67 {
68         struct channel_gk20a *ch = NULL;
69         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
70
71         mutex_lock(&f->free_chs_mutex);
72         if (!list_empty(&f->free_chs)) {
73                 ch = list_first_entry(&f->free_chs, struct channel_gk20a,
74                                 free_chs);
75                 list_del(&ch->free_chs);
76                 WARN_ON(atomic_read(&ch->ref_count));
77                 WARN_ON(ch->referenceable);
78                 f->used_channels++;
79         }
80         mutex_unlock(&f->free_chs_mutex);
81
82         if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
83                 platform->aggressive_sync_destroy = true;
84
85         return ch;
86 }
87
88 static void free_channel(struct fifo_gk20a *f,
89                 struct channel_gk20a *ch)
90 {
91         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
92
93         trace_gk20a_release_used_channel(ch->hw_chid);
94         /* refcount is zero here and channel is in a freed/dead state */
95         mutex_lock(&f->free_chs_mutex);
96         /* add to head to increase visibility of timing-related bugs */
97         list_add(&ch->free_chs, &f->free_chs);
98         f->used_channels--;
99         mutex_unlock(&f->free_chs_mutex);
100
101         if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
102                 platform->aggressive_sync_destroy = false;
103 }
104
105 int channel_gk20a_commit_va(struct channel_gk20a *c)
106 {
107         gk20a_dbg_fn("");
108
109         if (!c->inst_block.cpu_va)
110                 return -ENOMEM;
111
112         gk20a_init_inst_block(&c->inst_block, c->vm,
113                         c->vm->gmmu_page_sizes[gmmu_page_size_big]);
114
115         return 0;
116 }
117
118 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
119 {
120         u32 addr_lo;
121         u32 addr_hi;
122         void *inst_ptr;
123
124         gk20a_dbg_fn("");
125
126         inst_ptr = c->inst_block.cpu_va;
127         if (!inst_ptr)
128                 return -ENOMEM;
129
130         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
131         addr_hi = u64_hi32(c->userd_iova);
132
133         gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
134                 c->hw_chid, (u64)c->userd_iova);
135
136         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
137                  pbdma_userd_target_vid_mem_f() |
138                  pbdma_userd_addr_f(addr_lo));
139
140         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
141                  pbdma_userd_target_vid_mem_f() |
142                  pbdma_userd_hi_addr_f(addr_hi));
143
144         return 0;
145 }
146
147 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
148                                 u32 timeslice_timeout)
149 {
150         void *inst_ptr;
151         int shift = 3;
152         int value = timeslice_timeout;
153
154         inst_ptr = c->inst_block.cpu_va;
155         if (!inst_ptr)
156                 return -ENOMEM;
157
158         /* disable channel */
159         c->g->ops.fifo.disable_channel(c);
160
161         /* preempt the channel */
162         WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
163
164         /* value field is 8 bits long */
165         while (value >= 1 << 8) {
166                 value >>= 1;
167                 shift++;
168         }
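        /* e.g. a requested timeslice_timeout of 1000 leaves this loop as
         * value = 250, shift = 5: the value is halved (and the exponent
         * bumped) until it fits in 8 bits, so 1000 == 250 << 2 on top of
         * the initial shift of 3 */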
169
170         /* time slice register is only 18 bits long */
171         if ((value << shift) >= 1<<19) {
172                 pr_err("Requested timeslice value is clamped to 18 bits\n");
173                 value = 255;
174                 shift = 10;
175         }
176
177         /* set new timeslice */
178         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
179                 value | (shift << 12) |
180                 fifo_runlist_timeslice_enable_true_f());
181
182         /* enable channel */
183         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
184                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
185                 ccsr_channel_enable_set_true_f());
186
187         return 0;
188 }
189
190 int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
191                         u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
192 {
193         void *inst_ptr;
194
195         gk20a_dbg_fn("");
196
197         inst_ptr = c->inst_block.cpu_va;
198         if (!inst_ptr)
199                 return -ENOMEM;
200
201         memset(inst_ptr, 0, ram_fc_size_val_v());
202
203         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
204                 pbdma_gp_base_offset_f(
205                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
206
207         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
208                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
209                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
210
211         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
212                  c->g->ops.fifo.get_pbdma_signature(c->g));
213
214         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
215                 pbdma_formats_gp_fermi0_f() |
216                 pbdma_formats_pb_fermi1_f() |
217                 pbdma_formats_mp_fermi0_f());
218
219         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
220                 pbdma_pb_header_priv_user_f() |
221                 pbdma_pb_header_method_zero_f() |
222                 pbdma_pb_header_subchannel_zero_f() |
223                 pbdma_pb_header_level_main_f() |
224                 pbdma_pb_header_first_true_f() |
225                 pbdma_pb_header_type_inc_f());
226
227         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
228                 pbdma_subdevice_id_f(1) |
229                 pbdma_subdevice_status_active_f() |
230                 pbdma_subdevice_channel_dma_enable_f());
231
232         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
233
234         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
235                 pbdma_acquire_retry_man_2_f() |
236                 pbdma_acquire_retry_exp_2_f() |
237                 pbdma_acquire_timeout_exp_max_f() |
238                 pbdma_acquire_timeout_man_max_f() |
239                 pbdma_acquire_timeout_en_disable_f());
240
241         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
242                 fifo_runlist_timeslice_timeout_128_f() |
243                 fifo_runlist_timeslice_timescale_3_f() |
244                 fifo_runlist_timeslice_enable_true_f());
245
246         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
247                 fifo_pb_timeslice_timeout_16_f() |
248                 fifo_pb_timeslice_timescale_0_f() |
249                 fifo_pb_timeslice_enable_true_f());
250
251         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
252
253         return channel_gk20a_commit_userd(c);
254 }
255
256 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
257 {
258         BUG_ON(!c->userd_cpu_va);
259
260         gk20a_dbg_fn("");
261
262         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
263         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
264         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
265         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
266         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
267         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
268         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
269         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
270         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
271         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
272
273         return 0;
274 }
275
276 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
277 {
278         struct gk20a *g = ch_gk20a->g;
279         struct fifo_gk20a *f = &g->fifo;
280         struct fifo_engine_info_gk20a *engine_info =
281                 f->engine_info + ENGINE_GR_GK20A;
282
283         u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block)
284                 >> ram_in_base_shift_v();
285
286         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
287                 ch_gk20a->hw_chid, inst_ptr);
288
289         ch_gk20a->bound = true;
290
291         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
292                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
293                  ~ccsr_channel_runlist_f(~0)) |
294                  ccsr_channel_runlist_f(engine_info->runlist_id));
295
296         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
297                 ccsr_channel_inst_ptr_f(inst_ptr) |
298                 ccsr_channel_inst_target_vid_mem_f() |
299                 ccsr_channel_inst_bind_true_f());
300
301         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
302                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
303                  ~ccsr_channel_enable_set_f(~0)) |
304                  ccsr_channel_enable_set_true_f());
305 }
306
307 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
308 {
309         struct gk20a *g = ch_gk20a->g;
310         struct gk20a_platform *platform = gk20a_get_platform(g->dev);
311
312         gk20a_dbg_fn("");
313
314         if (ch_gk20a->bound)
315                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
316                         ccsr_channel_inst_ptr_f(0) |
317                         ccsr_channel_inst_bind_false_f());
318
319         ch_gk20a->bound = false;
320
321         /*
322          * If we are aggressive then we can destroy the syncpt
323          * resource at this point;
324          * if not, it will be destroyed at channel_free()
325          */
326         if (ch_gk20a->sync && platform->aggressive_sync_destroy) {
327                 ch_gk20a->sync->destroy(ch_gk20a->sync);
328                 ch_gk20a->sync = NULL;
329         }
330 }
331
332 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
333 {
334         int err;
335
336         gk20a_dbg_fn("");
337
338         err = gk20a_alloc_inst_block(g, &ch->inst_block);
339         if (err)
340                 return err;
341
342         gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
343                 ch->hw_chid, (u64)gk20a_mem_phys(&ch->inst_block));
344
345         gk20a_dbg_fn("done");
346         return 0;
347 }
348
349 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
350 {
351         gk20a_free_inst_block(g, &ch->inst_block);
352 }
353
354 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
355 {
356         return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
357 }
358
359 void channel_gk20a_enable(struct channel_gk20a *ch)
360 {
361         /* enable channel */
362         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
363                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
364                 ccsr_channel_enable_set_true_f());
365 }
366
367 void channel_gk20a_disable(struct channel_gk20a *ch)
368 {
369         /* disable channel */
370         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
371                 gk20a_readl(ch->g,
372                         ccsr_channel_r(ch->hw_chid)) |
373                         ccsr_channel_enable_clr_true_f());
374 }
375
376 void gk20a_channel_abort(struct channel_gk20a *ch)
377 {
378         struct channel_gk20a_job *job, *n;
379         bool released_job_semaphore = false;
380
381         gk20a_dbg_fn("");
382
383         /* make sure new kickoffs are prevented */
384         ch->has_timedout = true;
385
386         /* ensure no fences are pending */
387         mutex_lock(&ch->submit_lock);
388         if (ch->sync)
389                 ch->sync->set_min_eq_max(ch->sync);
390         mutex_unlock(&ch->submit_lock);
391
392         /* release all job semaphores (applies only to jobs that use
393            semaphore synchronization) */
394         mutex_lock(&ch->jobs_lock);
395         list_for_each_entry_safe(job, n, &ch->jobs, list) {
396                 if (job->post_fence->semaphore) {
397                         gk20a_semaphore_release(job->post_fence->semaphore);
398                         released_job_semaphore = true;
399                 }
400         }
401         mutex_unlock(&ch->jobs_lock);
402
403         ch->g->ops.fifo.disable_channel(ch);
404
405         if (released_job_semaphore) {
406                 wake_up_interruptible_all(&ch->semaphore_wq);
407                 gk20a_channel_update(ch, 0);
408         }
409 }
410
411 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
412 {
413         bool channel_idle = false;
414         unsigned long end_jiffies = jiffies +
415                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
416
417         do {
418                 mutex_lock(&ch->jobs_lock);
419                 channel_idle = list_empty(&ch->jobs);
420                 mutex_unlock(&ch->jobs_lock);
421                 if (channel_idle)
422                         break;
423
424                 usleep_range(1000, 3000);
425         } while (time_before(jiffies, end_jiffies)
426                         || !tegra_platform_is_silicon());
427
428         if (!channel_idle) {
429                 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
430                                 ch->hw_chid);
431                 return -EBUSY;
432         }
433
434         return 0;
435 }
436
437 void gk20a_disable_channel(struct channel_gk20a *ch,
438                            bool finish,
439                            unsigned long finish_timeout)
440 {
441         gk20a_dbg_fn("");
442
443         if (finish) {
444                 int err = gk20a_channel_finish(ch, finish_timeout);
445                 WARN_ON(err);
446         }
447
448         /* disable the channel from hw and increment syncpoints */
449         gk20a_channel_abort(ch);
450
451         gk20a_wait_channel_idle(ch);
452
453         /* preempt the channel */
454         ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
455
456         /* remove channel from runlist */
457         channel_gk20a_update_runlist(ch, false);
458 }
459
460 #if defined(CONFIG_GK20A_CYCLE_STATS)
461
462 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
463 {
464         /* disable existing cyclestats buffer */
465         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
466         if (ch->cyclestate.cyclestate_buffer_handler) {
467                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
468                                 ch->cyclestate.cyclestate_buffer);
469                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
470                 ch->cyclestate.cyclestate_buffer_handler = NULL;
471                 ch->cyclestate.cyclestate_buffer = NULL;
472                 ch->cyclestate.cyclestate_buffer_size = 0;
473         }
474         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
475 }
476
477 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
478                        struct nvgpu_cycle_stats_args *args)
479 {
480         struct dma_buf *dmabuf;
481         void *virtual_address;
482
483         /* is it allowed to handle calls for current GPU? */
484         if (0 == (ch->g->gpu_characteristics.flags &
485                         NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS))
486                 return -ENOSYS;
487
488         if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {
489
490                 /* set up new cyclestats buffer */
491                 dmabuf = dma_buf_get(args->dmabuf_fd);
492                 if (IS_ERR(dmabuf))
493                         return PTR_ERR(dmabuf);
494                 virtual_address = dma_buf_vmap(dmabuf);
495                 if (!virtual_address)
496                         return -ENOMEM;
497
498                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
499                 ch->cyclestate.cyclestate_buffer = virtual_address;
500                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
501                 return 0;
502
503         } else if (!args->dmabuf_fd &&
504                         ch->cyclestate.cyclestate_buffer_handler) {
505                 gk20a_free_cycle_stats_buffer(ch);
506                 return 0;
507
508         } else if (!args->dmabuf_fd &&
509                         !ch->cyclestate.cyclestate_buffer_handler) {
510                 /* no request from GL */
511                 return 0;
512
513         } else {
514                 pr_err("channel already has cyclestats buffer\n");
515                 return -EINVAL;
516         }
517 }
518
519
520 static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
521 {
522         int ret;
523
524         mutex_lock(&ch->cs_client_mutex);
525         if (ch->cs_client)
526                 ret = gr_gk20a_css_flush(ch->g, ch->cs_client);
527         else
528                 ret = -EBADF;
529         mutex_unlock(&ch->cs_client_mutex);
530
531         return ret;
532 }
533
534 static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
535                                 u32 dmabuf_fd,
536                                 u32 perfmon_id_count,
537                                 u32 *perfmon_id_start)
538 {
539         int ret;
540
541         mutex_lock(&ch->cs_client_mutex);
542         if (ch->cs_client) {
543                 ret = -EEXIST;
544         } else {
545                 ret = gr_gk20a_css_attach(ch->g,
546                                         dmabuf_fd,
547                                         perfmon_id_count,
548                                         perfmon_id_start,
549                                         &ch->cs_client);
550         }
551         mutex_unlock(&ch->cs_client_mutex);
552
553         return ret;
554 }
555
556 static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
557 {
558         int ret;
559
560         mutex_lock(&ch->cs_client_mutex);
561         if (ch->cs_client) {
562                 ret = gr_gk20a_css_detach(ch->g, ch->cs_client);
563                 ch->cs_client = NULL;
564         } else {
565                 ret = 0;
566         }
567         mutex_unlock(&ch->cs_client_mutex);
568
569         return ret;
570 }
571
572 static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
573                         struct nvgpu_cycle_stats_snapshot_args *args)
574 {
575         int ret;
576
577         /* is it allowed to handle calls for current GPU? */
578         if (0 == (ch->g->gpu_characteristics.flags &
579                         NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT))
580                 return -ENOSYS;
581
582         if (!args->dmabuf_fd)
583                 return -EINVAL;
584
585         /* handle the command (most frequent cases first) */
586         switch (args->cmd) {
587         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
588                 ret = gk20a_flush_cycle_stats_snapshot(ch);
589                 args->extra = 0;
590                 break;
591
592         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
593                 ret = gk20a_attach_cycle_stats_snapshot(ch,
594                                                 args->dmabuf_fd,
595                                                 args->extra,
596                                                 &args->extra);
597                 break;
598
599         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
600                 ret = gk20a_free_cycle_stats_snapshot(ch);
601                 args->extra = 0;
602                 break;
603
604         default:
605                 pr_err("cyclestats: unknown command %u\n", args->cmd);
606                 ret = -EINVAL;
607                 break;
608         }
609
610         return ret;
611 }
612 #endif
613
614 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
615                 struct nvgpu_set_error_notifier *args) {
616         void *va;
617
618         struct dma_buf *dmabuf;
619
620         if (!args->mem) {
621                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
622                 return -EINVAL;
623         }
624
625         dmabuf = dma_buf_get(args->mem);
626
627         if (ch->error_notifier_ref)
628                 gk20a_free_error_notifiers(ch);
629
630         if (IS_ERR(dmabuf)) {
631                 pr_err("Invalid handle: %d\n", args->mem);
632                 return -EINVAL;
633         }
634         /* map handle */
635         va = dma_buf_vmap(dmabuf);
636         if (!va) {
637                 dma_buf_put(dmabuf);
638                 pr_err("Cannot map notifier handle\n");
639                 return -ENOMEM;
640         }
641
642         /* set channel notifiers pointer */
643         ch->error_notifier_ref = dmabuf;
644         ch->error_notifier = va + args->offset;
645         ch->error_notifier_va = va;
646         memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
647         return 0;
648 }
649
650 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
651 {
652         if (ch->error_notifier_ref) {
653                 struct timespec time_data;
654                 u64 nsec;
655                 getnstimeofday(&time_data);
656                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
657                                 (u64)time_data.tv_nsec;
658                 ch->error_notifier->time_stamp.nanoseconds[0] =
659                                 (u32)nsec;
660                 ch->error_notifier->time_stamp.nanoseconds[1] =
661                                 (u32)(nsec >> 32);
662                 ch->error_notifier->info32 = error;
663                 ch->error_notifier->status = 0xffff;
664
665                 gk20a_err(dev_from_gk20a(ch->g),
666                     "error notifier set to %d for ch %d", error, ch->hw_chid);
667         }
668 }
669
670 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
671 {
672         if (ch->error_notifier_ref) {
673                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
674                 dma_buf_put(ch->error_notifier_ref);
675                 ch->error_notifier_ref = NULL;
676                 ch->error_notifier = NULL;
677                 ch->error_notifier_va = NULL;
678         }
679 }
680
681 /* Returns delta of cyclic integers a and b. If a is ahead of b, delta
682  * is positive */
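/*
 * e.g. with a = INT_MIN (a counter that has just wrapped) and b = INT_MAX,
 * the subtraction wraps around to 1, meaning a is one step ahead of b; the
 * kernel is built with -fno-strict-overflow, so this signed wraparound is
 * well defined.
 */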
683 static int cyclic_delta(int a, int b)
684 {
685         return a - b;
686 }
687
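/*
 * Sample the hardware irq counters first, then wait until the deferred
 * handlers' bookkeeping (sw_irq_*_last_handled) has caught up to at least
 * those values; cyclic_delta() keeps the comparison correct across counter
 * wraparound.
 */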
688 static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
689 {
690         int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
691         int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);
692
693         /* wait until all stalling irqs are handled */
694         wait_event(g->sw_irq_stall_last_handled_wq,
695                    cyclic_delta(stall_irq_threshold,
696                                 atomic_read(&g->sw_irq_stall_last_handled))
697                    <= 0);
698
699         /* wait until all non-stalling irqs are handled */
700         wait_event(g->sw_irq_nonstall_last_handled_wq,
701                    cyclic_delta(nonstall_irq_threshold,
702                                 atomic_read(&g->sw_irq_nonstall_last_handled))
703                    <= 0);
704 }
705
706 static void gk20a_wait_until_counter_is_N(
707         struct channel_gk20a *ch, atomic_t *counter, int wait_value,
708         wait_queue_head_t *wq, const char *caller, const char *counter_name)
709 {
710         while (true) {
711                 if (wait_event_timeout(
712                             *wq,
713                             atomic_read(counter) == wait_value,
714                             msecs_to_jiffies(5000)) > 0)
715                         break;
716
717                 gk20a_warn(dev_from_gk20a(ch->g),
718                            "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
719                            caller, ch->hw_chid, counter_name,
720                            atomic_read(counter), wait_value);
721         }
722 }
723
724
725
726 /* call ONLY when no references to the channel exist: after the last put */
727 static void gk20a_free_channel(struct channel_gk20a *ch)
728 {
729         struct gk20a *g = ch->g;
730         struct fifo_gk20a *f = &g->fifo;
731         struct gr_gk20a *gr = &g->gr;
732         struct vm_gk20a *ch_vm = ch->vm;
733         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
734         struct dbg_session_gk20a *dbg_s;
735         bool was_reset;
736         gk20a_dbg_fn("");
737
738         WARN_ON(ch->g == NULL);
739
740         trace_gk20a_free_channel(ch->hw_chid);
741
742         /* prevent new kickoffs */
743         ch->has_timedout = true;
744         wmb();
745
746         /* wait until there's only our ref to the channel */
747         gk20a_wait_until_counter_is_N(
748                 ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
749                 __func__, "references");
750
751         /* wait until all pending interrupts for recently completed
752          * jobs are handled */
753         gk20a_wait_for_deferred_interrupts(g);
754
755         /* prevent new refs */
756         spin_lock(&ch->ref_obtain_lock);
757         if (!ch->referenceable) {
758                 spin_unlock(&ch->ref_obtain_lock);
759                 gk20a_err(dev_from_gk20a(ch->g),
760                           "Extra %s() called to channel %u",
761                           __func__, ch->hw_chid);
762                 return;
763         }
764         ch->referenceable = false;
765         spin_unlock(&ch->ref_obtain_lock);
766
767         /* matches with the initial reference in gk20a_open_new_channel() */
768         atomic_dec(&ch->ref_count);
769
770         /* wait until no more refs to the channel */
771         gk20a_wait_until_counter_is_N(
772                 ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
773                 __func__, "references");
774
775         /* if engine reset was deferred, perform it now */
776         mutex_lock(&f->deferred_reset_mutex);
777         if (g->fifo.deferred_reset_pending) {
778                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
779                            " deferred, running now");
780                 was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex);
781                 mutex_lock(&g->fifo.gr_reset_mutex);
782                 /* if the lock is already taken, a reset is taking place,
783                  * so no need to repeat */
784                 if (!was_reset) {
785                         gk20a_fifo_reset_engine(g,
786                                 g->fifo.deferred_fault_engines);
787                 }
788                 mutex_unlock(&g->fifo.gr_reset_mutex);
789                 g->fifo.deferred_fault_engines = 0;
790                 g->fifo.deferred_reset_pending = false;
791         }
792         mutex_unlock(&f->deferred_reset_mutex);
793
794         if (!ch->bound)
795                 goto release;
796
797         if (!gk20a_channel_as_bound(ch))
798                 goto unbind;
799
800         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
801                         timeout);
802
803         gk20a_disable_channel(ch, !ch->has_timedout, timeout);
804
805         gk20a_free_error_notifiers(ch);
806
807         /* release channel ctx */
808         g->ops.gr.free_channel_ctx(ch);
809
810         gk20a_gr_flush_channel_tlb(gr);
811
812         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
813
814         gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
815
816         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
817
818 #if defined(CONFIG_GK20A_CYCLE_STATS)
819         gk20a_free_cycle_stats_buffer(ch);
820         gk20a_free_cycle_stats_snapshot(ch);
821 #endif
822
823         channel_gk20a_free_priv_cmdbuf(ch);
824
825         /* sync must be destroyed before releasing channel vm */
826         if (ch->sync) {
827                 ch->sync->destroy(ch->sync);
828                 ch->sync = NULL;
829         }
830
831         /* release channel binding to the as_share */
832         if (ch_vm->as_share)
833                 gk20a_as_release_share(ch_vm->as_share);
834         else
835                 gk20a_vm_put(ch_vm);
836
837         spin_lock(&ch->update_fn_lock);
838         ch->update_fn = NULL;
839         ch->update_fn_data = NULL;
840         spin_unlock(&ch->update_fn_lock);
841         cancel_work_sync(&ch->update_fn_work);
842
843         /* make sure we don't have deferred interrupts pending that
844          * could still touch the channel */
845         gk20a_wait_for_deferred_interrupts(g);
846
847 unbind:
848         if (gk20a_is_channel_marked_as_tsg(ch))
849                 gk20a_tsg_unbind_channel(ch);
850
851         g->ops.fifo.unbind_channel(ch);
852         g->ops.fifo.free_inst(g, ch);
853
854         ch->vpr = false;
855         ch->vm = NULL;
856
857         mutex_lock(&ch->submit_lock);
858         gk20a_fence_put(ch->last_submit.pre_fence);
859         gk20a_fence_put(ch->last_submit.post_fence);
860         ch->last_submit.pre_fence = NULL;
861         ch->last_submit.post_fence = NULL;
862         mutex_unlock(&ch->submit_lock);
863         WARN_ON(ch->sync);
864
865         /* unlink all debug sessions */
866         mutex_lock(&ch->dbg_s_lock);
867
868         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
869                 dbg_s->ch = NULL;
870                 list_del_init(&dbg_s->dbg_s_list_node);
871         }
872
873         mutex_unlock(&ch->dbg_s_lock);
874
875 release:
876         /* make sure we catch accesses of unopened channels in case
877          * there are non-refcounted channel pointers hanging around */
878         ch->g = NULL;
879         wmb();
880
881         /* ALWAYS last */
882         free_channel(f, ch);
883 }
884
885 /* Try to get a reference to the channel. Return nonzero on success; if it
886  * fails, the channel is dead or being freed elsewhere; do not touch it.
887  *
888  * Whenever a channel_gk20a pointer is about to be used, a reference must be
889  * held to it - either by you or by the caller - and this should be well
890  * documented or otherwise clearly evident. The reference usually comes from
891  * the file (for ioctls directly), or from an explicit get in exception
892  * handlers when the channel is found by its hw_chid.
893  *
894  * Most global functions in this file require a reference to be held by the
895  * caller.
896  */
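/* Illustrative sketch of the pattern described above, as used elsewhere in
 * this file:
 *
 *     struct channel_gk20a *ch = gk20a_channel_get(c);
 *     if (!ch)
 *             return;    (channel is dying or already freed, do not touch it)
 *     ...use ch...
 *     gk20a_channel_put(ch);
 */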
897 struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
898                                          const char *caller) {
899         struct channel_gk20a *ret;
900
901         spin_lock(&ch->ref_obtain_lock);
902
903         if (likely(ch->referenceable)) {
904                 atomic_inc(&ch->ref_count);
905                 ret = ch;
906         } else
907                 ret = NULL;
908
909         spin_unlock(&ch->ref_obtain_lock);
910
911         if (ret)
912                 trace_gk20a_channel_get(ch->hw_chid, caller);
913
914         return ret;
915 }
916
917 void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
918 {
919         trace_gk20a_channel_put(ch->hw_chid, caller);
920         atomic_dec(&ch->ref_count);
921         wake_up_all(&ch->ref_count_dec_wq);
922
923         /* More puts than gets. Channel is probably going to get
924          * stuck. */
925         WARN_ON(atomic_read(&ch->ref_count) < 0);
926
927         /* Also, more puts than gets. ref_count can go to 0 only if
928          * the channel is closing. Channel is probably going to get
929          * stuck. */
930         WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
931 }
932
933 void gk20a_channel_close(struct channel_gk20a *ch)
934 {
935         gk20a_free_channel(ch);
936 }
937
938 int gk20a_channel_release(struct inode *inode, struct file *filp)
939 {
940         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
941         struct gk20a *g = ch ? ch->g : NULL;
942         int err;
943
944         if (!ch)
945                 return 0;
946
947         trace_gk20a_channel_release(dev_name(&g->dev->dev));
948
949         err = gk20a_busy(g->dev);
950         if (err) {
951                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
952                         ch->hw_chid);
953                 return err;
954         }
955         gk20a_channel_close(ch);
956         gk20a_idle(g->dev);
957
958         filp->private_data = NULL;
959         return 0;
960 }
961
962 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
963 {
964         struct channel_gk20a *ch =
965                 container_of(work, struct channel_gk20a, update_fn_work);
966         void (*update_fn)(struct channel_gk20a *, void *);
967         void *update_fn_data;
968
969         spin_lock(&ch->update_fn_lock);
970         update_fn = ch->update_fn;
971         update_fn_data = ch->update_fn_data;
972         spin_unlock(&ch->update_fn_lock);
973
974         if (update_fn)
975                 update_fn(ch, update_fn_data);
976 }
977
978 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
979                 void (*update_fn)(struct channel_gk20a *, void *),
980                 void *update_fn_data)
981 {
982         struct channel_gk20a *ch = gk20a_open_new_channel(g);
983
984         if (ch) {
985                 spin_lock(&ch->update_fn_lock);
986                 ch->update_fn = update_fn;
987                 ch->update_fn_data = update_fn_data;
988                 spin_unlock(&ch->update_fn_lock);
989         }
990
991         return ch;
992 }
993
994 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
995 {
996         struct fifo_gk20a *f = &g->fifo;
997         struct channel_gk20a *ch;
998
999         gk20a_dbg_fn("");
1000
1001         ch = allocate_channel(f);
1002         if (ch == NULL) {
1003                 /* TBD: we want to make this virtualizable */
1004                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
1005                 return NULL;
1006         }
1007
1008         trace_gk20a_open_new_channel(ch->hw_chid);
1009
1010         BUG_ON(ch->g);
1011         ch->g = g;
1012
1013         if (g->ops.fifo.alloc_inst(g, ch)) {
1014                 ch->g = NULL;
1015                 free_channel(f, ch);
1016                 gk20a_err(dev_from_gk20a(g),
1017                            "failed to open gk20a channel, out of inst mem");
1018                 return NULL;
1019         }
1020
1021         /* now the channel is in limbo: off the free list, but not yet marked
1022          * as alive and used (i.e. get-able) */
1023
1024         ch->pid = current->pid;
1025
1026         /* By default, channel is regular (non-TSG) channel */
1027         ch->tsgid = NVGPU_INVALID_TSG_ID;
1028
1029         /* reset timeout counter and update timestamp */
1030         ch->timeout_accumulated_ms = 0;
1031         ch->timeout_gpfifo_get = 0;
1032         /* set gr host default timeout */
1033         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
1034         ch->timeout_debug_dump = true;
1035         ch->has_timedout = false;
1036         ch->obj_class = 0;
1037
1038         /* The channel is *not* runnable at this point. It still needs to have
1039          * an address space bound and a gpfifo and grctx allocated. */
1040
1041         init_waitqueue_head(&ch->notifier_wq);
1042         init_waitqueue_head(&ch->semaphore_wq);
1043         init_waitqueue_head(&ch->submit_wq);
1044
1045         mutex_init(&ch->poll_events.lock);
1046         ch->poll_events.events_enabled = false;
1047         ch->poll_events.num_pending_events = 0;
1048
1049         ch->update_fn = NULL;
1050         ch->update_fn_data = NULL;
1051         spin_lock_init(&ch->update_fn_lock);
1052         INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
1053
1054         /* Mark the channel alive, get-able, with one initial use
1055          * reference. The initial reference will be dropped in
1056          * gk20a_free_channel() */
1057         ch->referenceable = true;
1058         atomic_set(&ch->ref_count, 1);
1059         wmb();
1060
1061         return ch;
1062 }
1063
1064 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
1065 {
1066         int err;
1067         struct channel_gk20a *ch;
1068
1069         trace_gk20a_channel_open(dev_name(&g->dev->dev));
1070
1071         err = gk20a_busy(g->dev);
1072         if (err) {
1073                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
1074                 return err;
1075         }
1076         ch = gk20a_open_new_channel(g);
1077         gk20a_idle(g->dev);
1078         if (!ch) {
1079                 gk20a_err(dev_from_gk20a(g),
1080                         "failed to get f");
1081                 return -ENOMEM;
1082         }
1083
1084         filp->private_data = ch;
1085         return 0;
1086 }
1087
1088 int gk20a_channel_open(struct inode *inode, struct file *filp)
1089 {
1090         struct gk20a *g = container_of(inode->i_cdev,
1091                         struct gk20a, channel.cdev);
1092         int ret;
1093
1094         gk20a_dbg_fn("start");
1095         ret = __gk20a_channel_open(g, filp);
1096
1097         gk20a_dbg_fn("end");
1098         return ret;
1099 }
1100
1101 int gk20a_channel_open_ioctl(struct gk20a *g,
1102                 struct nvgpu_channel_open_args *args)
1103 {
1104         int err;
1105         int fd;
1106         struct file *file;
1107         char *name;
1108
1109         err = get_unused_fd_flags(O_RDWR);
1110         if (err < 0)
1111                 return err;
1112         fd = err;
1113
1114         name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
1115                         dev_name(&g->dev->dev), fd);
1116         if (!name) {
1117                 err = -ENOMEM;
1118                 goto clean_up;
1119         }
1120
1121         file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
1122         kfree(name);
1123         if (IS_ERR(file)) {
1124                 err = PTR_ERR(file);
1125                 goto clean_up;
1126         }
1127
1128         err = __gk20a_channel_open(g, file);
1129         if (err)
1130                 goto clean_up_file;
1131
1132         fd_install(fd, file);
1133         args->channel_fd = fd;
1134         return 0;
1135
1136 clean_up_file:
1137         fput(file);
1138 clean_up:
1139         put_unused_fd(fd);
1140         return err;
1141 }
1142
1143 /* allocate the private cmd buffer,
1144    used for inserting commands before/after user-submitted buffers. */
1145 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
1146 {
1147         struct device *d = dev_from_gk20a(c->g);
1148         struct vm_gk20a *ch_vm = c->vm;
1149         struct priv_cmd_queue *q = &c->priv_cmd_q;
1150         u32 size;
1151         int err = 0;
1152
1153         /* Kernel can insert gpfifos before and after user gpfifos.
1154            Before user gpfifos, kernel inserts fence_wait, which takes
1155            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
1156            After user gpfifos, kernel inserts fence_get, which takes
1157            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
1158            = 6 dwords.
1159            Worse case if kernel adds both of them for every user gpfifo,
1160            Worst case, if the kernel adds both of them for every user gpfifo,
1161            the max size of priv_cmdbuf is:
1162            (gpfifo entry number * 2 / 3) * (4 + 6) * 4 bytes */
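        /* e.g. a 1024-entry gpfifo gives
         * roundup_pow_of_two(1024 * 2 * 12 * sizeof(u32) / 3) = 32768 bytes */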
1163                 c->gpfifo.entry_num * 2 * 12 * sizeof(u32) / 3);
1164
1165         err = gk20a_gmmu_alloc_map(ch_vm, size, &q->mem);
1166         if (err) {
1167                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1168                 goto clean_up;
1169         }
1170
1171         q->size = q->mem.size / sizeof (u32);
1172
1173         return 0;
1174
1175 clean_up:
1176         channel_gk20a_free_priv_cmdbuf(c);
1177         return err;
1178 }
1179
1180 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
1181 {
1182         struct vm_gk20a *ch_vm = c->vm;
1183         struct priv_cmd_queue *q = &c->priv_cmd_q;
1184
1185         if (q->size == 0)
1186                 return;
1187
1188         gk20a_gmmu_unmap_free(ch_vm, &q->mem);
1189
1190         memset(q, 0, sizeof(struct priv_cmd_queue));
1191 }
1192
1193 /* allocate a cmd buffer with given size. size is number of u32 entries */
1194 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1195                              struct priv_cmd_entry **entry)
1196 {
1197         struct priv_cmd_queue *q = &c->priv_cmd_q;
1198         struct priv_cmd_entry *e;
1199         u32 free_count;
1200         u32 size = orig_size;
1201
1202         gk20a_dbg_fn("size %d", orig_size);
1203
1204         *entry = NULL;
1205
1206         /* if the free space at the end is less than requested, increase the
1207          * size so that the real allocated space starts from the beginning. */
1208         if (q->put + size > q->size)
1209                 size = orig_size + (q->size - q->put);
1210
1211         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
1212                         c->hw_chid, q->get, q->put);
1213
1214         free_count = (q->size - (q->put - q->get) - 1) % q->size;
1215
1216         if (size > free_count)
1217                 return -EAGAIN;
1218
1219         e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1220         if (!e) {
1221                 gk20a_err(dev_from_gk20a(c->g),
1222                         "ch %d: fail to allocate priv cmd entry",
1223                         c->hw_chid);
1224                 return -ENOMEM;
1225         }
1226
1227         e->size = orig_size;
1228         e->gp_get = c->gpfifo.get;
1229         e->gp_put = c->gpfifo.put;
1230         e->gp_wrap = c->gpfifo.wrap;
1231
1232         /* if we have increased the size to skip free space at the end, set put
1233            to the beginning of the cmd buffer (0) + size */
1234         if (size != orig_size) {
1235                 e->ptr = (u32 *)q->mem.cpu_va;
1236                 e->gva = q->mem.gpu_va;
1237                 q->put = orig_size;
1238         } else {
1239                 e->ptr = (u32 *)q->mem.cpu_va + q->put;
1240                 e->gva = q->mem.gpu_va + q->put * sizeof(u32);
1241                 q->put = (q->put + orig_size) & (q->size - 1);
1242         }
1243
1244         /* we already handled q->put + size > q->size so BUG_ON this */
1245         BUG_ON(q->put > q->size);
1246
1247         *entry = e;
1248
1249         gk20a_dbg_fn("done");
1250
1251         return 0;
1252 }
1253
1254 /* Don't call this to free an explicit cmd entry.
1255  * It doesn't update priv_cmd_queue get/put */
1256 static void free_priv_cmdbuf(struct channel_gk20a *c,
1257                              struct priv_cmd_entry *e)
1258 {
1259         kfree(e);
1260 }
1261
1262 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1263                 struct nvgpu_alloc_gpfifo_args *args)
1264 {
1265         struct gk20a *g = c->g;
1266         struct device *d = dev_from_gk20a(g);
1267         struct vm_gk20a *ch_vm;
1268         u32 gpfifo_size;
1269         int err = 0;
1270
1271         /* Kernel can insert one extra gpfifo entry before user submitted gpfifos
1272            and another one after, for internal usage. Triple the requested size. */
1273         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
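        /* e.g. a request for 100 entries becomes
         * roundup_pow_of_two(100 * 3) = 512 gpfifo entries */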
1274
1275         if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1276                 c->vpr = true;
1277
1278         /* an address space needs to have been bound at this point. */
1279         if (!gk20a_channel_as_bound(c)) {
1280                 gk20a_err(d,
1281                             "not bound to an address space at time of gpfifo"
1282                             " allocation.");
1283                 return -EINVAL;
1284         }
1285         ch_vm = c->vm;
1286
1287         c->cmds_pending = false;
1288         mutex_lock(&c->submit_lock);
1289         gk20a_fence_put(c->last_submit.pre_fence);
1290         gk20a_fence_put(c->last_submit.post_fence);
1291         c->last_submit.pre_fence = NULL;
1292         c->last_submit.post_fence = NULL;
1293         mutex_unlock(&c->submit_lock);
1294
1295         c->ramfc.offset = 0;
1296         c->ramfc.size = ram_in_ramfc_s() / 8;
1297
1298         if (c->gpfifo.mem.cpu_va) {
1299                 gk20a_err(d, "channel %d :"
1300                            "gpfifo already allocated", c->hw_chid);
1301                 return -EEXIST;
1302         }
1303
1304         err = gk20a_gmmu_alloc_map(ch_vm, gpfifo_size * sizeof(struct gpfifo),
1305                         &c->gpfifo.mem);
1306         if (err) {
1307                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1308                 goto clean_up;
1309         }
1310
1311         c->gpfifo.entry_num = gpfifo_size;
1312         c->gpfifo.get = c->gpfifo.put = 0;
1313
1314         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1315                 c->hw_chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num);
1316
1317         channel_gk20a_setup_userd(c);
1318
1319         err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
1320                                         c->gpfifo.entry_num, args->flags);
1321         if (err)
1322                 goto clean_up_unmap;
1323
1324         /* TBD: setup engine contexts */
1325
1326         err = channel_gk20a_alloc_priv_cmdbuf(c);
1327         if (err)
1328                 goto clean_up_unmap;
1329
1330         err = channel_gk20a_update_runlist(c, true);
1331         if (err)
1332                 goto clean_up_unmap;
1333
1334         g->ops.fifo.bind_channel(c);
1335
1336         gk20a_dbg_fn("done");
1337         return 0;
1338
1339 clean_up_unmap:
1340         gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
1341 clean_up:
1342         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1343         gk20a_err(d, "fail");
1344         return err;
1345 }
1346
1347 static inline bool check_gp_put(struct gk20a *g,
1348                                 struct channel_gk20a *c)
1349 {
1350         u32 put;
1351         /* gp_put changed unexpectedly since last update? */
1352         put = gk20a_bar1_readl(g,
1353                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1354         if (c->gpfifo.put != put) {
1355                 /*TBD: BUG_ON/teardown on this*/
1356                 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1357                           "since last update, channel put = %u, ram put = %u\n",
1358                           c->gpfifo.put, put);
1359                 c->gpfifo.put = put;
1360                 return false; /* surprise! */
1361         }
1362         return true; /* checked out ok */
1363 }
1364
1365 /* Update with this periodically to determine how the gpfifo is draining. */
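/*
 * GP_GET is read from the USERD area through BAR1; when the value read is
 * smaller than the cached copy, the hardware get pointer has wrapped, so the
 * wrap flag is toggled to keep get/put comparisons consistent.
 */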
1366 static inline u32 update_gp_get(struct gk20a *g,
1367                                 struct channel_gk20a *c)
1368 {
1369         u32 new_get = gk20a_bar1_readl(g,
1370                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
1371         if (new_get < c->gpfifo.get)
1372                 c->gpfifo.wrap = !c->gpfifo.wrap;
1373         c->gpfifo.get = new_get;
1374         return new_get;
1375 }
1376
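/*
 * Standard ring-buffer accounting: one entry is always left unused so that
 * put == get unambiguously means "empty". E.g. entry_num = 1024, put = 10,
 * get = 5 gives (1024 - 5 - 1) % 1024 = 1018 free entries.
 */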
1377 static inline u32 gp_free_count(struct channel_gk20a *c)
1378 {
1379         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1380                 c->gpfifo.entry_num;
1381 }
1382
1383 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1384                 u32 timeout_delta_ms)
1385 {
1386         u32 gpfifo_get = update_gp_get(ch->g, ch);
1387         /* Count consecutive timeout isrs */
1388         if (gpfifo_get == ch->timeout_gpfifo_get) {
1389                 /* we didn't advance since previous channel timeout check */
1390                 ch->timeout_accumulated_ms += timeout_delta_ms;
1391         } else {
1392                 /* first timeout isr encountered */
1393                 ch->timeout_accumulated_ms = timeout_delta_ms;
1394         }
1395
1396         ch->timeout_gpfifo_get = gpfifo_get;
1397
1398         return ch->g->timeouts_enabled &&
1399                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1400 }
1401
1402 static u32 get_gp_free_count(struct channel_gk20a *c)
1403 {
1404         update_gp_get(c->g, c);
1405         return gp_free_count(c);
1406 }
1407
1408 static void trace_write_pushbuffer(struct channel_gk20a *c,
1409                                    struct nvgpu_gpfifo *g)
1410 {
1411         void *mem = NULL;
1412         unsigned int words;
1413         u64 offset;
1414         struct dma_buf *dmabuf = NULL;
1415
1416         if (gk20a_debug_trace_cmdbuf) {
1417                 u64 gpu_va = (u64)g->entry0 |
1418                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1419                 int err;
1420
1421                 words = pbdma_gp_entry1_length_v(g->entry1);
1422                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1423                 if (!err)
1424                         mem = dma_buf_vmap(dmabuf);
1425         }
1426
1427         if (mem) {
1428                 u32 i;
1429                 /*
1430                  * Write in batches of 128 as there seems to be a limit
1431                  * of how much you can output to ftrace at once.
1432                  */
1433                 for (i = 0; i < words; i += 128U) {
1434                         trace_gk20a_push_cmdbuf(
1435                                 c->g->dev->name,
1436                                 0,
1437                                 min(words - i, 128U),
1438                                 offset + i * sizeof(u32),
1439                                 mem);
1440                 }
1441                 dma_buf_vunmap(dmabuf, mem);
1442         }
1443 }
1444
1445 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
1446                                          struct nvgpu_gpfifo *g,
1447                                          struct nvgpu_submit_gpfifo_args *args,
1448                                          int offset,
1449                                          int count)
1450 {
1451         u32 size;
1452         int i;
1453         struct nvgpu_gpfifo *gp;
1454         bool gpfifo_allocated = false;
1455
1456         if (!gk20a_debug_trace_cmdbuf)
1457                 return;
1458
1459         if (!g && !args)
1460                 return;
1461
1462         if (!g) {
1463                 size = args->num_entries * sizeof(struct nvgpu_gpfifo);
1464                 if (size) {
1465                         g = nvgpu_alloc(size, false);
1466                         if (!g)
1467                                 return;
1468
1469                         if (copy_from_user(g,
1470                                 (void __user *)(uintptr_t)args->gpfifo, size)) {
1471                                 return;
1472                         }
1473                 }
1474                 gpfifo_allocated = true;
1475         }
1476
1477         gp = g + offset;
1478         for (i = 0; i < count; i++, gp++)
1479                 trace_write_pushbuffer(c, gp);
1480
1481         if (gpfifo_allocated)
1482                 nvgpu_free(g);
1483 }
1484
1485 static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
1486                                         struct priv_cmd_entry *e)
1487 {
1488         struct priv_cmd_queue *q = &c->priv_cmd_q;
1489         u32 cmd_entry_start;
1490         struct device *d = dev_from_gk20a(c->g);
1491
1492         if (!e)
1493                 return 0;
1494
1495         cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);
1496         if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
1497                 gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
1498
1499         q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
1500         free_priv_cmdbuf(c, e);
1501
1502         return 0;
1503 }
1504
1505 static int gk20a_channel_add_job(struct channel_gk20a *c,
1506                                  struct gk20a_fence *pre_fence,
1507                                  struct gk20a_fence *post_fence,
1508                                  struct priv_cmd_entry *wait_cmd,
1509                                  struct priv_cmd_entry *incr_cmd,
1510                                  bool skip_buffer_refcounting)
1511 {
1512         struct vm_gk20a *vm = c->vm;
1513         struct channel_gk20a_job *job = NULL;
1514         struct mapped_buffer_node **mapped_buffers = NULL;
1515         int err = 0, num_mapped_buffers = 0;
1516
1517         /* job needs reference to this vm (released in channel_update) */
1518         gk20a_vm_get(vm);
1519
1520         if (!skip_buffer_refcounting) {
1521                 err = gk20a_vm_get_buffers(vm, &mapped_buffers,
1522                                         &num_mapped_buffers);
1523                 if (err) {
1524                         gk20a_vm_put(vm);
1525                         return err;
1526                 }
1527         }
1528
1529         job = kzalloc(sizeof(*job), GFP_KERNEL);
1530         if (!job) {
1531                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1532                 gk20a_vm_put(vm);
1533                 return -ENOMEM;
1534         }
1535
1536         /* put() is done in gk20a_channel_update() when the job is done */
1537         c = gk20a_channel_get(c);
1538
1539         if (c) {
1540                 job->num_mapped_buffers = num_mapped_buffers;
1541                 job->mapped_buffers = mapped_buffers;
1542                 job->pre_fence = gk20a_fence_get(pre_fence);
1543                 job->post_fence = gk20a_fence_get(post_fence);
1544                 job->wait_cmd = wait_cmd;
1545                 job->incr_cmd = incr_cmd;
1546
1547                 mutex_lock(&c->jobs_lock);
1548                 list_add_tail(&job->list, &c->jobs);
1549                 mutex_unlock(&c->jobs_lock);
1550         } else {
                     /* channel is already gone; undo the refs taken above */
                     gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
                     gk20a_vm_put(vm);
                     kfree(job);
1551                 return -ETIMEDOUT;
1552         }
1553
1554         return 0;
1555 }
1556
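     /*
      * Reap completed jobs on a channel: walk the job list in submission
      * order, stop at the first job whose post fence has not expired, and
      * for each completed job release the buffer refs, fences, private
      * command buffers, VM reference and channel reference taken in
      * gk20a_channel_add_job().
      */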
1557 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1558 {
1559         struct vm_gk20a *vm = c->vm;
1560         struct channel_gk20a_job *job, *n;
1561         struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
1562
1563         trace_gk20a_channel_update(c->hw_chid);
1564
1565         wake_up(&c->submit_wq);
1566
1567         mutex_lock(&c->submit_lock);
1568
1569         if (c->g->power_on) {
1570                 /* gp_put check needs to be done inside submit lock */
1571                 update_gp_get(c->g, c);
1572                 check_gp_put(c->g, c);
1573         }
1574
1575         mutex_lock(&c->jobs_lock);
1576         list_for_each_entry_safe(job, n, &c->jobs, list) {
1577                 struct gk20a *g = c->g;
1578
1579                 bool completed = gk20a_fence_is_expired(job->post_fence);
1580                 if (!completed)
1581                         break;
1582
1583                 if (c->sync)
1584                         c->sync->signal_timeline(c->sync);
1585
1586                 if (job->num_mapped_buffers)
1587                         gk20a_vm_put_buffers(vm, job->mapped_buffers,
1588                                 job->num_mapped_buffers);
1589
1590                 /* Close the fences (this will unref the semaphores and release
1591                  * them to the pool). */
1592                 gk20a_fence_put(job->pre_fence);
1593                 gk20a_fence_put(job->post_fence);
1594
1595                 /* Free the private command buffers (wait_cmd first and
1596                  * then incr_cmd i.e. order of allocation) */
1597                 gk20a_free_priv_cmdbuf(c, job->wait_cmd);
1598                 gk20a_free_priv_cmdbuf(c, job->incr_cmd);
1599
1600                 /* job is done. release its vm reference (taken in add_job) */
1601                 gk20a_vm_put(vm);
1602                 /* drop the extra channel ref taken in add_job; the caller
1603                  * must hold its own ref, so the channel isn't freed here. */
1604                 gk20a_channel_put(c);
1605
1606                 list_del_init(&job->list);
1607                 kfree(job);
1608                 gk20a_idle(g->dev);
1609         }
1610
1611         /*
1612          * If the job list is empty the channel is idle and we can free
1613          * the syncpoint here (provided the aggressive_sync_destroy flag
1614          * is set). Note: check that the last submit is complete before
1615          * destroying the sync resource.
1616          */
1617         if (list_empty(&c->jobs)) {
1618                 if (c->sync && platform->aggressive_sync_destroy &&
1619                           gk20a_fence_is_expired(c->last_submit.post_fence)) {
1620                         c->sync->destroy(c->sync);
1621                         c->sync = NULL;
1622                 }
1623         }
1624         mutex_unlock(&c->jobs_lock);
1625         mutex_unlock(&c->submit_lock);
1626
1627         if (c->update_fn)
1628                 schedule_work(&c->update_fn_work);
1629 }
1630
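     /*
      * Submit user gpfifo entries to a channel. The kernel may bracket the
      * user entries with two entries of its own: an optional syncpt/
      * sync-fence wait at the front and a syncpt increment at the back
      * (hence extra_entries == 2 below). Each gpfifo entry encodes the GPU
      * virtual address and length of a pushbuffer, roughly:
      *
      *   entry0 = u64_lo32(gva);
      *   entry1 = u64_hi32(gva) | pbdma_gp_entry1_length_f(size);
      *
      * as can be seen where wait_cmd/incr_cmd are written into the ring
      * below. Entries come either from an in-kernel 'gpfifo' array or are
      * copied directly from the user pointer in 'args'.
      */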
1631 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1632                                 struct nvgpu_gpfifo *gpfifo,
1633                                 struct nvgpu_submit_gpfifo_args *args,
1634                                 u32 num_entries,
1635                                 u32 flags,
1636                                 struct nvgpu_fence *fence,
1637                                 struct gk20a_fence **fence_out,
1638                                 bool force_need_sync_fence)
1639 {
1640         struct gk20a *g = c->g;
1641         struct device *d = dev_from_gk20a(g);
1642         int err = 0;
1643         int start, end;
1644         int wait_fence_fd = -1;
1645         struct priv_cmd_entry *wait_cmd = NULL;
1646         struct priv_cmd_entry *incr_cmd = NULL;
1647         struct gk20a_fence *pre_fence = NULL;
1648         struct gk20a_fence *post_fence = NULL;
1649         /* we might need two extra gpfifo entries - one for pre fence
1650          * and one for post fence. */
1651         const int extra_entries = 2;
1652         bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1653         bool skip_buffer_refcounting = (flags &
1654                         NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
1655         bool need_sync_fence = false;
1656
1657         /*
1658          * If user wants to allocate sync_fence_fd always, then respect that;
1659          * otherwise, allocate sync_fence_fd based on user flags only
1660          */
1661         if (force_need_sync_fence)
1662                 need_sync_fence = true;
1663
1664         if (c->has_timedout)
1665                 return -ETIMEDOUT;
1666
1667         /* Fifo not large enough for the request: return an error immediately.
1668          * The kernel can insert gpfifo entries before and after the user
1669          * gpfifos, so add extra_entries to the user request. Also, HW with a
1670          * fifo of size N can accept only N-1 entries, hence the check below. */
1671         if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
1672                 gk20a_err(d, "not enough gpfifo space allocated");
1673                 return -ENOMEM;
1674         }
1675
1676         if (!gpfifo && !args)
1677                 return -EINVAL;
1678
1679         if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1680                       NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1681             !fence)
1682                 return -EINVAL;
1683
1684         /* an address space needs to have been bound at this point. */
1685         if (!gk20a_channel_as_bound(c)) {
1686                 gk20a_err(d,
1687                             "not bound to an address space at time of gpfifo"
1688                             " submission.");
1689                 return -EINVAL;
1690         }
1691
1692 #ifdef CONFIG_DEBUG_FS
1693         /* update debug settings */
1694         if (g->ops.ltc.sync_debugfs)
1695                 g->ops.ltc.sync_debugfs(g);
1696 #endif
1697
1698         gk20a_dbg_info("channel %d", c->hw_chid);
1699
1700         /* gk20a_channel_update releases this ref. */
1701         err = gk20a_busy(g->dev);
1702         if (err) {
1703                 gk20a_err(d, "failed to host gk20a to submit gpfifo");
1704                 return err;
1705         }
1706
1707         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1708                                           c->hw_chid,
1709                                           num_entries,
1710                                           flags,
1711                                           fence ? fence->id : 0,
1712                                           fence ? fence->value : 0);
1713
1714         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1715                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1716
1717         /* Make sure we have enough space for gpfifo entries. If not,
1718          * wait for signals from completed submits */
1719         if (gp_free_count(c) < num_entries + extra_entries) {
1720                 /* we can get here via locked ioctl and other paths too */
1721                 int locked_path = mutex_is_locked(&c->ioctl_lock);
1722                 if (locked_path)
1723                         mutex_unlock(&c->ioctl_lock);
1724
1725                 trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
1726                 err = wait_event_interruptible(c->submit_wq,
1727                         get_gp_free_count(c) >= num_entries + extra_entries ||
1728                         c->has_timedout);
1729                 trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);
1730
1731                 if (locked_path)
1732                         mutex_lock(&c->ioctl_lock);
1733         }
1734
1735         if (c->has_timedout) {
1736                 err = -ETIMEDOUT;
1737                 goto clean_up;
1738         }
1739
1740         if (err) {
1741                 gk20a_err(d, "timeout waiting for gpfifo space");
1742                 err = -EAGAIN;
1743                 goto clean_up;
1744         }
1745
1746         mutex_lock(&c->submit_lock);
1747
1748         if (!c->sync) {
1749                 c->sync = gk20a_channel_sync_create(c);
1750                 if (!c->sync) {
1751                         err = -ENOMEM;
1752                         mutex_unlock(&c->submit_lock);
1753                         goto clean_up;
1754                 }
1755                 if (g->ops.fifo.resetup_ramfc)
1756                         err = g->ops.fifo.resetup_ramfc(c);
1757                 if (err) {
                             mutex_unlock(&c->submit_lock);
                             goto clean_up;
1758                 }
1759         }
1760
1761         /*
1762          * Optionally insert a syncpt wait at the beginning of the gpfifo
1763          * submission when the user requests it and the wait hasn't expired.
1764          * Validate that the id makes sense and elide the wait if it doesn't;
1765          * the only reason this path isn't removed outright is to keep some
1766          * tests which trigger this condition running.
1767          */
1768         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1769                 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1770                         wait_fence_fd = fence->id;
1771                         err = c->sync->wait_fd(c->sync, wait_fence_fd,
1772                                         &wait_cmd, &pre_fence);
1773                 } else {
1774                         err = c->sync->wait_syncpt(c->sync, fence->id,
1775                                         fence->value, &wait_cmd, &pre_fence);
1776                 }
1777         }
1778         if (err) {
1779                 mutex_unlock(&c->submit_lock);
1780                 goto clean_up;
1781         }
1782
1783         if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
1784                         (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
1785                 need_sync_fence = true;
1786
1787         /* always insert syncpt increment at end of gpfifo submission
1788            to keep track of method completion for idle railgating */
1789         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1790                 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1791                                  &post_fence, need_wfi, need_sync_fence);
1792         else
1793                 err = c->sync->incr(c->sync, &incr_cmd,
1794                                     &post_fence, need_sync_fence);
1795         if (err) {
1796                 mutex_unlock(&c->submit_lock);
1797                 goto clean_up;
1798         }
1799
1800         if (wait_cmd) {
1801                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
1802                         u64_lo32(wait_cmd->gva);
1803                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
1804                         u64_hi32(wait_cmd->gva) |
1805                         pbdma_gp_entry1_length_f(wait_cmd->size);
1806                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1807                         0, wait_cmd->size, 0, wait_cmd->ptr);
1808
1809                 c->gpfifo.put = (c->gpfifo.put + 1) &
1810                         (c->gpfifo.entry_num - 1);
1811
1812                 /* save gp_put */
1813                 wait_cmd->gp_put = c->gpfifo.put;
1814         }
1815
1816         /*
1817          * Copy source gpfifo entries into the gpfifo ring buffer,
1818          * potentially splitting into two memcpies to handle the
1819          * ring buffer wrap-around case.
1820          */
1821         start = c->gpfifo.put;
1822         end = start + num_entries;
1823
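             /* e.g. with entry_num == 8, put == 6 and num_entries == 4, the copy
              * below splits into length0 == 2 entries at the tail of the ring and
              * length1 == 2 entries wrapped around to the head. */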
1824         if (gpfifo) {
1825                 if (end > c->gpfifo.entry_num) {
1826                         int length0 = c->gpfifo.entry_num - start;
1827                         int length1 = num_entries - length0;
1828
1829                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1830                                 gpfifo,
1831                                 length0 * sizeof(*gpfifo));
1832
1833                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va,
1834                                 gpfifo + length0,
1835                                 length1 * sizeof(*gpfifo));
1836
1837                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1838                                         0, length0);
1839                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1840                                         length0, length1);
1841                 } else {
1842                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1843                                 gpfifo,
1844                                 num_entries * sizeof(*gpfifo));
1845
1846                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1847                                         0, num_entries);
1848                 }
1849         } else {
1850                 struct nvgpu_gpfifo __user *user_gpfifo =
1851                         (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
1852                 if (end > c->gpfifo.entry_num) {
1853                         int length0 = c->gpfifo.entry_num - start;
1854                         int length1 = num_entries - length0;
1855
1856                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1857                                 user_gpfifo,
1858                                 length0 * sizeof(*user_gpfifo));
1859                         if (err) {
                                     /* copy_from_user returns bytes left, not -errno */
                                     err = -EFAULT;
1860                                 mutex_unlock(&c->submit_lock);
1861                                 goto clean_up;
1862                         }
1863
1864                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va,
1865                                 user_gpfifo + length0,
1866                                 length1 * sizeof(*user_gpfifo));
1867                         if (err) {
                                     err = -EFAULT;
1868                                 mutex_unlock(&c->submit_lock);
1869                                 goto clean_up;
1870                         }
1871
1872                         trace_write_pushbuffer_range(c, NULL, args,
1873                                         0, length0);
1874                         trace_write_pushbuffer_range(c, NULL, args,
1875                                         length0, length1);
1876                 } else {
1877                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1878                                 user_gpfifo,
1879                                 num_entries * sizeof(*user_gpfifo));
1880                         if (err) {
                                     err = -EFAULT;
1881                                 mutex_unlock(&c->submit_lock);
1882                                 goto clean_up;
1883                         }
1884
1885                         trace_write_pushbuffer_range(c, NULL, args,
1886                                         0, num_entries);
1887                 }
1888         }
1889
1890         c->gpfifo.put = (c->gpfifo.put + num_entries) &
1891                 (c->gpfifo.entry_num - 1);
1892
1893         if (incr_cmd) {
1894                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
1895                         u64_lo32(incr_cmd->gva);
1896                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
1897                         u64_hi32(incr_cmd->gva) |
1898                         pbdma_gp_entry1_length_f(incr_cmd->size);
1899                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1900                         0, incr_cmd->size, 0, incr_cmd->ptr);
1901
1902                 c->gpfifo.put = (c->gpfifo.put + 1) &
1903                         (c->gpfifo.entry_num - 1);
1904
1905                 /* save gp_put */
1906                 incr_cmd->gp_put = c->gpfifo.put;
1907         }
1908
1909         gk20a_fence_put(c->last_submit.pre_fence);
1910         gk20a_fence_put(c->last_submit.post_fence);
1911         c->last_submit.pre_fence = pre_fence;
1912         c->last_submit.post_fence = post_fence;
1913         if (fence_out)
1914                 *fence_out = gk20a_fence_get(post_fence);
1915
1916         /* TODO! Check for errors... */
1917         gk20a_channel_add_job(c, pre_fence, post_fence,
1918                                 wait_cmd, incr_cmd,
1919                                 skip_buffer_refcounting);
1920
1921         c->cmds_pending = true;
1922         gk20a_bar1_writel(g,
1923                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1924                 c->gpfifo.put);
1925
1926         mutex_unlock(&c->submit_lock);
1927
1928         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1929                                              c->hw_chid,
1930                                              num_entries,
1931                                              flags,
1932                                              post_fence->syncpt_id,
1933                                              post_fence->syncpt_value);
1934
1935         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1936                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1937
1938         gk20a_dbg_fn("done");
1939         return err;
1940
1941 clean_up:
1942         gk20a_err(d, "gpfifo submit failed, err %d", err);
1943         free_priv_cmdbuf(c, wait_cmd);
1944         free_priv_cmdbuf(c, incr_cmd);
1945         gk20a_fence_put(pre_fence);
1946         gk20a_fence_put(post_fence);
1947         gk20a_idle(g->dev);
1948         return err;
1949 }
1950
1951 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1952 {
1953         struct channel_gk20a *c = g->fifo.channel+chid;
1954         c->g = NULL;
1955         c->hw_chid = chid;
1956         c->bound = false;
1957         spin_lock_init(&c->ref_obtain_lock);
1958         atomic_set(&c->ref_count, 0);
1959         c->referenceable = false;
1960         init_waitqueue_head(&c->ref_count_dec_wq);
1961         mutex_init(&c->ioctl_lock);
1962         mutex_init(&c->jobs_lock);
1963         mutex_init(&c->submit_lock);
1964         INIT_LIST_HEAD(&c->jobs);
1965 #if defined(CONFIG_GK20A_CYCLE_STATS)
1966         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1967         mutex_init(&c->cs_client_mutex);
1968 #endif
1969         INIT_LIST_HEAD(&c->dbg_s_list);
1970         mutex_init(&c->dbg_s_lock);
1971         list_add(&c->free_chs, &g->fifo.free_chs);
1972
1973         return 0;
1974 }
1975
1976 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
1977 {
1978         int err = 0;
1979         struct gk20a_fence *fence = ch->last_submit.post_fence;
1980
1981         if (!ch->cmds_pending)
1982                 return 0;
1983
1984         /* Do not wait for a timedout channel */
1985         if (ch->has_timedout)
1986                 return -ETIMEDOUT;
1987
1988         gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
1989                      fence->syncpt_value, fence->semaphore);
1990
1991         err = gk20a_fence_wait(fence, timeout);
1992         if (WARN_ON(err))
1993                 dev_warn(dev_from_gk20a(ch->g),
1994                        "timed out waiting for gk20a channel to finish");
1995         else
1996                 ch->cmds_pending = false;
1997
1998         return err;
1999 }
2000
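     /*
      * Block until the 32-bit semaphore value at 'offset' inside the dmabuf
      * identified by 'id' equals 'payload', the channel times out, or the
      * wait itself times out or is interrupted. Only the page containing
      * the semaphore is kmapped for the duration of the wait.
      */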
2001 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
2002                                         ulong id, u32 offset,
2003                                         u32 payload, long timeout)
2004 {
2005         struct platform_device *pdev = ch->g->dev;
2006         struct dma_buf *dmabuf;
2007         void *data;
2008         u32 *semaphore;
2009         int ret = 0;
2010         long remain;
2011
2012         /* do not wait if channel has timed out */
2013         if (ch->has_timedout)
2014                 return -ETIMEDOUT;
2015
2016         dmabuf = dma_buf_get(id);
2017         if (IS_ERR(dmabuf)) {
2018                 gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
2019                            id);
2020                 return -EINVAL;
2021         }
2022
2023         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
2024         if (!data) {
2025                 gk20a_err(&pdev->dev, "failed to map notifier memory");
2026                 ret = -EINVAL;
2027                 goto cleanup_put;
2028         }
2029
2030         semaphore = data + (offset & ~PAGE_MASK);
2031
2032         remain = wait_event_interruptible_timeout(
2033                         ch->semaphore_wq,
2034                         *semaphore == payload || ch->has_timedout,
2035                         timeout);
2036
2037         if (remain == 0 && *semaphore != payload)
2038                 ret = -ETIMEDOUT;
2039         else if (remain < 0)
2040                 ret = remain;
2041
2042         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
2043 cleanup_put:
2044         dma_buf_put(dmabuf);
2045         return ret;
2046 }
2047
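     /*
      * NVGPU_IOCTL_CHANNEL_WAIT backend: either wait for a notifier object
      * in a dmabuf to be signalled (status cleared to 0) and then stamp it
      * with a timestamp, or wait for a semaphore value via
      * gk20a_channel_wait_semaphore().
      */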
2048 static int gk20a_channel_wait(struct channel_gk20a *ch,
2049                               struct nvgpu_wait_args *args)
2050 {
2051         struct device *d = dev_from_gk20a(ch->g);
2052         struct dma_buf *dmabuf;
2053         struct notification *notif;
2054         struct timespec tv;
2055         u64 jiffies;
2056         ulong id;
2057         u32 offset;
2058         unsigned long timeout;
2059         int remain, ret = 0;
2060
2061         gk20a_dbg_fn("");
2062
2063         if (ch->has_timedout)
2064                 return -ETIMEDOUT;
2065
2066         if (args->timeout == NVGPU_NO_TIMEOUT)
2067                 timeout = MAX_SCHEDULE_TIMEOUT;
2068         else
2069                 timeout = (u32)msecs_to_jiffies(args->timeout);
2070
2071         switch (args->type) {
2072         case NVGPU_WAIT_TYPE_NOTIFIER:
2073                 id = args->condition.notifier.dmabuf_fd;
2074                 offset = args->condition.notifier.offset;
2075
2076                 dmabuf = dma_buf_get(id);
2077                 if (IS_ERR(dmabuf)) {
2078                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
2079                                    id);
2080                         return -EINVAL;
2081                 }
2082
2083                 notif = dma_buf_vmap(dmabuf);
2084                 if (!notif) {
2085                         gk20a_err(d, "failed to map notifier memory");
                             dma_buf_put(dmabuf);
2086                         return -ENOMEM;
2087                 }
2088
2089                 notif = (struct notification *)((uintptr_t)notif + offset);
2090
2091                 /* user should set status pending before
2092                  * calling this ioctl */
2093                 remain = wait_event_interruptible_timeout(
2094                                 ch->notifier_wq,
2095                                 notif->status == 0 || ch->has_timedout,
2096                                 timeout);
2097
2098                 if (remain == 0 && notif->status != 0) {
2099                         ret = -ETIMEDOUT;
2100                         goto notif_clean_up;
2101                 } else if (remain < 0) {
2102                         ret = -EINTR;
2103                         goto notif_clean_up;
2104                 }
2105
2106                 /* TBD: fill in correct information */
2107                 jiffies = get_jiffies_64();
2108                 jiffies_to_timespec(jiffies, &tv);
2109                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
2110                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
2111                 notif->info32 = 0xDEADBEEF; /* should be object name */
2112                 notif->info16 = ch->hw_chid; /* should be method offset */
2113
2114 notif_clean_up:
2115                 dma_buf_vunmap(dmabuf, notif);
                     dma_buf_put(dmabuf);
2116                 return ret;
2117
2118         case NVGPU_WAIT_TYPE_SEMAPHORE:
2119                 ret = gk20a_channel_wait_semaphore(ch,
2120                                 args->condition.semaphore.dmabuf_fd,
2121                                 args->condition.semaphore.offset,
2122                                 args->condition.semaphore.payload,
2123                                 timeout);
2124
2125                 break;
2126
2127         default:
2128                 ret = -EINVAL;
2129                 break;
2130         }
2131
2132         return ret;
2133 }
2134
2135 /* poll events for semaphores */
2136
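     /*
      * The poll_events state is a simple counter of pending events guarded
      * by a mutex: gk20a_channel_event() increments it while events are
      * enabled, gk20a_channel_poll() reports POLLPRI|POLLIN while it is
      * non-zero, and the CLEAR ioctl command consumes one event at a time.
      */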
2137 static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
2138 {
2139         gk20a_dbg_fn("");
2140
2141         mutex_lock(&ev->lock);
2142
2143         ev->events_enabled = true;
2144         ev->num_pending_events = 0;
2145
2146         mutex_unlock(&ev->lock);
2147 }
2148
2149 static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
2150 {
2151         gk20a_dbg_fn("");
2152
2153         mutex_lock(&ev->lock);
2154
2155         ev->events_enabled = false;
2156         ev->num_pending_events = 0;
2157
2158         mutex_unlock(&ev->lock);
2159 }
2160
2161 static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
2162 {
2163         gk20a_dbg_fn("");
2164
2165         mutex_lock(&ev->lock);
2166
2167         if (ev->events_enabled &&
2168                         ev->num_pending_events > 0)
2169                 ev->num_pending_events--;
2170
2171         mutex_unlock(&ev->lock);
2172 }
2173
2174 static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
2175                           struct nvgpu_channel_events_ctrl_args *args)
2176 {
2177         int ret = 0;
2178
2179         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
2180                         "channel events ctrl cmd %d", args->cmd);
2181
2182         switch (args->cmd) {
2183         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
2184                 gk20a_channel_events_enable(&ch->poll_events);
2185                 break;
2186
2187         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
2188                 gk20a_channel_events_disable(&ch->poll_events);
2189                 break;
2190
2191         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
2192                 gk20a_channel_events_clear(&ch->poll_events);
2193                 break;
2194
2195         default:
2196                 gk20a_err(dev_from_gk20a(ch->g),
2197                            "unrecognized channel events ctrl cmd: 0x%x",
2198                            args->cmd);
2199                 ret = -EINVAL;
2200                 break;
2201         }
2202
2203         return ret;
2204 }
2205
2206 void gk20a_channel_event(struct channel_gk20a *ch)
2207 {
2208         mutex_lock(&ch->poll_events.lock);
2209
2210         if (ch->poll_events.events_enabled) {
2211                 gk20a_dbg_info("posting event on channel id %d",
2212                                 ch->hw_chid);
2213                 gk20a_dbg_info("%d channel events pending",
2214                                 ch->poll_events.num_pending_events);
2215
2216                 ch->poll_events.num_pending_events++;
2217                 /* not waking up here, caller does that */
2218         }
2219
2220         mutex_unlock(&ch->poll_events.lock);
2221 }
2222
2223 unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
2224 {
2225         unsigned int mask = 0;
2226         struct channel_gk20a *ch = filep->private_data;
2227
2228         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");
2229
2230         poll_wait(filep, &ch->semaphore_wq, wait);
2231
2232         mutex_lock(&ch->poll_events.lock);
2233
2234         if (ch->poll_events.events_enabled &&
2235                         ch->poll_events.num_pending_events > 0) {
2236                 gk20a_dbg_info("found pending event on channel id %d",
2237                                 ch->hw_chid);
2238                 gk20a_dbg_info("%d channel events pending",
2239                                 ch->poll_events.num_pending_events);
2240                 mask = (POLLPRI | POLLIN);
2241         }
2242
2243         mutex_unlock(&ch->poll_events.lock);
2244
2245         return mask;
2246 }
2247
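     /*
      * Map the coarse NVGPU_PRIORITY_* levels onto runlist timeslice
      * values; per the in-line comments below, the raw value is
      * interpreted as (value << 3) microseconds of timeslice.
      */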
2248 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2249                 u32 priority)
2250 {
2251         u32 timeslice_timeout;
2252         /* set priority of graphics channel */
2253         switch (priority) {
2254         case NVGPU_PRIORITY_LOW:
2255                 /* 64 << 3 = 512us */
2256                 timeslice_timeout = 64;
2257                 break;
2258         case NVGPU_PRIORITY_MEDIUM:
2259                 /* 128 << 3 = 1024us */
2260                 timeslice_timeout = 128;
2261                 break;
2262         case NVGPU_PRIORITY_HIGH:
2263                 /* 255 << 3 = 2048us */
2264                 timeslice_timeout = 255;
2265                 break;
2266         default:
2267                 pr_err("Unsupported priority");
2268                 return -EINVAL;
2269         }
2270         channel_gk20a_set_schedule_params(ch,
2271                         timeslice_timeout);
2272         return 0;
2273 }
2274
2275 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
2276                             struct nvgpu_zcull_bind_args *args)
2277 {
2278         struct gk20a *g = ch->g;
2279         struct gr_gk20a *gr = &g->gr;
2280
2281         gk20a_dbg_fn("");
2282
2283         return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
2284                                 args->gpu_va, args->mode);
2285 }
2286
2287 /* in this context the "channel" is the host1x channel which
2288  * maps to *all* gk20a channels */
2289 int gk20a_channel_suspend(struct gk20a *g)
2290 {
2291         struct fifo_gk20a *f = &g->fifo;
2292         u32 chid;
2293         bool channels_in_use = false;
2294         int err;
2295
2296         gk20a_dbg_fn("");
2297
2298         /* wait for engine idle */
2299         err = g->ops.fifo.wait_engine_idle(g);
2300         if (err)
2301                 return err;
2302
2303         for (chid = 0; chid < f->num_channels; chid++) {
2304                 struct channel_gk20a *ch = &f->channel[chid];
2305                 if (gk20a_channel_get(ch)) {
2306                         gk20a_dbg_info("suspend channel %d", chid);
2307                         /* disable channel */
2308                         g->ops.fifo.disable_channel(ch);
2309                         /* preempt the channel */
2310                         g->ops.fifo.preempt_channel(g, chid);
2311                         /* wait for channel update notifiers */
2312                         if (ch->update_fn &&
2313                                         work_pending(&ch->update_fn_work))
2314                                 flush_work(&ch->update_fn_work);
2315
2316                         channels_in_use = true;
2317
2318                         gk20a_channel_put(ch);
2319                 }
2320         }
2321
2322         if (channels_in_use) {
2323                 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
2324
2325                 for (chid = 0; chid < f->num_channels; chid++) {
2326                         if (gk20a_channel_get(&f->channel[chid])) {
2327                                 g->ops.fifo.unbind_channel(&f->channel[chid]);
2328                                 gk20a_channel_put(&f->channel[chid]);
2329                         }
2330                 }
2331         }
2332
2333         gk20a_dbg_fn("done");
2334         return 0;
2335 }
2336
2337 int gk20a_channel_resume(struct gk20a *g)
2338 {
2339         struct fifo_gk20a *f = &g->fifo;
2340         u32 chid;
2341         bool channels_in_use = false;
2342
2343         gk20a_dbg_fn("");
2344
2345         for (chid = 0; chid < f->num_channels; chid++) {
2346                 if (gk20a_channel_get(&f->channel[chid])) {
2347                         gk20a_dbg_info("resume channel %d", chid);
2348                         g->ops.fifo.bind_channel(&f->channel[chid]);
2349                         channels_in_use = true;
2350                         gk20a_channel_put(&f->channel[chid]);
2351                 }
2352         }
2353
2354         if (channels_in_use)
2355                 g->ops.fifo.update_runlist(g, 0, ~0, true, true);
2356
2357         gk20a_dbg_fn("done");
2358         return 0;
2359 }
2360
2361 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2362 {
2363         struct fifo_gk20a *f = &g->fifo;
2364         u32 chid;
2365
2366         gk20a_dbg_fn("");
2367
2368         for (chid = 0; chid < f->num_channels; chid++) {
2369                 struct channel_gk20a *c = g->fifo.channel+chid;
2370                 if (gk20a_channel_get(c)) {
2371                         gk20a_channel_event(c);
2372                         wake_up_interruptible_all(&c->semaphore_wq);
2373                         gk20a_channel_update(c, 0);
2374                         gk20a_channel_put(c);
2375                 }
2376         }
2377 }
2378
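     /*
      * Ioctl wrapper around gk20a_submit_channel_gpfifo(): entries are read
      * straight from the user pointer in 'args', and on FENCE_GET the
      * resulting post fence is handed back either as a sync fence fd or as
      * a raw syncpoint id/value pair, depending on the SYNC_FENCE flag.
      */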
2379 static int gk20a_ioctl_channel_submit_gpfifo(
2380         struct channel_gk20a *ch,
2381         struct nvgpu_submit_gpfifo_args *args)
2382 {
2383         struct gk20a_fence *fence_out;
2384         int ret = 0;
2385
2386         gk20a_dbg_fn("");
2387
2388         if (ch->has_timedout)
2389                 return -ETIMEDOUT;
2390
2391         ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
2392                                           args->flags, &args->fence,
2393                                           &fence_out, false);
2394
2395         if (ret)
2396                 goto clean_up;
2397
2398         /* Convert fence_out to something we can pass back to user space. */
2399         if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
2400                 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2401                         int fd = gk20a_fence_install_fd(fence_out);
2402                         if (fd < 0)
2403                                 ret = fd;
2404                         else
2405                                 args->fence.id = fd;
2406                 } else {
2407                         args->fence.id = fence_out->syncpt_id;
2408                         args->fence.value = fence_out->syncpt_value;
2409                 }
2410         }
2411         gk20a_fence_put(fence_out);
2412
2413 clean_up:
2414         return ret;
2415 }
2416
2417 void gk20a_init_channel(struct gpu_ops *gops)
2418 {
2419         gops->fifo.bind_channel = channel_gk20a_bind;
2420         gops->fifo.unbind_channel = channel_gk20a_unbind;
2421         gops->fifo.disable_channel = channel_gk20a_disable;
2422         gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
2423         gops->fifo.free_inst = channel_gk20a_free_inst;
2424         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
2425 }
2426
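     /*
      * Top-level channel ioctl dispatcher. The argument is staged in an
      * on-stack buffer (copied in for _IOC_WRITE, copied back out for
      * _IOC_READ), a channel reference and the per-channel ioctl_lock are
      * held across the handler, and most commands additionally bracket the
      * hardware access with gk20a_busy()/gk20a_idle().
      */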
2427 long gk20a_channel_ioctl(struct file *filp,
2428         unsigned int cmd, unsigned long arg)
2429 {
2430         struct channel_gk20a *ch = filp->private_data;
2431         struct platform_device *dev = ch->g->dev;
2432         u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
2433         int err = 0;
2434
2435         gk20a_dbg_fn("start %d", _IOC_NR(cmd));
2436
2437         if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
2438                 (_IOC_NR(cmd) == 0) ||
2439                 (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
2440                 (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
2441                 return -EINVAL;
2442
2443         if (_IOC_DIR(cmd) & _IOC_WRITE) {
2444                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2445                         return -EFAULT;
2446         }
2447
2448         /* take a ref or return timeout if channel refs can't be taken */
2449         ch = gk20a_channel_get(ch);
2450         if (!ch)
2451                 return -ETIMEDOUT;
2452
2453         /* protect our sanity for threaded userspace - most of the channel is
2454          * not thread safe */
2455         mutex_lock(&ch->ioctl_lock);
2456
2457         /* this ioctl call keeps a ref to the file which keeps a ref to the
2458          * channel */
2459
2460         switch (cmd) {
2461         case NVGPU_IOCTL_CHANNEL_OPEN:
2462                 err = gk20a_channel_open_ioctl(ch->g,
2463                         (struct nvgpu_channel_open_args *)buf);
2464                 break;
2465         case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
2466                 break;
2467         case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2468                 err = gk20a_busy(dev);
2469                 if (err) {
2470                         dev_err(&dev->dev,
2471                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2472                                 __func__, cmd);
2473                         break;
2474                 }
2475                 err = ch->g->ops.gr.alloc_obj_ctx(ch,
2476                                 (struct nvgpu_alloc_obj_ctx_args *)buf);
2477                 gk20a_idle(dev);
2478                 break;
2479         case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
2480                 err = gk20a_busy(dev);
2481                 if (err) {
2482                         dev_err(&dev->dev,
2483                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2484                                 __func__, cmd);
2485                         break;
2486                 }
2487                 err = ch->g->ops.gr.free_obj_ctx(ch,
2488                                 (struct nvgpu_free_obj_ctx_args *)buf);
2489                 gk20a_idle(dev);
2490                 break;
2491         case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
2492                 err = gk20a_busy(dev);
2493                 if (err) {
2494                         dev_err(&dev->dev,
2495                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2496                                 __func__, cmd);
2497                         break;
2498                 }
2499                 err = gk20a_alloc_channel_gpfifo(ch,
2500                                 (struct nvgpu_alloc_gpfifo_args *)buf);
2501                 gk20a_idle(dev);
2502                 break;
2503         case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2504                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2505                                 (struct nvgpu_submit_gpfifo_args *)buf);
2506                 break;
2507         case NVGPU_IOCTL_CHANNEL_WAIT:
2508                 err = gk20a_busy(dev);
2509                 if (err) {
2510                         dev_err(&dev->dev,
2511                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2512                                 __func__, cmd);
2513                         break;
2514                 }
2515
2516                 /* waiting is thread-safe, not dropping this mutex could
2517                  * deadlock in certain conditions */
2518                 mutex_unlock(&ch->ioctl_lock);
2519
2520                 err = gk20a_channel_wait(ch,
2521                                 (struct nvgpu_wait_args *)buf);
2522
2523                 mutex_lock(&ch->ioctl_lock);
2524
2525                 gk20a_idle(dev);
2526                 break;
2527         case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
2528                 err = gk20a_busy(dev);
2529                 if (err) {
2530                         dev_err(&dev->dev,
2531                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2532                                 __func__, cmd);
2533                         break;
2534                 }
2535                 err = gk20a_channel_zcull_bind(ch,
2536                                 (struct nvgpu_zcull_bind_args *)buf);
2537                 gk20a_idle(dev);
2538                 break;
2539         case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2540                 err = gk20a_busy(dev);
2541                 if (err) {
2542                         dev_err(&dev->dev,
2543                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2544                                 __func__, cmd);
2545                         break;
2546                 }
2547                 err = gk20a_init_error_notifier(ch,
2548                                 (struct nvgpu_set_error_notifier *)buf);
2549                 gk20a_idle(dev);
2550                 break;
2551 #ifdef CONFIG_GK20A_CYCLE_STATS
2552         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
2553                 err = gk20a_busy(dev);
2554                 if (err) {
2555                         dev_err(&dev->dev,
2556                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2557                                 __func__, cmd);
2558                         break;
2559                 }
2560                 err = gk20a_channel_cycle_stats(ch,
2561                                 (struct nvgpu_cycle_stats_args *)buf);
2562                 gk20a_idle(dev);
2563                 break;
2564 #endif
2565         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
2566         {
2567                 u32 timeout =
2568                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2569                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2570                            timeout, ch->hw_chid);
2571                 ch->timeout_ms_max = timeout;
2572                 break;
2573         }
2574         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2575         {
2576                 u32 timeout =
2577                         (u32)((struct nvgpu_set_timeout_ex_args *)buf)->timeout;
2578                 bool timeout_debug_dump = !((u32)
2579                         ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
2580                         (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
2581                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2582                            timeout, ch->hw_chid);
2583                 ch->timeout_ms_max = timeout;
2584                 ch->timeout_debug_dump = timeout_debug_dump;
2585                 break;
2586         }
2587         case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
2588                 ((struct nvgpu_get_param_args *)buf)->value =
2589                         ch->has_timedout;
2590                 break;
2591         case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
2592                 err = gk20a_busy(dev);
2593                 if (err) {
2594                         dev_err(&dev->dev,
2595                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2596                                 __func__, cmd);
2597                         break;
2598                 }
2599                 gk20a_channel_set_priority(ch,
2600                         ((struct nvgpu_set_priority_args *)buf)->priority);
2601                 gk20a_idle(dev);
2602                 break;
2603         case NVGPU_IOCTL_CHANNEL_ENABLE:
2604                 err = gk20a_busy(dev);
2605                 if (err) {
2606                         dev_err(&dev->dev,
2607                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2608                                 __func__, cmd);
2609                         break;
2610                 }
2611                 /* enable channel */
2612                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2613                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2614                         ccsr_channel_enable_set_true_f());
2615                 gk20a_idle(dev);
2616                 break;
2617         case NVGPU_IOCTL_CHANNEL_DISABLE:
2618                 err = gk20a_busy(dev);
2619                 if (err) {
2620                         dev_err(&dev->dev,
2621                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2622                                 __func__, cmd);
2623                         break;
2624                 }
2625                 /* disable channel */
2626                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2627                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2628                         ccsr_channel_enable_clr_true_f());
2629                 gk20a_idle(dev);
2630                 break;
2631         case NVGPU_IOCTL_CHANNEL_PREEMPT:
2632                 err = gk20a_busy(dev);
2633                 if (err) {
2634                         dev_err(&dev->dev,
2635                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2636                                 __func__, cmd);
2637                         break;
2638                 }
2639                 err = gk20a_fifo_preempt(ch->g, ch);
2640                 gk20a_idle(dev);
2641                 break;
2642         case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
2643                 err = gk20a_busy(dev);
2644                 if (err) {
2645                         dev_err(&dev->dev,
2646                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2647                                 __func__, cmd);
2648                         break;
2649                 }
2650                 err = gk20a_fifo_force_reset_ch(ch, true);
2651                 gk20a_idle(dev);
2652                 break;
2653         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
2654                 err = gk20a_channel_events_ctrl(ch,
2655                            (struct nvgpu_channel_events_ctrl_args *)buf);
2656                 break;
2657 #ifdef CONFIG_GK20A_CYCLE_STATS
2658         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
2659                 err = gk20a_busy(dev);
2660                 if (err) {
2661                         dev_err(&dev->dev,
2662                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2663                                 __func__, cmd);
2664                         break;
2665                 }
2666                 err = gk20a_channel_cycle_stats_snapshot(ch,
2667                                 (struct nvgpu_cycle_stats_snapshot_args *)buf);
2668                 gk20a_idle(dev);
2669                 break;
2670 #endif
2671         default:
2672                 dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2673                 err = -ENOTTY;
2674                 break;
2675         }
2676
2677         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) {
2678                 if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
                             err = -EFAULT;
             }
2679
2680         mutex_unlock(&ch->ioctl_lock);
2681
2682         gk20a_channel_put(ch);
2683
2684         gk20a_dbg_fn("end");
2685
2686         return err;
2687 }