gpu: nvgpu: support skipping buffer refcounting in submit
drivers/gpu/nvgpu/gk20a/channel_gk20a.c
1 /*
2  * GK20A Graphics channel
3  *
4  * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/nvhost.h>
20 #include <linux/list.h>
21 #include <linux/delay.h>
22 #include <linux/highmem.h> /* needed for nvmap.h */
23 #include <trace/events/gk20a.h>
24 #include <linux/scatterlist.h>
25 #include <linux/file.h>
26 #include <linux/anon_inodes.h>
27 #include <linux/dma-buf.h>
28 #include <linux/vmalloc.h>
29
30 #include "debug_gk20a.h"
31
32 #include "gk20a.h"
33 #include "dbg_gpu_gk20a.h"
34 #include "fence_gk20a.h"
35 #include "semaphore_gk20a.h"
36
37 #include "hw_ram_gk20a.h"
38 #include "hw_fifo_gk20a.h"
39 #include "hw_pbdma_gk20a.h"
40 #include "hw_ccsr_gk20a.h"
41 #include "hw_ltc_gk20a.h"
42
43 #define NVMAP_HANDLE_PARAM_SIZE 1
44
45 #define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT       64      /* channels */
46
47 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
48 static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
49
50 static void free_priv_cmdbuf(struct channel_gk20a *c,
51                              struct priv_cmd_entry *e);
52 static void recycle_priv_cmdbuf(struct channel_gk20a *c);
53
54 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
55 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
56
57 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
58 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
59
60 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
61
62 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
63                                         bool add);
64 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
65
66 /* allocate GPU channel */
67 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
68 {
69         struct channel_gk20a *ch = NULL;
70         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
71
72         mutex_lock(&f->free_chs_mutex);
73         if (!list_empty(&f->free_chs)) {
74                 ch = list_first_entry(&f->free_chs, struct channel_gk20a,
75                                 free_chs);
76                 list_del(&ch->free_chs);
77                 WARN_ON(atomic_read(&ch->ref_count));
78                 WARN_ON(ch->referenceable);
79                 f->used_channels++;
80         }
81         mutex_unlock(&f->free_chs_mutex);
82
83         if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
84                 platform->aggressive_sync_destroy = true;
85
86         return ch;
87 }
88
89 static void free_channel(struct fifo_gk20a *f,
90                 struct channel_gk20a *ch)
91 {
92         struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
93
94         trace_gk20a_release_used_channel(ch->hw_chid);
95         /* refcount is zero here and channel is in a freed/dead state */
96         mutex_lock(&f->free_chs_mutex);
97         /* add to head to increase visibility of timing-related bugs */
98         list_add(&ch->free_chs, &f->free_chs);
99         f->used_channels--;
100         mutex_unlock(&f->free_chs_mutex);
101
102         if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
103                 platform->aggressive_sync_destroy = false;
104 }
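/*
 * Illustrative note on the aggressive-sync-destroy threshold: with
 * NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT == 64, opening the 65th
 * concurrent channel sets platform->aggressive_sync_destroy, so
 * channel_gk20a_unbind() destroys each channel's sync object immediately
 * instead of leaving it to gk20a_free_channel(); dropping back below 64
 * channels clears the flag again. The flag is read and written outside
 * free_chs_mutex, so the transition is racy, but it only tunes a heuristic.
 */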
105
106 int channel_gk20a_commit_va(struct channel_gk20a *c)
107 {
108         gk20a_dbg_fn("");
109
110         if (!c->inst_block.cpu_va)
111                 return -ENOMEM;
112
113         gk20a_init_inst_block(&c->inst_block, c->vm,
114                         c->vm->gmmu_page_sizes[gmmu_page_size_big]);
115
116         return 0;
117 }
118
119 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
120 {
121         u32 addr_lo;
122         u32 addr_hi;
123         void *inst_ptr;
124
125         gk20a_dbg_fn("");
126
127         inst_ptr = c->inst_block.cpu_va;
128         if (!inst_ptr)
129                 return -ENOMEM;
130
131         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
132         addr_hi = u64_hi32(c->userd_iova);
133
134         gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
135                 c->hw_chid, (u64)c->userd_iova);
136
137         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
138                  pbdma_userd_target_vid_mem_f() |
139                  pbdma_userd_addr_f(addr_lo));
140
141         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
142                  pbdma_userd_target_vid_mem_f() |
143                  pbdma_userd_hi_addr_f(addr_hi));
144
145         return 0;
146 }
147
148 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
149                                 u32 timeslice_timeout)
150 {
151         void *inst_ptr;
152         int shift = 3;
153         int value = timeslice_timeout;
154
155         inst_ptr = c->inst_block.cpu_va;
156         if (!inst_ptr)
157                 return -ENOMEM;
158
159         /* disable channel */
160         c->g->ops.fifo.disable_channel(c);
161
162         /* preempt the channel */
163         WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
164
165         /* value field is 8 bits long */
166         while (value >= 1 << 8) {
167                 value >>= 1;
168                 shift++;
169         }
170
171         /* the time slice register is only 18 bits long */
172         if ((value << shift) >= 1<<19) {
173                 pr_err("Requested timeslice value is clamped to 18 bits\n");
174                 value = 255;
175                 shift = 10;
176         }
177
178         /* set new timeslice */
179         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
180                 value | (shift << 12) |
181                 fifo_runlist_timeslice_enable_true_f());
182
183         /* enable channel */
184         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
185                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
186                 ccsr_channel_enable_set_true_f());
187
188         return 0;
189 }
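/*
 * Worked example for the timeslice encoding above (illustrative): a
 * requested timeslice_timeout of 300 does not fit in the 8-bit value field,
 * so the loop halves it once to value = 150 and bumps the scale to
 * shift = 4; the register word then carries value in its low bits and the
 * timescale starting at bit 12 via (value | (shift << 12)), plus the enable
 * bit. Oversized requests are clamped to value = 255, shift = 10.
 */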
190
191 int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
192                         u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
193 {
194         void *inst_ptr;
195
196         gk20a_dbg_fn("");
197
198         inst_ptr = c->inst_block.cpu_va;
199         if (!inst_ptr)
200                 return -ENOMEM;
201
202         memset(inst_ptr, 0, ram_fc_size_val_v());
203
204         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
205                 pbdma_gp_base_offset_f(
206                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
207
208         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
209                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
210                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
211
212         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
213                  c->g->ops.fifo.get_pbdma_signature(c->g));
214
215         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
216                 pbdma_formats_gp_fermi0_f() |
217                 pbdma_formats_pb_fermi1_f() |
218                 pbdma_formats_mp_fermi0_f());
219
220         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
221                 pbdma_pb_header_priv_user_f() |
222                 pbdma_pb_header_method_zero_f() |
223                 pbdma_pb_header_subchannel_zero_f() |
224                 pbdma_pb_header_level_main_f() |
225                 pbdma_pb_header_first_true_f() |
226                 pbdma_pb_header_type_inc_f());
227
228         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
229                 pbdma_subdevice_id_f(1) |
230                 pbdma_subdevice_status_active_f() |
231                 pbdma_subdevice_channel_dma_enable_f());
232
233         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
234
235         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
236                 pbdma_acquire_retry_man_2_f() |
237                 pbdma_acquire_retry_exp_2_f() |
238                 pbdma_acquire_timeout_exp_max_f() |
239                 pbdma_acquire_timeout_man_max_f() |
240                 pbdma_acquire_timeout_en_disable_f());
241
242         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
243                 fifo_runlist_timeslice_timeout_128_f() |
244                 fifo_runlist_timeslice_timescale_3_f() |
245                 fifo_runlist_timeslice_enable_true_f());
246
247         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
248                 fifo_pb_timeslice_timeout_16_f() |
249                 fifo_pb_timeslice_timescale_0_f() |
250                 fifo_pb_timeslice_enable_true_f());
251
252         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
253
254         return channel_gk20a_commit_userd(c);
255 }
256
257 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
258 {
259         BUG_ON(!c->userd_cpu_va);
260
261         gk20a_dbg_fn("");
262
263         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
264         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
265         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
266         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
267         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
268         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
269         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
270         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
271         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
272         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
273
274         return 0;
275 }
276
277 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
278 {
279         struct gk20a *g = ch_gk20a->g;
280         struct fifo_gk20a *f = &g->fifo;
281         struct fifo_engine_info_gk20a *engine_info =
282                 f->engine_info + ENGINE_GR_GK20A;
283
284         u32 inst_ptr = gk20a_mem_phys(&ch_gk20a->inst_block)
285                 >> ram_in_base_shift_v();
286
287         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
288                 ch_gk20a->hw_chid, inst_ptr);
289
290         ch_gk20a->bound = true;
291
292         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
293                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
294                  ~ccsr_channel_runlist_f(~0)) |
295                  ccsr_channel_runlist_f(engine_info->runlist_id));
296
297         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
298                 ccsr_channel_inst_ptr_f(inst_ptr) |
299                 ccsr_channel_inst_target_vid_mem_f() |
300                 ccsr_channel_inst_bind_true_f());
301
302         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
303                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
304                  ~ccsr_channel_enable_set_f(~0)) |
305                  ccsr_channel_enable_set_true_f());
306 }
307
308 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
309 {
310         struct gk20a *g = ch_gk20a->g;
311         struct gk20a_platform *platform = gk20a_get_platform(g->dev);
312
313         gk20a_dbg_fn("");
314
315         if (ch_gk20a->bound)
316                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
317                         ccsr_channel_inst_ptr_f(0) |
318                         ccsr_channel_inst_bind_false_f());
319
320         ch_gk20a->bound = false;
321
322         /*
323          * if we are aggressive then we can destroy the syncpt
324          * resource at this point
325          * if not, then it will be destroyed at channel_free()
326          */
327         if (ch_gk20a->sync && platform->aggressive_sync_destroy) {
328                 ch_gk20a->sync->destroy(ch_gk20a->sync);
329                 ch_gk20a->sync = NULL;
330         }
331 }
332
333 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
334 {
335         int err;
336
337         gk20a_dbg_fn("");
338
339         err = gk20a_alloc_inst_block(g, &ch->inst_block);
340         if (err)
341                 return err;
342
343         gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
344                 ch->hw_chid, (u64)gk20a_mem_phys(&ch->inst_block));
345
346         gk20a_dbg_fn("done");
347         return 0;
348 }
349
350 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
351 {
352         gk20a_free_inst_block(g, &ch->inst_block);
353 }
354
355 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
356 {
357         return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
358 }
359
360 void channel_gk20a_enable(struct channel_gk20a *ch)
361 {
362         /* enable channel */
363         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
364                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
365                 ccsr_channel_enable_set_true_f());
366 }
367
368 void channel_gk20a_disable(struct channel_gk20a *ch)
369 {
370         /* disable channel */
371         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
372                 gk20a_readl(ch->g,
373                         ccsr_channel_r(ch->hw_chid)) |
374                         ccsr_channel_enable_clr_true_f());
375 }
376
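/*
 * Abort a channel: mark it as timed out so new kickoffs are refused, force
 * any pending sync fences to completion via set_min_eq_max(), release job
 * semaphores so that semaphore-based waiters can proceed, and disable the
 * channel in hardware. If any semaphore was released, waiters on
 * semaphore_wq are woken and gk20a_channel_update() is called.
 */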
377 void gk20a_channel_abort(struct channel_gk20a *ch)
378 {
379         struct channel_gk20a_job *job, *n;
380         bool released_job_semaphore = false;
381
382         gk20a_dbg_fn("");
383
384         /* make sure new kickoffs are prevented */
385         ch->has_timedout = true;
386
387         /* ensure no fences are pending */
388         mutex_lock(&ch->submit_lock);
389         if (ch->sync)
390                 ch->sync->set_min_eq_max(ch->sync);
391         mutex_unlock(&ch->submit_lock);
392
393         /* release all job semaphores (applies only to jobs that use
394            semaphore synchronization) */
395         mutex_lock(&ch->jobs_lock);
396         list_for_each_entry_safe(job, n, &ch->jobs, list) {
397                 if (job->post_fence->semaphore) {
398                         gk20a_semaphore_release(job->post_fence->semaphore);
399                         released_job_semaphore = true;
400                 }
401         }
402         mutex_unlock(&ch->jobs_lock);
403
404         ch->g->ops.fifo.disable_channel(ch);
405
406         if (released_job_semaphore) {
407                 wake_up_interruptible_all(&ch->semaphore_wq);
408                 gk20a_channel_update(ch, 0);
409         }
410 }
411
412 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
413 {
414         bool channel_idle = false;
415         unsigned long end_jiffies = jiffies +
416                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
417
418         do {
419                 mutex_lock(&ch->jobs_lock);
420                 channel_idle = list_empty(&ch->jobs);
421                 mutex_unlock(&ch->jobs_lock);
422                 if (channel_idle)
423                         break;
424
425                 usleep_range(1000, 3000);
426         } while (time_before(jiffies, end_jiffies)
427                         || !tegra_platform_is_silicon());
428
429         if (!channel_idle) {
430                 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
431                                 ch->hw_chid);
432                 return -EBUSY;
433         }
434
435         return 0;
436 }
437
438 void gk20a_disable_channel(struct channel_gk20a *ch,
439                            bool finish,
440                            unsigned long finish_timeout)
441 {
442         gk20a_dbg_fn("");
443
444         if (finish) {
445                 int err = gk20a_channel_finish(ch, finish_timeout);
446                 WARN_ON(err);
447         }
448
449         /* disable the channel from hw and increment syncpoints */
450         gk20a_channel_abort(ch);
451
452         gk20a_wait_channel_idle(ch);
453
454         /* preempt the channel */
455         ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
456
457         /* remove channel from runlist */
458         channel_gk20a_update_runlist(ch, false);
459 }
460
461 #if defined(CONFIG_GK20A_CYCLE_STATS)
462
463 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
464 {
465         /* disable existing cyclestats buffer */
466         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
467         if (ch->cyclestate.cyclestate_buffer_handler) {
468                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
469                                 ch->cyclestate.cyclestate_buffer);
470                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
471                 ch->cyclestate.cyclestate_buffer_handler = NULL;
472                 ch->cyclestate.cyclestate_buffer = NULL;
473                 ch->cyclestate.cyclestate_buffer_size = 0;
474         }
475         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
476 }
477
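/*
 * Handle the CYCLE_STATS ioctl (only if the GPU advertises
 * NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS): a nonzero dmabuf_fd installs a new
 * cyclestats buffer by vmapping the dma-buf, a zero fd tears down any
 * existing buffer, and passing a fd while a buffer is already installed is
 * rejected with -EINVAL.
 */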
478 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
479                        struct nvgpu_cycle_stats_args *args)
480 {
481         struct dma_buf *dmabuf;
482         void *virtual_address;
483
484         /* is it allowed to handle calls for current GPU? */
485         if (0 == (ch->g->gpu_characteristics.flags &
486                         NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS))
487                 return -ENOSYS;
488
489         if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {
490
491                 /* set up new cyclestats buffer */
492                 dmabuf = dma_buf_get(args->dmabuf_fd);
493                 if (IS_ERR(dmabuf))
494                         return PTR_ERR(dmabuf);
495                 virtual_address = dma_buf_vmap(dmabuf);
496                 if (!virtual_address)
497                         return -ENOMEM;
498
499                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
500                 ch->cyclestate.cyclestate_buffer = virtual_address;
501                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
502                 return 0;
503
504         } else if (!args->dmabuf_fd &&
505                         ch->cyclestate.cyclestate_buffer_handler) {
506                 gk20a_free_cycle_stats_buffer(ch);
507                 return 0;
508
509         } else if (!args->dmabuf_fd &&
510                         !ch->cyclestate.cyclestate_buffer_handler) {
511                 /* no request from GL */
512                 return 0;
513
514         } else {
515                 pr_err("channel already has cyclestats buffer\n");
516                 return -EINVAL;
517         }
518 }
519
520
521 static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
522 {
523         int ret;
524
525         mutex_lock(&ch->cs_client_mutex);
526         if (ch->cs_client)
527                 ret = gr_gk20a_css_flush(ch->g, ch->cs_client);
528         else
529                 ret = -EBADF;
530         mutex_unlock(&ch->cs_client_mutex);
531
532         return ret;
533 }
534
535 static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
536                                 u32 dmabuf_fd,
537                                 u32 perfmon_id_count,
538                                 u32 *perfmon_id_start)
539 {
540         int ret;
541
542         mutex_lock(&ch->cs_client_mutex);
543         if (ch->cs_client) {
544                 ret = -EEXIST;
545         } else {
546                 ret = gr_gk20a_css_attach(ch->g,
547                                         dmabuf_fd,
548                                         perfmon_id_count,
549                                         perfmon_id_start,
550                                         &ch->cs_client);
551         }
552         mutex_unlock(&ch->cs_client_mutex);
553
554         return ret;
555 }
556
557 static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
558 {
559         int ret;
560
561         mutex_lock(&ch->cs_client_mutex);
562         if (ch->cs_client) {
563                 ret = gr_gk20a_css_detach(ch->g, ch->cs_client);
564                 ch->cs_client = NULL;
565         } else {
566                 ret = 0;
567         }
568         mutex_unlock(&ch->cs_client_mutex);
569
570         return ret;
571 }
572
573 static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
574                         struct nvgpu_cycle_stats_snapshot_args *args)
575 {
576         int ret;
577
578         /* is it allowed to handle calls for current GPU? */
579         if (0 == (ch->g->gpu_characteristics.flags &
580                         NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT))
581                 return -ENOSYS;
582
583         if (!args->dmabuf_fd)
584                 return -EINVAL;
585
586         /* handle the command (most frequent cases first) */
587         switch (args->cmd) {
588         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
589                 ret = gk20a_flush_cycle_stats_snapshot(ch);
590                 args->extra = 0;
591                 break;
592
593         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
594                 ret = gk20a_attach_cycle_stats_snapshot(ch,
595                                                 args->dmabuf_fd,
596                                                 args->extra,
597                                                 &args->extra);
598                 break;
599
600         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
601                 ret = gk20a_free_cycle_stats_snapshot(ch);
602                 args->extra = 0;
603                 break;
604
605         default:
606                 pr_err("cyclestats: unknown command %u\n", args->cmd);
607                 ret = -EINVAL;
608                 break;
609         }
610
611         return ret;
612 }
613 #endif
614
615 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
616                 struct nvgpu_set_error_notifier *args) {
617         void *va;
618
619         struct dma_buf *dmabuf;
620
621         if (!args->mem) {
622                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
623                 return -EINVAL;
624         }
625
626         dmabuf = dma_buf_get(args->mem);
627
628         if (ch->error_notifier_ref)
629                 gk20a_free_error_notifiers(ch);
630
631         if (IS_ERR(dmabuf)) {
632                 pr_err("Invalid handle: %d\n", args->mem);
633                 return -EINVAL;
634         }
635         /* map handle */
636         va = dma_buf_vmap(dmabuf);
637         if (!va) {
638                 dma_buf_put(dmabuf);
639                 pr_err("Cannot map notifier handle\n");
640                 return -ENOMEM;
641         }
642
643         /* set channel notifiers pointer */
644         ch->error_notifier_ref = dmabuf;
645         ch->error_notifier = va + args->offset;
646         ch->error_notifier_va = va;
647         memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
648         return 0;
649 }
650
651 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
652 {
653         if (ch->error_notifier_ref) {
654                 struct timespec time_data;
655                 u64 nsec;
656                 getnstimeofday(&time_data);
657                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
658                                 (u64)time_data.tv_nsec;
659                 ch->error_notifier->time_stamp.nanoseconds[0] =
660                                 (u32)nsec;
661                 ch->error_notifier->time_stamp.nanoseconds[1] =
662                                 (u32)(nsec >> 32);
663                 ch->error_notifier->info32 = error;
664                 ch->error_notifier->status = 0xffff;
665
666                 gk20a_err(dev_from_gk20a(ch->g),
667                     "error notifier set to %d for ch %d", error, ch->hw_chid);
668         }
669 }
670
671 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
672 {
673         if (ch->error_notifier_ref) {
674                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
675                 dma_buf_put(ch->error_notifier_ref);
676                 ch->error_notifier_ref = NULL;
677                 ch->error_notifier = NULL;
678                 ch->error_notifier_va = NULL;
679         }
680 }
681
682 /* Returns delta of cyclic integers a and b. If a is ahead of b, delta
683  * is positive */
684 static int cyclic_delta(int a, int b)
685 {
686         return a - b;
687 }
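/*
 * Example (illustrative): the subtraction stays correct across counter
 * wraparound given the kernel's wrapping signed arithmetic; e.g. with
 * a = INT_MIN + 5 and b = INT_MAX - 10 the result is the small positive
 * distance 16, so the wait_event() conditions below behave sensibly even
 * after the hw irq counters overflow.
 */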
688
689 static void gk20a_wait_for_deferred_interrupts(struct gk20a *g)
690 {
691         int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count);
692         int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count);
693
694         /* wait until all stalling irqs are handled */
695         wait_event(g->sw_irq_stall_last_handled_wq,
696                    cyclic_delta(stall_irq_threshold,
697                                 atomic_read(&g->sw_irq_stall_last_handled))
698                    <= 0);
699
700         /* wait until all non-stalling irqs are handled */
701         wait_event(g->sw_irq_nonstall_last_handled_wq,
702                    cyclic_delta(nonstall_irq_threshold,
703                                 atomic_read(&g->sw_irq_nonstall_last_handled))
704                    <= 0);
705 }
706
707 static void gk20a_wait_until_counter_is_N(
708         struct channel_gk20a *ch, atomic_t *counter, int wait_value,
709         wait_queue_head_t *wq, const char *caller, const char *counter_name)
710 {
711         while (true) {
712                 if (wait_event_timeout(
713                             *wq,
714                             atomic_read(counter) == wait_value,
715                             msecs_to_jiffies(5000)) > 0)
716                         break;
717
718                 gk20a_warn(dev_from_gk20a(ch->g),
719                            "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
720                            caller, ch->hw_chid, counter_name,
721                            atomic_read(counter), wait_value);
722         }
723 }
724
725
726
727 /* call ONLY when no references to the channel exist: after the last put */
728 static void gk20a_free_channel(struct channel_gk20a *ch)
729 {
730         struct gk20a *g = ch->g;
731         struct fifo_gk20a *f = &g->fifo;
732         struct gr_gk20a *gr = &g->gr;
733         struct vm_gk20a *ch_vm = ch->vm;
734         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
735         struct dbg_session_gk20a *dbg_s;
736         bool was_reset;
737         gk20a_dbg_fn("");
738
739         WARN_ON(ch->g == NULL);
740
741         trace_gk20a_free_channel(ch->hw_chid);
742
743         /* prevent new kickoffs */
744         ch->has_timedout = true;
745         wmb();
746
747         /* wait until there's only our ref to the channel */
748         gk20a_wait_until_counter_is_N(
749                 ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
750                 __func__, "references");
751
752         /* wait until all pending interrupts for recently completed
753          * jobs are handled */
754         gk20a_wait_for_deferred_interrupts(g);
755
756         /* prevent new refs */
757         spin_lock(&ch->ref_obtain_lock);
758         if (!ch->referenceable) {
759                 spin_unlock(&ch->ref_obtain_lock);
760                 gk20a_err(dev_from_gk20a(ch->g),
761                           "Extra %s() called to channel %u",
762                           __func__, ch->hw_chid);
763                 return;
764         }
765         ch->referenceable = false;
766         spin_unlock(&ch->ref_obtain_lock);
767
768         /* matches with the initial reference in gk20a_open_new_channel() */
769         atomic_dec(&ch->ref_count);
770
771         /* wait until no more refs to the channel */
772         gk20a_wait_until_counter_is_N(
773                 ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
774                 __func__, "references");
775
776         /* if engine reset was deferred, perform it now */
777         mutex_lock(&f->deferred_reset_mutex);
778         if (g->fifo.deferred_reset_pending) {
779                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
780                            " deferred, running now");
781                 was_reset = mutex_is_locked(&g->fifo.gr_reset_mutex);
782                 mutex_lock(&g->fifo.gr_reset_mutex);
783                 /* if lock is already taken, a reset is taking place
784                 so no need to repeat */
785                 if (!was_reset) {
786                         gk20a_fifo_reset_engine(g,
787                                 g->fifo.deferred_fault_engines);
788                 }
789                 mutex_unlock(&g->fifo.gr_reset_mutex);
790                 g->fifo.deferred_fault_engines = 0;
791                 g->fifo.deferred_reset_pending = false;
792         }
793         mutex_unlock(&f->deferred_reset_mutex);
794
795         if (!ch->bound)
796                 goto release;
797
798         if (!gk20a_channel_as_bound(ch))
799                 goto unbind;
800
801         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
802                         timeout);
803
804         gk20a_disable_channel(ch, !ch->has_timedout, timeout);
805
806         gk20a_free_error_notifiers(ch);
807
808         /* release channel ctx */
809         g->ops.gr.free_channel_ctx(ch);
810
811         gk20a_gr_flush_channel_tlb(gr);
812
813         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
814
815         gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
816
817         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
818
819 #if defined(CONFIG_GK20A_CYCLE_STATS)
820         gk20a_free_cycle_stats_buffer(ch);
821         gk20a_free_cycle_stats_snapshot(ch);
822 #endif
823
824         channel_gk20a_free_priv_cmdbuf(ch);
825
826         /* sync must be destroyed before releasing channel vm */
827         if (ch->sync) {
828                 ch->sync->destroy(ch->sync);
829                 ch->sync = NULL;
830         }
831
832         /* release channel binding to the as_share */
833         if (ch_vm->as_share)
834                 gk20a_as_release_share(ch_vm->as_share);
835         else
836                 gk20a_vm_put(ch_vm);
837
838         spin_lock(&ch->update_fn_lock);
839         ch->update_fn = NULL;
840         ch->update_fn_data = NULL;
841         spin_unlock(&ch->update_fn_lock);
842         cancel_work_sync(&ch->update_fn_work);
843
844         /* make sure we don't have deferred interrupts pending that
845          * could still touch the channel */
846         gk20a_wait_for_deferred_interrupts(g);
847
848 unbind:
849         if (gk20a_is_channel_marked_as_tsg(ch))
850                 gk20a_tsg_unbind_channel(ch);
851
852         g->ops.fifo.unbind_channel(ch);
853         g->ops.fifo.free_inst(g, ch);
854
855         ch->vpr = false;
856         ch->vm = NULL;
857
858         mutex_lock(&ch->submit_lock);
859         gk20a_fence_put(ch->last_submit.pre_fence);
860         gk20a_fence_put(ch->last_submit.post_fence);
861         ch->last_submit.pre_fence = NULL;
862         ch->last_submit.post_fence = NULL;
863         mutex_unlock(&ch->submit_lock);
864         WARN_ON(ch->sync);
865
866         /* unlink all debug sessions */
867         mutex_lock(&ch->dbg_s_lock);
868
869         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
870                 dbg_s->ch = NULL;
871                 list_del_init(&dbg_s->dbg_s_list_node);
872         }
873
874         mutex_unlock(&ch->dbg_s_lock);
875
876 release:
877         /* make sure we catch accesses of unopened channels in case
878          * there are non-refcounted channel pointers hanging around */
879         ch->g = NULL;
880         wmb();
881
882         /* ALWAYS last */
883         free_channel(f, ch);
884 }
885
886 /* Try to get a reference to the channel. Return nonzero on success. If fails,
887  * the channel is dead or being freed elsewhere and you must not touch it.
888  *
889  * Whenever a channel_gk20a pointer is seen and about to be used, a
890  * reference must be held to it - either by you or the caller, which should be
891  * documented well or otherwise clearly seen. This usually boils down to the
892  * channel file in ioctl paths, or an explicit get in exception handlers when
893  * the channel is found by a hw_chid.
894  *
895  * Most global functions in this file require a reference to be held by the
896  * caller.
897  */
898 struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
899                                          const char *caller) {
900         struct channel_gk20a *ret;
901
902         spin_lock(&ch->ref_obtain_lock);
903
904         if (likely(ch->referenceable)) {
905                 atomic_inc(&ch->ref_count);
906                 ret = ch;
907         } else
908                 ret = NULL;
909
910         spin_unlock(&ch->ref_obtain_lock);
911
912         if (ret)
913                 trace_gk20a_channel_get(ch->hw_chid, caller);
914
915         return ret;
916 }
917
918 void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
919 {
920         trace_gk20a_channel_put(ch->hw_chid, caller);
921         atomic_dec(&ch->ref_count);
922         wake_up_all(&ch->ref_count_dec_wq);
923
924         /* More puts than gets. Channel is probably going to get
925          * stuck. */
926         WARN_ON(atomic_read(&ch->ref_count) < 0);
927
928         /* Also, more puts than gets. ref_count can go to 0 only if
929          * the channel is closing. Channel is probably going to get
930          * stuck. */
931         WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
932 }
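/*
 * Typical usage sketch for the reference API above (illustrative; the
 * gk20a_channel_get()/gk20a_channel_put() wrappers used elsewhere in this
 * file presumably just pass __func__ as the caller tag):
 *
 *	struct channel_gk20a *ch = gk20a_channel_get(possibly_dying_ch);
 *	if (!ch)
 *		return;		<- dead or being freed, do not touch it
 *	... use ch safely here ...
 *	gk20a_channel_put(ch);
 */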
933
934 void gk20a_channel_close(struct channel_gk20a *ch)
935 {
936         gk20a_free_channel(ch);
937 }
938
939 int gk20a_channel_release(struct inode *inode, struct file *filp)
940 {
941         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
942         struct gk20a *g = ch ? ch->g : NULL;
943         int err;
944
945         if (!ch)
946                 return 0;
947
948         trace_gk20a_channel_release(dev_name(&g->dev->dev));
949
950         err = gk20a_busy(g->dev);
951         if (err) {
952                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
953                         ch->hw_chid);
954                 return err;
955         }
956         gk20a_channel_close(ch);
957         gk20a_idle(g->dev);
958
959         filp->private_data = NULL;
960         return 0;
961 }
962
963 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
964 {
965         struct channel_gk20a *ch =
966                 container_of(work, struct channel_gk20a, update_fn_work);
967         void (*update_fn)(struct channel_gk20a *, void *);
968         void *update_fn_data;
969
970         spin_lock(&ch->update_fn_lock);
971         update_fn = ch->update_fn;
972         update_fn_data = ch->update_fn_data;
973         spin_unlock(&ch->update_fn_lock);
974
975         if (update_fn)
976                 update_fn(ch, update_fn_data);
977 }
978
979 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
980                 void (*update_fn)(struct channel_gk20a *, void *),
981                 void *update_fn_data)
982 {
983         struct channel_gk20a *ch = gk20a_open_new_channel(g);
984
985         if (ch) {
986                 spin_lock(&ch->update_fn_lock);
987                 ch->update_fn = update_fn;
988                 ch->update_fn_data = update_fn_data;
989                 spin_unlock(&ch->update_fn_lock);
990         }
991
992         return ch;
993 }
994
995 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
996 {
997         struct fifo_gk20a *f = &g->fifo;
998         struct channel_gk20a *ch;
999
1000         gk20a_dbg_fn("");
1001
1002         ch = allocate_channel(f);
1003         if (ch == NULL) {
1004                 /* TBD: we want to make this virtualizable */
1005                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
1006                 return NULL;
1007         }
1008
1009         trace_gk20a_open_new_channel(ch->hw_chid);
1010
1011         BUG_ON(ch->g);
1012         ch->g = g;
1013
1014         if (g->ops.fifo.alloc_inst(g, ch)) {
1015                 ch->g = NULL;
1016                 free_channel(f, ch);
1017                 gk20a_err(dev_from_gk20a(g),
1018                            "failed to open gk20a channel, out of inst mem");
1019                 return NULL;
1020         }
1021
1022         /* now the channel is in limbo: off the free list but not marked as
1023          * alive and used (i.e. get-able) yet */
1024
1025         ch->pid = current->pid;
1026
1027         /* By default, channel is regular (non-TSG) channel */
1028         ch->tsgid = NVGPU_INVALID_TSG_ID;
1029
1030         /* reset timeout counter and update timestamp */
1031         ch->timeout_accumulated_ms = 0;
1032         ch->timeout_gpfifo_get = 0;
1033         /* set gr host default timeout */
1034         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
1035         ch->timeout_debug_dump = true;
1036         ch->has_timedout = false;
1037         ch->obj_class = 0;
1038
1039         /* The channel is *not* runnable at this point. It still needs to have
1040          * an address space bound and allocate a gpfifo and grctx. */
1041
1042         init_waitqueue_head(&ch->notifier_wq);
1043         init_waitqueue_head(&ch->semaphore_wq);
1044         init_waitqueue_head(&ch->submit_wq);
1045
1046         mutex_init(&ch->poll_events.lock);
1047         ch->poll_events.events_enabled = false;
1048         ch->poll_events.num_pending_events = 0;
1049
1050         ch->update_fn = NULL;
1051         ch->update_fn_data = NULL;
1052         spin_lock_init(&ch->update_fn_lock);
1053         INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
1054
1055         /* Mark the channel alive and get-able, with one initial reference.
1056          * The initial reference will be decreased in
1057          * gk20a_free_channel() */
1058         ch->referenceable = true;
1059         atomic_set(&ch->ref_count, 1);
1060         wmb();
1061
1062         return ch;
1063 }
1064
1065 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
1066 {
1067         int err;
1068         struct channel_gk20a *ch;
1069
1070         trace_gk20a_channel_open(dev_name(&g->dev->dev));
1071
1072         err = gk20a_busy(g->dev);
1073         if (err) {
1074                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
1075                 return err;
1076         }
1077         ch = gk20a_open_new_channel(g);
1078         gk20a_idle(g->dev);
1079         if (!ch) {
1080                 gk20a_err(dev_from_gk20a(g),
1081                         "failed to allocate a new channel");
1082                 return -ENOMEM;
1083         }
1084
1085         filp->private_data = ch;
1086         return 0;
1087 }
1088
1089 int gk20a_channel_open(struct inode *inode, struct file *filp)
1090 {
1091         struct gk20a *g = container_of(inode->i_cdev,
1092                         struct gk20a, channel.cdev);
1093         int ret;
1094
1095         gk20a_dbg_fn("start");
1096         ret = __gk20a_channel_open(g, filp);
1097
1098         gk20a_dbg_fn("end");
1099         return ret;
1100 }
1101
1102 int gk20a_channel_open_ioctl(struct gk20a *g,
1103                 struct nvgpu_channel_open_args *args)
1104 {
1105         int err;
1106         int fd;
1107         struct file *file;
1108         char *name;
1109
1110         err = get_unused_fd_flags(O_RDWR);
1111         if (err < 0)
1112                 return err;
1113         fd = err;
1114
1115         name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
1116                         dev_name(&g->dev->dev), fd);
1117         if (!name) {
1118                 err = -ENOMEM;
1119                 goto clean_up;
1120         }
1121
1122         file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
1123         kfree(name);
1124         if (IS_ERR(file)) {
1125                 err = PTR_ERR(file);
1126                 goto clean_up;
1127         }
1128
1129         err = __gk20a_channel_open(g, file);
1130         if (err)
1131                 goto clean_up_file;
1132
1133         fd_install(fd, file);
1134         args->channel_fd = fd;
1135         return 0;
1136
1137 clean_up_file:
1138         fput(file);
1139 clean_up:
1140         put_unused_fd(fd);
1141         return err;
1142 }
1143
1144 /* allocate private cmd buffer.
1145    used for inserting commands before/after user submitted buffers. */
1146 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
1147 {
1148         struct device *d = dev_from_gk20a(c->g);
1149         struct vm_gk20a *ch_vm = c->vm;
1150         struct priv_cmd_queue *q = &c->priv_cmd_q;
1151         u32 size;
1152         int err = 0;
1153
1154         /* Kernel can insert gpfifos before and after user gpfifos.
1155            Before user gpfifos, kernel inserts fence_wait, which takes
1156            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
1157            After user gpfifos, kernel inserts fence_get, which takes
1158            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
1159            = 6 dwords.
1160            Worst case, if the kernel adds both of them for every user gpfifo,
1161            the max size of priv_cmdbuf is:
1162            gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */
1163         size = roundup_pow_of_two(
1164                 c->gpfifo.entry_num * 2 * 12 * sizeof(u32) / 3);
1165
1166         err = gk20a_gmmu_alloc_map(ch_vm, size, &q->mem);
1167         if (err) {
1168                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1169                 goto clean_up;
1170         }
1171
1172         q->size = q->mem.size / sizeof (u32);
1173
1174         INIT_LIST_HEAD(&q->head);
1175         INIT_LIST_HEAD(&q->free);
1176
1177         return 0;
1178
1179 clean_up:
1180         channel_gk20a_free_priv_cmdbuf(c);
1181         return err;
1182 }
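/*
 * Worked example of the sizing above (illustrative): with
 * gpfifo.entry_num == 1536 the raw estimate is 1536 * 2 * 12 * 4 / 3 =
 * 49152 bytes, which roundup_pow_of_two() turns into a 64 KiB allocation,
 * i.e. q->size == 16384 u32 words. Keeping the size a power of two is what
 * makes the "& (q->size - 1)" wrap in gk20a_channel_alloc_priv_cmdbuf()
 * valid.
 */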
1183
1184 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
1185 {
1186         struct vm_gk20a *ch_vm = c->vm;
1187         struct priv_cmd_queue *q = &c->priv_cmd_q;
1188         struct priv_cmd_entry *e;
1189         struct list_head *pos, *tmp, *head;
1190
1191         if (q->size == 0)
1192                 return;
1193
1194         gk20a_gmmu_unmap_free(ch_vm, &q->mem);
1195
1196         /* free used list */
1197         head = &q->head;
1198         list_for_each_safe(pos, tmp, head) {
1199                 e = container_of(pos, struct priv_cmd_entry, list);
1200                 free_priv_cmdbuf(c, e);
1201         }
1202
1203         /* free free list */
1204         head = &q->free;
1205         list_for_each_safe(pos, tmp, head) {
1206                 e = container_of(pos, struct priv_cmd_entry, list);
1207                 kfree(e);
1208         }
1209
1210         memset(q, 0, sizeof(struct priv_cmd_queue));
1211 }
1212
1213 /* allocate a cmd buffer with given size. size is number of u32 entries */
1214 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1215                              struct priv_cmd_entry **entry)
1216 {
1217         struct priv_cmd_queue *q = &c->priv_cmd_q;
1218         struct priv_cmd_entry *e;
1219         u32 free_count;
1220         u32 size = orig_size;
1221         bool no_retry = false;
1222
1223         gk20a_dbg_fn("size %d", orig_size);
1224
1225         *entry = NULL;
1226
1227         /* if free space at the end is less than requested, increase the size
1228          * so that the real allocation starts from the beginning. */
1229         if (q->put + size > q->size)
1230                 size = orig_size + (q->size - q->put);
1231
1232         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
1233                         c->hw_chid, q->get, q->put);
1234
1235 TRY_AGAIN:
1236         free_count = (q->size - (q->put - q->get) - 1) % q->size;
1237
1238         if (size > free_count) {
1239                 if (!no_retry) {
1240                         recycle_priv_cmdbuf(c);
1241                         no_retry = true;
1242                         goto TRY_AGAIN;
1243                 } else
1244                         return -EAGAIN;
1245         }
1246
1247         e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1248         if (!e) {
1249                 gk20a_err(dev_from_gk20a(c->g),
1250                         "ch %d: fail to allocate priv cmd entry",
1251                         c->hw_chid);
1252                 return -ENOMEM;
1253         }
1254
1255         e->size = orig_size;
1256         e->gp_get = c->gpfifo.get;
1257         e->gp_put = c->gpfifo.put;
1258         e->gp_wrap = c->gpfifo.wrap;
1259
1260         /* if we have increased size to skip free space in the end, set put
1261            to beginning of cmd buffer (0) + size */
1262         if (size != orig_size) {
1263                 e->ptr = (u32 *)q->mem.cpu_va;
1264                 e->gva = q->mem.gpu_va;
1265                 q->put = orig_size;
1266         } else {
1267                 e->ptr = (u32 *)q->mem.cpu_va + q->put;
1268                 e->gva = q->mem.gpu_va + q->put * sizeof(u32);
1269                 q->put = (q->put + orig_size) & (q->size - 1);
1270         }
1271
1272         /* we already handled q->put + size > q->size so BUG_ON this */
1273         BUG_ON(q->put > q->size);
1274
1275         /* add new entry to head since we free from head */
1276         list_add(&e->list, &q->head);
1277
1278         *entry = e;
1279
1280         gk20a_dbg_fn("done");
1281
1282         return 0;
1283 }
1284
1285 /* Don't call this to free an explicit cmd entry.
1286  * It doesn't update priv_cmd_queue get/put */
1287 static void free_priv_cmdbuf(struct channel_gk20a *c,
1288                              struct priv_cmd_entry *e)
1289 {
1290         if (!e)
1291                 return;
1292
1293         list_del(&e->list);
1294
1295         kfree(e);
1296 }
1297
1298 /* free entries if they're no longer being used */
1299 static void recycle_priv_cmdbuf(struct channel_gk20a *c)
1300 {
1301         struct priv_cmd_queue *q = &c->priv_cmd_q;
1302         struct priv_cmd_entry *e, *tmp;
1303         struct list_head *head = &q->head;
1304         bool wrap_around, found = false;
1305
1306         gk20a_dbg_fn("");
1307
1308         /* Find the most recent free entry. Free it and everything before it */
1309         list_for_each_entry(e, head, list) {
1310
1311                 gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
1312                         "curr get:put:wrap %d:%d:%d",
1313                         c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
1314                         c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
1315
1316                 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
1317                 if (e->gp_get < e->gp_put) {
1318                         if (c->gpfifo.get >= e->gp_put ||
1319                             wrap_around) {
1320                                 found = true;
1321                                 break;
1322                         } else
1323                                 e->gp_get = c->gpfifo.get;
1324                 } else if (e->gp_get > e->gp_put) {
1325                         if (wrap_around &&
1326                             c->gpfifo.get >= e->gp_put) {
1327                                 found = true;
1328                                 break;
1329                         } else
1330                                 e->gp_get = c->gpfifo.get;
1331                 }
1332         }
1333
1334         if (found)
1335                 q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
1336         else {
1337                 gk20a_dbg_info("no free entry recycled");
1338                 return;
1339         }
1340
1341         list_for_each_entry_safe_continue(e, tmp, head, list) {
1342                 free_priv_cmdbuf(c, e);
1343         }
1344
1345         gk20a_dbg_fn("done");
1346 }
1347
1348
1349 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1350                 struct nvgpu_alloc_gpfifo_args *args)
1351 {
1352         struct gk20a *g = c->g;
1353         struct device *d = dev_from_gk20a(g);
1354         struct vm_gk20a *ch_vm;
1355         u32 gpfifo_size;
1356         int err = 0;
1357
1358         /* Kernel can insert one extra gpfifo entry before user submitted gpfifos
1359            and another one after, for internal usage. Triple the requested size. */
1360         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
1361
1362         if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1363                 c->vpr = true;
1364
1365         /* an address space needs to have been bound at this point. */
1366         if (!gk20a_channel_as_bound(c)) {
1367                 gk20a_err(d,
1368                             "not bound to an address space at time of gpfifo"
1369                             " allocation.");
1370                 return -EINVAL;
1371         }
1372         ch_vm = c->vm;
1373
1374         c->cmds_pending = false;
1375         mutex_lock(&c->submit_lock);
1376         gk20a_fence_put(c->last_submit.pre_fence);
1377         gk20a_fence_put(c->last_submit.post_fence);
1378         c->last_submit.pre_fence = NULL;
1379         c->last_submit.post_fence = NULL;
1380         mutex_unlock(&c->submit_lock);
1381
1382         c->ramfc.offset = 0;
1383         c->ramfc.size = ram_in_ramfc_s() / 8;
1384
1385         if (c->gpfifo.mem.cpu_va) {
1386                 gk20a_err(d, "channel %d :"
1387                            "gpfifo already allocated", c->hw_chid);
1388                 return -EEXIST;
1389         }
1390
1391         err = gk20a_gmmu_alloc_map(ch_vm, gpfifo_size * sizeof(struct gpfifo),
1392                         &c->gpfifo.mem);
1393         if (err) {
1394                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1395                 goto clean_up;
1396         }
1397
1398         c->gpfifo.entry_num = gpfifo_size;
1399         c->gpfifo.get = c->gpfifo.put = 0;
1400
1401         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1402                 c->hw_chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num);
1403
1404         channel_gk20a_setup_userd(c);
1405
1406         err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
1407                                         c->gpfifo.entry_num, args->flags);
1408         if (err)
1409                 goto clean_up_unmap;
1410
1411         /* TBD: setup engine contexts */
1412
1413         err = channel_gk20a_alloc_priv_cmdbuf(c);
1414         if (err)
1415                 goto clean_up_unmap;
1416
1417         err = channel_gk20a_update_runlist(c, true);
1418         if (err)
1419                 goto clean_up_unmap;
1420
1421         g->ops.fifo.bind_channel(c);
1422
1423         gk20a_dbg_fn("done");
1424         return 0;
1425
1426 clean_up_unmap:
1427         gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
1428 clean_up:
1429         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1430         gk20a_err(d, "fail");
1431         return err;
1432 }
1433
1434 static inline bool check_gp_put(struct gk20a *g,
1435                                 struct channel_gk20a *c)
1436 {
1437         u32 put;
1438         /* gp_put changed unexpectedly since last update? */
1439         put = gk20a_bar1_readl(g,
1440                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1441         if (c->gpfifo.put != put) {
1442                 /* TBD: BUG_ON/teardown on this */
1443                 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1444                           "since last update, channel put = %u, ram put = %u\n",
1445                           c->gpfifo.put, put);
1446                 c->gpfifo.put = put;
1447                 return false; /* surprise! */
1448         }
1449         return true; /* checked out ok */
1450 }
1451
1452 /* Call this periodically to refresh gpfifo.get and see how the gpfifo is draining. */
1453 static inline u32 update_gp_get(struct gk20a *g,
1454                                 struct channel_gk20a *c)
1455 {
1456         u32 new_get = gk20a_bar1_readl(g,
1457                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
1458         if (new_get < c->gpfifo.get)
1459                 c->gpfifo.wrap = !c->gpfifo.wrap;
1460         c->gpfifo.get = new_get;
1461         return new_get;
1462 }
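/*
 * A new GET value lower than the cached one means the hardware consumed past
 * the end of the gpfifo ring, so the software wrap flag is toggled;
 * recycle_priv_cmdbuf() compares this flag with the wrap state recorded in
 * each priv_cmd_entry to decide which entries are definitely consumed.
 */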
1463
1464 static inline u32 gp_free_count(struct channel_gk20a *c)
1465 {
1466         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1467                 c->gpfifo.entry_num;
1468 }
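/*
 * Example (illustrative): with entry_num == 1024, put == 10 and get == 1000
 * the expression yields (1024 - (10 - 1000) - 1) % 1024 = 989 free slots;
 * the "- 1" keeps one entry unused so that put == get can only mean "empty",
 * never "full".
 */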
1469
1470 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1471                 u32 timeout_delta_ms)
1472 {
1473         u32 gpfifo_get = update_gp_get(ch->g, ch);
1474         /* Count consecutive timeout ISRs */
1475         if (gpfifo_get == ch->timeout_gpfifo_get) {
1476                 /* we didn't advance since previous channel timeout check */
1477                 ch->timeout_accumulated_ms += timeout_delta_ms;
1478         } else {
1479                 /* first timeout isr encountered */
1480                 ch->timeout_accumulated_ms = timeout_delta_ms;
1481         }
1482
1483         ch->timeout_gpfifo_get = gpfifo_get;
1484
1485         return ch->g->timeouts_enabled &&
1486                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1487 }
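/*
 * Example (illustrative): with timeouts enabled and timeout_ms_max == 3000,
 * timeout ISRs arriving every 1000 ms while gp_get makes no progress
 * accumulate 1000 -> 2000 -> 3000 -> 4000 ms, and the fourth call returns
 * true. Any forward movement of gp_get restarts the accumulation from the
 * latest delta.
 */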
1488
1489 static u32 get_gp_free_count(struct channel_gk20a *c)
1490 {
1491         update_gp_get(c->g, c);
1492         return gp_free_count(c);
1493 }
1494
1495 static void trace_write_pushbuffer(struct channel_gk20a *c,
1496                                    struct nvgpu_gpfifo *g)
1497 {
1498         void *mem = NULL;
1499         unsigned int words;
1500         u64 offset;
1501         struct dma_buf *dmabuf = NULL;
1502
1503         if (gk20a_debug_trace_cmdbuf) {
1504                 u64 gpu_va = (u64)g->entry0 |
1505                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1506                 int err;
1507
1508                 words = pbdma_gp_entry1_length_v(g->entry1);
1509                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1510                 if (!err)
1511                         mem = dma_buf_vmap(dmabuf);
1512         }
1513
1514         if (mem) {
1515                 u32 i;
1516                 /*
1517                  * Write in batches of 128 as there seems to be a limit
1518                  * of how much you can output to ftrace at once.
1519                  */
1520                 for (i = 0; i < words; i += 128U) {
1521                         trace_gk20a_push_cmdbuf(
1522                                 c->g->dev->name,
1523                                 0,
1524                                 min(words - i, 128U),
1525                                 offset + i * sizeof(u32),
1526                                 mem);
1527                 }
1528                 dma_buf_vunmap(dmabuf, mem);
1529         }
1530 }
1531
1532 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
1533                                          struct nvgpu_gpfifo *g,
1534                                          struct nvgpu_submit_gpfifo_args *args,
1535                                          int offset,
1536                                          int count)
1537 {
1538         u32 size;
1539         int i;
1540         struct nvgpu_gpfifo *gp;
1541         bool gpfifo_allocated = false;
1542
1543         if (!gk20a_debug_trace_cmdbuf)
1544                 return;
1545
1546         if (!g && !args)
1547                 return;
1548
1549         if (!g) {
1550                 size = args->num_entries * sizeof(struct nvgpu_gpfifo);
1551                 if (size) {
1552                         g = nvgpu_alloc(size, false);
1553                         if (!g)
1554                                 return;
1555
1556                         if (copy_from_user(g,
1557                                 (void __user *)(uintptr_t)args->gpfifo, size)) {
                                     /* don't leak the temporary copy on fault */
                                     nvgpu_free(g);
1558                                 return;
1559                         }
1560                 }
1561                 gpfifo_allocated = true;
1562         }
1563
1564         gp = g + offset;
1565         for (i = 0; i < count; i++, gp++)
1566                 trace_write_pushbuffer(c, gp);
1567
1568         if (gpfifo_allocated)
1569                 nvgpu_free(g);
1570 }
1571
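     /*
      * Bookkeeping for one in-flight submit: the job holds references to the
      * VM, to every buffer mapped into it (unless skip_buffer_refcounting is
      * set), to the pre/post fences and to the channel itself. All of these
      * are dropped in gk20a_channel_update() once the post fence expires.
      */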
1572 static int gk20a_channel_add_job(struct channel_gk20a *c,
1573                                  struct gk20a_fence *pre_fence,
1574                                  struct gk20a_fence *post_fence,
1575                                  bool skip_buffer_refcounting)
1576 {
1577         struct vm_gk20a *vm = c->vm;
1578         struct channel_gk20a_job *job = NULL;
1579         struct mapped_buffer_node **mapped_buffers = NULL;
1580         int err = 0, num_mapped_buffers = 0;
1581
1582         /* job needs reference to this vm (released in channel_update) */
1583         gk20a_vm_get(vm);
1584
1585         if (!skip_buffer_refcounting) {
1586                 err = gk20a_vm_get_buffers(vm, &mapped_buffers,
1587                                         &num_mapped_buffers);
1588                 if (err) {
1589                         gk20a_vm_put(vm);
1590                         return err;
1591                 }
1592         }
1593
1594         job = kzalloc(sizeof(*job), GFP_KERNEL);
1595         if (!job) {
1596                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1597                 gk20a_vm_put(vm);
1598                 return -ENOMEM;
1599         }
1600
1601         /* put() is done in gk20a_channel_update() when the job is done */
1602         c = gk20a_channel_get(c);
1603
1604         if (c) {
1605                 job->num_mapped_buffers = num_mapped_buffers;
1606                 job->mapped_buffers = mapped_buffers;
1607                 job->pre_fence = gk20a_fence_get(pre_fence);
1608                 job->post_fence = gk20a_fence_get(post_fence);
1609
1610                 mutex_lock(&c->jobs_lock);
1611                 list_add_tail(&job->list, &c->jobs);
1612                 mutex_unlock(&c->jobs_lock);
1613         } else {
                     /* channel is already gone: undo the refs taken above */
                     gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
                     gk20a_vm_put(vm);
                     kfree(job);
1614                 return -ETIMEDOUT;
1615         }
1616
1617         return 0;
1618 }
1619
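     /*
      * Completion handling: walk the job list in submission order, reap each
      * job whose post fence has expired, drop the references taken in
      * gk20a_channel_add_job() and release the power ref taken at submit
      * time. Once the list drains (and the last submit has completed) the
      * sync object is destroyed if aggressive_sync_destroy is set.
      */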
1620 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1621 {
1622         struct vm_gk20a *vm = c->vm;
1623         struct channel_gk20a_job *job, *n;
1624         struct gk20a_platform *platform = gk20a_get_platform(c->g->dev);
1625
1626         trace_gk20a_channel_update(c->hw_chid);
1627
1628         wake_up(&c->submit_wq);
1629
1630         mutex_lock(&c->submit_lock);
1631         mutex_lock(&c->jobs_lock);
1632         list_for_each_entry_safe(job, n, &c->jobs, list) {
1633                 struct gk20a *g = c->g;
1634
1635                 bool completed = gk20a_fence_is_expired(job->post_fence);
1636                 if (!completed)
1637                         break;
1638
1639                 if (c->sync)
1640                         c->sync->signal_timeline(c->sync);
1641
1642                 if (job->num_mapped_buffers)
1643                         gk20a_vm_put_buffers(vm, job->mapped_buffers,
1644                                 job->num_mapped_buffers);
1645
1646                 /* Close the fences (this will unref the semaphores and release
1647                  * them to the pool). */
1648                 gk20a_fence_put(job->pre_fence);
1649                 gk20a_fence_put(job->post_fence);
1650
1651                 /* job is done. release its vm reference (taken in add_job) */
1652                 gk20a_vm_put(vm);
1653                 /* drop the channel ref taken in add_job. The caller holds its
1654                  * own ref, so the channel cannot be freed from under us here. */
1655                 gk20a_channel_put(c);
1656
1657                 list_del_init(&job->list);
1658                 kfree(job);
1659                 gk20a_idle(g->dev);
1660         }
1661
1662         /*
1663          * If the job list is empty the channel is idle and we can free
1664          * the sync object here (provided the aggressive_sync_destroy
1665          * flag is set). Note: make sure the last submit has completed
1666          * before destroying the sync resource.
1667          */
1668         if (list_empty(&c->jobs)) {
1669                 if (c->sync && platform->aggressive_sync_destroy &&
1670                           gk20a_fence_is_expired(c->last_submit.post_fence)) {
1671                         c->sync->destroy(c->sync);
1672                         c->sync = NULL;
1673                 }
1674         }
1675         mutex_unlock(&c->jobs_lock);
1676         mutex_unlock(&c->submit_lock);
1677
1678         if (c->update_fn)
1679                 schedule_work(&c->update_fn_work);
1680 }
1681
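     /*
      * Main submit path: validate the request, take a power reference, wait
      * for ring space, optionally emit a fence-wait command, copy the gpfifo
      * entries (from either the in-kernel array or the user-space pointer in
      * args), always emit a fence increment, record the job, and finally
      * kick the engine by writing the new GP_PUT into USERD via BAR1.
      */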
1682 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1683                                 struct nvgpu_gpfifo *gpfifo,
1684                                 struct nvgpu_submit_gpfifo_args *args,
1685                                 u32 num_entries,
1686                                 u32 flags,
1687                                 struct nvgpu_fence *fence,
1688                                 struct gk20a_fence **fence_out)
1689 {
1690         struct gk20a *g = c->g;
1691         struct device *d = dev_from_gk20a(g);
1692         int err = 0;
1693         int start, end;
1694         int wait_fence_fd = -1;
1695         struct priv_cmd_entry *wait_cmd = NULL;
1696         struct priv_cmd_entry *incr_cmd = NULL;
1697         struct gk20a_fence *pre_fence = NULL;
1698         struct gk20a_fence *post_fence = NULL;
1699         /* we might need two extra gpfifo entries - one for pre fence
1700          * and one for post fence. */
1701         const int extra_entries = 2;
1702         bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1703         bool skip_buffer_refcounting = (flags &
1704                         NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
1705
1706         if (c->has_timedout)
1707                 return -ETIMEDOUT;
1708
1709         /* fifo not large enough for the request; return an error immediately.
1710          * The kernel may insert gpfifo entries before and after the user's
1711          * gpfifos, so account for extra_entries on top of the user request.
1712          * Also, HW with fifo size N accepts only N-1 entries, hence N-1 below. */
1713         if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
1714                 gk20a_err(d, "not enough gpfifo space allocated");
1715                 return -ENOMEM;
1716         }
1717
1718         if (!gpfifo && !args)
1719                 return -EINVAL;
1720
1721         if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1722                       NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1723             !fence)
1724                 return -EINVAL;
1725
1726         /* an address space needs to have been bound at this point. */
1727         if (!gk20a_channel_as_bound(c)) {
1728                 gk20a_err(d,
1729                             "not bound to an address space at time of gpfifo"
1730                             " submission.");
1731                 return -EINVAL;
1732         }
1733
1734 #ifdef CONFIG_DEBUG_FS
1735         /* update debug settings */
1736         if (g->ops.ltc.sync_debugfs)
1737                 g->ops.ltc.sync_debugfs(g);
1738 #endif
1739
1740         gk20a_dbg_info("channel %d", c->hw_chid);
1741
1742         /* gk20a_channel_update releases this ref. */
1743         err = gk20a_busy(g->dev);
1744         if (err) {
1745                 gk20a_err(d, "failed to host gk20a to submit gpfifo");
1746                 return err;
1747         }
1748
1749         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1750                                           c->hw_chid,
1751                                           num_entries,
1752                                           flags,
1753                                           fence ? fence->id : 0,
1754                                           fence ? fence->value : 0);
1755         check_gp_put(g, c);
1756         update_gp_get(g, c);
1757
1758         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1759                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1760
1761         /* Make sure we have enough space for gpfifo entries. If not,
1762          * wait for signals from completed submits */
1763         if (gp_free_count(c) < num_entries + extra_entries) {
1764                 /* we can get here via locked ioctl and other paths too */
1765                 int locked_path = mutex_is_locked(&c->ioctl_lock);
1766                 if (locked_path)
1767                         mutex_unlock(&c->ioctl_lock);
1768
1769                 trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
1770                 err = wait_event_interruptible(c->submit_wq,
1771                         get_gp_free_count(c) >= num_entries + extra_entries ||
1772                         c->has_timedout);
1773                 trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);
1774
1775                 if (locked_path)
1776                         mutex_lock(&c->ioctl_lock);
1777         }
1778
1779         if (c->has_timedout) {
1780                 err = -ETIMEDOUT;
1781                 goto clean_up;
1782         }
1783
1784         if (err) {
1785                 gk20a_err(d, "timeout waiting for gpfifo space");
1786                 err = -EAGAIN;
1787                 goto clean_up;
1788         }
1789
1790         mutex_lock(&c->submit_lock);
1791
1792         if (!c->sync) {
1793                 c->sync = gk20a_channel_sync_create(c);
1794                 if (!c->sync) {
1795                         err = -ENOMEM;
1796                         mutex_unlock(&c->submit_lock);
1797                         goto clean_up;
1798                 }
1799                 if (g->ops.fifo.resetup_ramfc)
1800                         err = g->ops.fifo.resetup_ramfc(c);
1801                 if (err) {
                             /* don't return holding submit_lock or the busy ref */
                             mutex_unlock(&c->submit_lock);
1802                         goto clean_up;
                     }
1803         }
1804
1805         /*
1806          * Optionally insert a syncpoint wait at the beginning of the gpfifo
1807          * submission when the user requested one and the wait hasn't expired.
1808          * Validate that the id makes sense and elide the wait if it doesn't;
1809          * the only reason this case isn't rejected outright is to keep some
1810          * existing tests that trigger it running.
1811          */
1812         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1813                 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1814                         wait_fence_fd = fence->id;
1815                         err = c->sync->wait_fd(c->sync, wait_fence_fd,
1816                                         &wait_cmd, &pre_fence);
1817                 } else {
1818                         err = c->sync->wait_syncpt(c->sync, fence->id,
1819                                         fence->value, &wait_cmd, &pre_fence);
1820                 }
1821         }
1822         if (err) {
1823                 mutex_unlock(&c->submit_lock);
1824                 goto clean_up;
1825         }
1826
1827
1828         /* always insert syncpt increment at end of gpfifo submission
1829            to keep track of method completion for idle railgating */
1830         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1831                 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1832                                          &post_fence, need_wfi);
1833         else
1834                 err = c->sync->incr(c->sync, &incr_cmd,
1835                                     &post_fence);
1836         if (err) {
1837                 mutex_unlock(&c->submit_lock);
1838                 goto clean_up;
1839         }
1840
1841         if (wait_cmd) {
1842                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
1843                         u64_lo32(wait_cmd->gva);
1844                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
1845                         u64_hi32(wait_cmd->gva) |
1846                         pbdma_gp_entry1_length_f(wait_cmd->size);
1847                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1848                         0, wait_cmd->size, 0, wait_cmd->ptr);
1849
1850                 c->gpfifo.put = (c->gpfifo.put + 1) &
1851                         (c->gpfifo.entry_num - 1);
1852
1853                 /* save gp_put */
1854                 wait_cmd->gp_put = c->gpfifo.put;
1855         }
1856
1857         /*
1858          * Copy source gpfifo entries into the gpfifo ring buffer,
1859          * potentially splitting into two memcpies to handle the
1860          * ring buffer wrap-around case.
1861          */
1862         start = c->gpfifo.put;
1863         end = start + num_entries;
1864
1865         if (gpfifo) {
1866                 if (end > c->gpfifo.entry_num) {
1867                         int length0 = c->gpfifo.entry_num - start;
1868                         int length1 = num_entries - length0;
1869
1870                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1871                                 gpfifo,
1872                                 length0 * sizeof(*gpfifo));
1873
1874                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va,
1875                                 gpfifo + length0,
1876                                 length1 * sizeof(*gpfifo));
1877
1878                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1879                                         0, length0);
1880                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1881                                         length0, length1);
1882                 } else {
1883                         memcpy((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1884                                 gpfifo,
1885                                 num_entries * sizeof(*gpfifo));
1886
1887                         trace_write_pushbuffer_range(c, gpfifo, NULL,
1888                                         0, num_entries);
1889                 }
1890         } else {
1891                 struct nvgpu_gpfifo __user *user_gpfifo =
1892                         (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
1893                 if (end > c->gpfifo.entry_num) {
1894                         int length0 = c->gpfifo.entry_num - start;
1895                         int length1 = num_entries - length0;
1896
1897                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1898                                 user_gpfifo,
1899                                 length0 * sizeof(*user_gpfifo));
1900                         if (err) {
                                     /* copy_from_user() returns bytes not copied */
                                     err = -EFAULT;
1901                                 mutex_unlock(&c->submit_lock);
1902                                 goto clean_up;
1903                         }
1904
1905                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va,
1906                                 user_gpfifo + length0,
1907                                 length1 * sizeof(*user_gpfifo));
1908                         if (err) {
                                     err = -EFAULT;
1909                                 mutex_unlock(&c->submit_lock);
1910                                 goto clean_up;
1911                         }
1912
1913                         trace_write_pushbuffer_range(c, NULL, args,
1914                                         0, length0);
1915                         trace_write_pushbuffer_range(c, NULL, args,
1916                                         length0, length1);
1917                 } else {
1918                         err = copy_from_user((struct gpfifo *)c->gpfifo.mem.cpu_va + start,
1919                                 user_gpfifo,
1920                                 num_entries * sizeof(*user_gpfifo));
1921                         if (err) {
                                     err = -EFAULT;
1922                                 mutex_unlock(&c->submit_lock);
1923                                 goto clean_up;
1924                         }
1925
1926                         trace_write_pushbuffer_range(c, NULL, args,
1927                                         0, num_entries);
1928                 }
1929         }
1930
1931         c->gpfifo.put = (c->gpfifo.put + num_entries) &
1932                 (c->gpfifo.entry_num - 1);
1933
1934         if (incr_cmd) {
1935                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry0 =
1936                         u64_lo32(incr_cmd->gva);
1937                 ((struct gpfifo *)(c->gpfifo.mem.cpu_va))[c->gpfifo.put].entry1 =
1938                         u64_hi32(incr_cmd->gva) |
1939                         pbdma_gp_entry1_length_f(incr_cmd->size);
1940                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1941                         0, incr_cmd->size, 0, incr_cmd->ptr);
1942
1943                 c->gpfifo.put = (c->gpfifo.put + 1) &
1944                         (c->gpfifo.entry_num - 1);
1945
1946                 /* save gp_put */
1947                 incr_cmd->gp_put = c->gpfifo.put;
1948         }
1949
1950         gk20a_fence_put(c->last_submit.pre_fence);
1951         gk20a_fence_put(c->last_submit.post_fence);
1952         c->last_submit.pre_fence = pre_fence;
1953         c->last_submit.post_fence = post_fence;
1954         if (fence_out)
1955                 *fence_out = gk20a_fence_get(post_fence);
1956
1957         /* TODO! Check for errors... */
1958         gk20a_channel_add_job(c, pre_fence, post_fence,
1959                                 skip_buffer_refcounting);
1960
1961         c->cmds_pending = true;
1962         gk20a_bar1_writel(g,
1963                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(),
1964                 c->gpfifo.put);
1965
1966         mutex_unlock(&c->submit_lock);
1967
1968         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1969                                              c->hw_chid,
1970                                              num_entries,
1971                                              flags,
1972                                              post_fence->syncpt_id,
1973                                              post_fence->syncpt_value);
1974
1975         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1976                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1977
1978         gk20a_dbg_fn("done");
1979         return err;
1980
1981 clean_up:
1982         gk20a_err(d, "gpfifo submit failed, err %d", err);
1983         free_priv_cmdbuf(c, wait_cmd);
1984         free_priv_cmdbuf(c, incr_cmd);
1985         gk20a_fence_put(pre_fence);
1986         gk20a_fence_put(post_fence);
1987         gk20a_idle(g->dev);
1988         return err;
1989 }
1990
1991 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1992 {
1993         struct channel_gk20a *c = g->fifo.channel+chid;
1994         c->g = NULL;
1995         c->hw_chid = chid;
1996         c->bound = false;
1997         spin_lock_init(&c->ref_obtain_lock);
1998         atomic_set(&c->ref_count, 0);
1999         c->referenceable = false;
2000         init_waitqueue_head(&c->ref_count_dec_wq);
2001         mutex_init(&c->ioctl_lock);
2002         mutex_init(&c->jobs_lock);
2003         mutex_init(&c->submit_lock);
2004         INIT_LIST_HEAD(&c->jobs);
2005 #if defined(CONFIG_GK20A_CYCLE_STATS)
2006         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
2007         mutex_init(&c->cs_client_mutex);
2008 #endif
2009         INIT_LIST_HEAD(&c->dbg_s_list);
2010         mutex_init(&c->dbg_s_lock);
2011         list_add(&c->free_chs, &g->fifo.free_chs);
2012
2013         return 0;
2014 }
2015
2016 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
2017 {
2018         int err = 0;
2019         struct gk20a_fence *fence = ch->last_submit.post_fence;
2020
2021         if (!ch->cmds_pending)
2022                 return 0;
2023
2024         /* Do not wait on a channel that has already timed out */
2025         if (ch->has_timedout)
2026                 return -ETIMEDOUT;
2027
2028         gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
2029                      fence->syncpt_value, fence->semaphore);
2030
2031         err = gk20a_fence_wait(fence, timeout);
2032         if (WARN_ON(err))
2033                 dev_warn(dev_from_gk20a(ch->g),
2034                        "timed out waiting for gk20a channel to finish");
2035         else
2036                 ch->cmds_pending = false;
2037
2038         return err;
2039 }
2040
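     /*
      * Backend for NVGPU_WAIT_TYPE_SEMAPHORE: kmap the dma-buf page holding
      * the semaphore word and sleep on the channel's semaphore_wq until the
      * word equals the requested payload, the channel times out, or the
      * caller's timeout elapses.
      */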
2041 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
2042                                         ulong id, u32 offset,
2043                                         u32 payload, long timeout)
2044 {
2045         struct platform_device *pdev = ch->g->dev;
2046         struct dma_buf *dmabuf;
2047         void *data;
2048         u32 *semaphore;
2049         int ret = 0;
2050         long remain;
2051
2052         /* do not wait if channel has timed out */
2053         if (ch->has_timedout)
2054                 return -ETIMEDOUT;
2055
2056         dmabuf = dma_buf_get(id);
2057         if (IS_ERR(dmabuf)) {
2058                 gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
2059                            id);
2060                 return -EINVAL;
2061         }
2062
2063         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
2064         if (!data) {
2065                 gk20a_err(&pdev->dev, "failed to map notifier memory");
2066                 ret = -EINVAL;
2067                 goto cleanup_put;
2068         }
2069
2070         semaphore = data + (offset & ~PAGE_MASK);
2071
2072         remain = wait_event_interruptible_timeout(
2073                         ch->semaphore_wq,
2074                         *semaphore == payload || ch->has_timedout,
2075                         timeout);
2076
2077         if (remain == 0 && *semaphore != payload)
2078                 ret = -ETIMEDOUT;
2079         else if (remain < 0)
2080                 ret = remain;
2081
2082         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
2083 cleanup_put:
2084         dma_buf_put(dmabuf);
2085         return ret;
2086 }
2087
2088 static int gk20a_channel_wait(struct channel_gk20a *ch,
2089                               struct nvgpu_wait_args *args)
2090 {
2091         struct device *d = dev_from_gk20a(ch->g);
2092         struct dma_buf *dmabuf;
2093         struct notification *notif;
2094         struct timespec tv;
2095         u64 jiffies;
2096         ulong id;
2097         u32 offset;
2098         unsigned long timeout;
2099         int remain, ret = 0;
2100
2101         gk20a_dbg_fn("");
2102
2103         if (ch->has_timedout)
2104                 return -ETIMEDOUT;
2105
2106         if (args->timeout == NVGPU_NO_TIMEOUT)
2107                 timeout = MAX_SCHEDULE_TIMEOUT;
2108         else
2109                 timeout = (u32)msecs_to_jiffies(args->timeout);
2110
2111         switch (args->type) {
2112         case NVGPU_WAIT_TYPE_NOTIFIER:
2113                 id = args->condition.notifier.dmabuf_fd;
2114                 offset = args->condition.notifier.offset;
2115
2116                 dmabuf = dma_buf_get(id);
2117                 if (IS_ERR(dmabuf)) {
2118                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
2119                                    id);
2120                         return -EINVAL;
2121                 }
2122
2123                 notif = dma_buf_vmap(dmabuf);
2124                 if (!notif) {
2125                         gk20a_err(d, "failed to map notifier memory");
2126                         return -ENOMEM;
2127                 }
2128
2129                 notif = (struct notification *)((uintptr_t)notif + offset);
2130
2131                 /* user should set status pending before
2132                  * calling this ioctl */
2133                 remain = wait_event_interruptible_timeout(
2134                                 ch->notifier_wq,
2135                                 notif->status == 0 || ch->has_timedout,
2136                                 timeout);
2137
2138                 if (remain == 0 && notif->status != 0) {
2139                         ret = -ETIMEDOUT;
2140                         goto notif_clean_up;
2141                 } else if (remain < 0) {
2142                         ret = -EINTR;
2143                         goto notif_clean_up;
2144                 }
2145
2146                 /* TBD: fill in correct information */
2147                 jiffies = get_jiffies_64();
2148                 jiffies_to_timespec(jiffies, &tv);
2149                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
2150                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
2151                 notif->info32 = 0xDEADBEEF; /* should be object name */
2152                 notif->info16 = ch->hw_chid; /* should be method offset */
2153
2154 notif_clean_up:
2155                 dma_buf_vunmap(dmabuf, notif);
2156                 return ret;
2157
2158         case NVGPU_WAIT_TYPE_SEMAPHORE:
2159                 ret = gk20a_channel_wait_semaphore(ch,
2160                                 args->condition.semaphore.dmabuf_fd,
2161                                 args->condition.semaphore.offset,
2162                                 args->condition.semaphore.payload,
2163                                 timeout);
2164
2165                 break;
2166
2167         default:
2168                 ret = -EINVAL;
2169                 break;
2170         }
2171
2172         return ret;
2173 }
2174
2175 /* poll events for semaphores */
2176
2177 static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
2178 {
2179         gk20a_dbg_fn("");
2180
2181         mutex_lock(&ev->lock);
2182
2183         ev->events_enabled = true;
2184         ev->num_pending_events = 0;
2185
2186         mutex_unlock(&ev->lock);
2187 }
2188
2189 static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
2190 {
2191         gk20a_dbg_fn("");
2192
2193         mutex_lock(&ev->lock);
2194
2195         ev->events_enabled = false;
2196         ev->num_pending_events = 0;
2197
2198         mutex_unlock(&ev->lock);
2199 }
2200
2201 static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
2202 {
2203         gk20a_dbg_fn("");
2204
2205         mutex_lock(&ev->lock);
2206
2207         if (ev->events_enabled &&
2208                         ev->num_pending_events > 0)
2209                 ev->num_pending_events--;
2210
2211         mutex_unlock(&ev->lock);
2212 }
2213
2214 static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
2215                           struct nvgpu_channel_events_ctrl_args *args)
2216 {
2217         int ret = 0;
2218
2219         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
2220                         "channel events ctrl cmd %d", args->cmd);
2221
2222         switch (args->cmd) {
2223         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
2224                 gk20a_channel_events_enable(&ch->poll_events);
2225                 break;
2226
2227         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
2228                 gk20a_channel_events_disable(&ch->poll_events);
2229                 break;
2230
2231         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
2232                 gk20a_channel_events_clear(&ch->poll_events);
2233                 break;
2234
2235         default:
2236                 gk20a_err(dev_from_gk20a(ch->g),
2237                            "unrecognized channel events ctrl cmd: 0x%x",
2238                            args->cmd);
2239                 ret = -EINVAL;
2240                 break;
2241         }
2242
2243         return ret;
2244 }
2245
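     /*
      * Post a poll event on the channel: bump the pending-event counter when
      * events are enabled. gk20a_channel_poll() reports POLLPRI | POLLIN
      * while the counter is non-zero and the EVENTS_CTRL CLEAR command
      * decrements it. The caller is responsible for waking the wait queue
      * (see gk20a_channel_semaphore_wakeup()).
      */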
2246 void gk20a_channel_event(struct channel_gk20a *ch)
2247 {
2248         mutex_lock(&ch->poll_events.lock);
2249
2250         if (ch->poll_events.events_enabled) {
2251                 gk20a_dbg_info("posting event on channel id %d",
2252                                 ch->hw_chid);
2253                 gk20a_dbg_info("%d channel events pending",
2254                                 ch->poll_events.num_pending_events);
2255
2256                 ch->poll_events.num_pending_events++;
2257                 /* not waking up here, caller does that */
2258         }
2259
2260         mutex_unlock(&ch->poll_events.lock);
2261 }
2262
2263 unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
2264 {
2265         unsigned int mask = 0;
2266         struct channel_gk20a *ch = filep->private_data;
2267
2268         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");
2269
2270         poll_wait(filep, &ch->semaphore_wq, wait);
2271
2272         mutex_lock(&ch->poll_events.lock);
2273
2274         if (ch->poll_events.events_enabled &&
2275                         ch->poll_events.num_pending_events > 0) {
2276                 gk20a_dbg_info("found pending event on channel id %d",
2277                                 ch->hw_chid);
2278                 gk20a_dbg_info("%d channel events pending",
2279                                 ch->poll_events.num_pending_events);
2280                 mask = (POLLPRI | POLLIN);
2281         }
2282
2283         mutex_unlock(&ch->poll_events.lock);
2284
2285         return mask;
2286 }
2287
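     /*
      * Translate the coarse NVGPU_PRIORITY_* levels into a timeslice value
      * for channel_gk20a_set_schedule_params(); per the inline comments the
      * hardware appears to scale the programmed value by 8 (<< 3) to get
      * microseconds.
      */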
2288 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2289                 u32 priority)
2290 {
2291         u32 timeslice_timeout;
2292         /* set priority of graphics channel */
2293         switch (priority) {
2294         case NVGPU_PRIORITY_LOW:
2295                 /* 64 << 3 = 512us */
2296                 timeslice_timeout = 64;
2297                 break;
2298         case NVGPU_PRIORITY_MEDIUM:
2299                 /* 128 << 3 = 1024us */
2300                 timeslice_timeout = 128;
2301                 break;
2302         case NVGPU_PRIORITY_HIGH:
2303                 /* 255 << 3 = 2048us */
2304                 timeslice_timeout = 255;
2305                 break;
2306         default:
2307                 pr_err("Unsupported priority");
2308                 return -EINVAL;
2309         }
2310         channel_gk20a_set_schedule_params(ch,
2311                         timeslice_timeout);
2312         return 0;
2313 }
2314
2315 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
2316                             struct nvgpu_zcull_bind_args *args)
2317 {
2318         struct gk20a *g = ch->g;
2319         struct gr_gk20a *gr = &g->gr;
2320
2321         gk20a_dbg_fn("");
2322
2323         return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
2324                                 args->gpu_va, args->mode);
2325 }
2326
2327 /* in this context the "channel" is the host1x channel which
2328  * maps to *all* gk20a channels */
2329 int gk20a_channel_suspend(struct gk20a *g)
2330 {
2331         struct fifo_gk20a *f = &g->fifo;
2332         u32 chid;
2333         bool channels_in_use = false;
2334         int err;
2335
2336         gk20a_dbg_fn("");
2337
2338         /* wait for engine idle */
2339         err = g->ops.fifo.wait_engine_idle(g);
2340         if (err)
2341                 return err;
2342
2343         for (chid = 0; chid < f->num_channels; chid++) {
2344                 struct channel_gk20a *ch = &f->channel[chid];
2345                 if (gk20a_channel_get(ch)) {
2346                         gk20a_dbg_info("suspend channel %d", chid);
2347                         /* disable channel */
2348                         g->ops.fifo.disable_channel(ch);
2349                         /* preempt the channel */
2350                         g->ops.fifo.preempt_channel(g, chid);
2351                         /* wait for channel update notifiers */
2352                         if (ch->update_fn &&
2353                                         work_pending(&ch->update_fn_work))
2354                                 flush_work(&ch->update_fn_work);
2355
2356                         channels_in_use = true;
2357
2358                         gk20a_channel_put(ch);
2359                 }
2360         }
2361
2362         if (channels_in_use) {
2363                 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
2364
2365                 for (chid = 0; chid < f->num_channels; chid++) {
2366                         if (gk20a_channel_get(&f->channel[chid])) {
2367                                 g->ops.fifo.unbind_channel(&f->channel[chid]);
2368                                 gk20a_channel_put(&f->channel[chid]);
2369                         }
2370                 }
2371         }
2372
2373         gk20a_dbg_fn("done");
2374         return 0;
2375 }
2376
2377 int gk20a_channel_resume(struct gk20a *g)
2378 {
2379         struct fifo_gk20a *f = &g->fifo;
2380         u32 chid;
2381         bool channels_in_use = false;
2382
2383         gk20a_dbg_fn("");
2384
2385         for (chid = 0; chid < f->num_channels; chid++) {
2386                 if (gk20a_channel_get(&f->channel[chid])) {
2387                         gk20a_dbg_info("resume channel %d", chid);
2388                         g->ops.fifo.bind_channel(&f->channel[chid]);
2389                         channels_in_use = true;
2390                         gk20a_channel_put(&f->channel[chid]);
2391                 }
2392         }
2393
2394         if (channels_in_use)
2395                 g->ops.fifo.update_runlist(g, 0, ~0, true, true);
2396
2397         gk20a_dbg_fn("done");
2398         return 0;
2399 }
2400
2401 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2402 {
2403         struct fifo_gk20a *f = &g->fifo;
2404         u32 chid;
2405
2406         gk20a_dbg_fn("");
2407
2408         for (chid = 0; chid < f->num_channels; chid++) {
2409                 struct channel_gk20a *c = g->fifo.channel+chid;
2410                 if (gk20a_channel_get(c)) {
2411                         gk20a_channel_event(c);
2412                         wake_up_interruptible_all(&c->semaphore_wq);
2413                         gk20a_channel_update(c, 0);
2414                         gk20a_channel_put(c);
2415                 }
2416         }
2417 }
2418
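     /*
      * Ioctl wrapper around gk20a_submit_channel_gpfifo(): on success the
      * returned post fence is handed back to user space either as a sync
      * fence fd (FLAGS_SYNC_FENCE) or as a raw syncpoint id/value pair.
      */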
2419 static int gk20a_ioctl_channel_submit_gpfifo(
2420         struct channel_gk20a *ch,
2421         struct nvgpu_submit_gpfifo_args *args)
2422 {
2423         struct gk20a_fence *fence_out;
2424         int ret = 0;
2425
2426         gk20a_dbg_fn("");
2427
2428         if (ch->has_timedout)
2429                 return -ETIMEDOUT;
2430
2431         ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
2432                                           args->flags, &args->fence,
2433                                           &fence_out);
2434
2435         if (ret)
2436                 goto clean_up;
2437
2438         /* Convert fence_out to something we can pass back to user space. */
2439         if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
2440                 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2441                         int fd = gk20a_fence_install_fd(fence_out);
2442                         if (fd < 0)
2443                                 ret = fd;
2444                         else
2445                                 args->fence.id = fd;
2446                 } else {
2447                         args->fence.id = fence_out->syncpt_id;
2448                         args->fence.value = fence_out->syncpt_value;
2449                 }
2450         }
2451         gk20a_fence_put(fence_out);
2452
2453 clean_up:
2454         return ret;
2455 }
2456
2457 void gk20a_init_channel(struct gpu_ops *gops)
2458 {
2459         gops->fifo.bind_channel = channel_gk20a_bind;
2460         gops->fifo.unbind_channel = channel_gk20a_unbind;
2461         gops->fifo.disable_channel = channel_gk20a_disable;
2462         gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
2463         gops->fifo.free_inst = channel_gk20a_free_inst;
2464         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
2465 }
2466
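     /*
      * Channel ioctl dispatcher: arguments are staged through a fixed-size
      * stack buffer (copied in for _IOC_WRITE, copied back for _IOC_READ), a
      * channel reference is held for the duration of the call (returning
      * -ETIMEDOUT if none can be taken), and ioctl_lock serializes
      * concurrent users of the same channel fd.
      */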
2467 long gk20a_channel_ioctl(struct file *filp,
2468         unsigned int cmd, unsigned long arg)
2469 {
2470         struct channel_gk20a *ch = filp->private_data;
2471         struct platform_device *dev = ch->g->dev;
2472         u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
2473         int err = 0;
2474
2475         gk20a_dbg_fn("start %d", _IOC_NR(cmd));
2476
2477         if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
2478                 (_IOC_NR(cmd) == 0) ||
2479                 (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
2480                 (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
2481                 return -EINVAL;
2482
2483         if (_IOC_DIR(cmd) & _IOC_WRITE) {
2484                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2485                         return -EFAULT;
2486         }
2487
2488         /* take a ref or return timeout if channel refs can't be taken */
2489         ch = gk20a_channel_get(ch);
2490         if (!ch)
2491                 return -ETIMEDOUT;
2492
2493         /* serialize concurrent userspace threads on this channel - most of
2494          * the channel state is not thread safe */
2495         mutex_lock(&ch->ioctl_lock);
2496
2497         /* this ioctl call keeps a ref to the file which keeps a ref to the
2498          * channel */
2499
2500         switch (cmd) {
2501         case NVGPU_IOCTL_CHANNEL_OPEN:
2502                 err = gk20a_channel_open_ioctl(ch->g,
2503                         (struct nvgpu_channel_open_args *)buf);
2504                 break;
2505         case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
2506                 break;
2507         case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2508                 err = gk20a_busy(dev);
2509                 if (err) {
2510                         dev_err(&dev->dev,
2511                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2512                                 __func__, cmd);
2513                         break;
2514                 }
2515                 err = ch->g->ops.gr.alloc_obj_ctx(ch,
2516                                 (struct nvgpu_alloc_obj_ctx_args *)buf);
2517                 gk20a_idle(dev);
2518                 break;
2519         case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
2520                 err = gk20a_busy(dev);
2521                 if (err) {
2522                         dev_err(&dev->dev,
2523                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2524                                 __func__, cmd);
2525                         break;
2526                 }
2527                 err = ch->g->ops.gr.free_obj_ctx(ch,
2528                                 (struct nvgpu_free_obj_ctx_args *)buf);
2529                 gk20a_idle(dev);
2530                 break;
2531         case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
2532                 err = gk20a_busy(dev);
2533                 if (err) {
2534                         dev_err(&dev->dev,
2535                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2536                                 __func__, cmd);
2537                         break;
2538                 }
2539                 err = gk20a_alloc_channel_gpfifo(ch,
2540                                 (struct nvgpu_alloc_gpfifo_args *)buf);
2541                 gk20a_idle(dev);
2542                 break;
2543         case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2544                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2545                                 (struct nvgpu_submit_gpfifo_args *)buf);
2546                 break;
2547         case NVGPU_IOCTL_CHANNEL_WAIT:
2548                 err = gk20a_busy(dev);
2549                 if (err) {
2550                         dev_err(&dev->dev,
2551                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2552                                 __func__, cmd);
2553                         break;
2554                 }
2555
2556                 /* waiting itself is thread-safe; holding this mutex across
2557                  * the wait could deadlock in certain conditions */
2558                 mutex_unlock(&ch->ioctl_lock);
2559
2560                 err = gk20a_channel_wait(ch,
2561                                 (struct nvgpu_wait_args *)buf);
2562
2563                 mutex_lock(&ch->ioctl_lock);
2564
2565                 gk20a_idle(dev);
2566                 break;
2567         case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
2568                 err = gk20a_busy(dev);
2569                 if (err) {
2570                         dev_err(&dev->dev,
2571                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2572                                 __func__, cmd);
2573                         break;
2574                 }
2575                 err = gk20a_channel_zcull_bind(ch,
2576                                 (struct nvgpu_zcull_bind_args *)buf);
2577                 gk20a_idle(dev);
2578                 break;
2579         case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2580                 err = gk20a_busy(dev);
2581                 if (err) {
2582                         dev_err(&dev->dev,
2583                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2584                                 __func__, cmd);
2585                         break;
2586                 }
2587                 err = gk20a_init_error_notifier(ch,
2588                                 (struct nvgpu_set_error_notifier *)buf);
2589                 gk20a_idle(dev);
2590                 break;
2591 #ifdef CONFIG_GK20A_CYCLE_STATS
2592         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
2593                 err = gk20a_busy(dev);
2594                 if (err) {
2595                         dev_err(&dev->dev,
2596                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2597                                 __func__, cmd);
2598                         break;
2599                 }
2600                 err = gk20a_channel_cycle_stats(ch,
2601                                 (struct nvgpu_cycle_stats_args *)buf);
2602                 gk20a_idle(dev);
2603                 break;
2604 #endif
2605         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
2606         {
2607                 u32 timeout =
2608                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2609                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2610                            timeout, ch->hw_chid);
2611                 ch->timeout_ms_max = timeout;
2612                 break;
2613         }
2614         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2615         {
2616                 u32 timeout =
2617                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2618                 bool timeout_debug_dump = !((u32)
2619                         ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
2620                         (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
2621                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2622                            timeout, ch->hw_chid);
2623                 ch->timeout_ms_max = timeout;
2624                 ch->timeout_debug_dump = timeout_debug_dump;
2625                 break;
2626         }
2627         case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
2628                 ((struct nvgpu_get_param_args *)buf)->value =
2629                         ch->has_timedout;
2630                 break;
2631         case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
2632                 err = gk20a_busy(dev);
2633                 if (err) {
2634                         dev_err(&dev->dev,
2635                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2636                                 __func__, cmd);
2637                         break;
2638                 }
2639                 gk20a_channel_set_priority(ch,
2640                         ((struct nvgpu_set_priority_args *)buf)->priority);
2641                 gk20a_idle(dev);
2642                 break;
2643         case NVGPU_IOCTL_CHANNEL_ENABLE:
2644                 err = gk20a_busy(dev);
2645                 if (err) {
2646                         dev_err(&dev->dev,
2647                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2648                                 __func__, cmd);
2649                         break;
2650                 }
2651                 /* enable channel */
2652                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2653                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2654                         ccsr_channel_enable_set_true_f());
2655                 gk20a_idle(dev);
2656                 break;
2657         case NVGPU_IOCTL_CHANNEL_DISABLE:
2658                 err = gk20a_busy(dev);
2659                 if (err) {
2660                         dev_err(&dev->dev,
2661                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2662                                 __func__, cmd);
2663                         break;
2664                 }
2665                 /* disable channel */
2666                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2667                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2668                         ccsr_channel_enable_clr_true_f());
2669                 gk20a_idle(dev);
2670                 break;
2671         case NVGPU_IOCTL_CHANNEL_PREEMPT:
2672                 err = gk20a_busy(dev);
2673                 if (err) {
2674                         dev_err(&dev->dev,
2675                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2676                                 __func__, cmd);
2677                         break;
2678                 }
2679                 err = gk20a_fifo_preempt(ch->g, ch);
2680                 gk20a_idle(dev);
2681                 break;
2682         case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
2683                 err = gk20a_busy(dev);
2684                 if (err) {
2685                         dev_err(&dev->dev,
2686                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2687                                 __func__, cmd);
2688                         break;
2689                 }
2690                 err = gk20a_fifo_force_reset_ch(ch, true);
2691                 gk20a_idle(dev);
2692                 break;
2693         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
2694                 err = gk20a_channel_events_ctrl(ch,
2695                            (struct nvgpu_channel_events_ctrl_args *)buf);
2696                 break;
2697 #ifdef CONFIG_GK20A_CYCLE_STATS
2698         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
2699                 err = gk20a_busy(dev);
2700                 if (err) {
2701                         dev_err(&dev->dev,
2702                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2703                                 __func__, cmd);
2704                         break;
2705                 }
2706                 err = gk20a_channel_cycle_stats_snapshot(ch,
2707                                 (struct nvgpu_cycle_stats_snapshot_args *)buf);
2708                 gk20a_idle(dev);
2709                 break;
2710 #endif
2711         default:
2712                 dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2713                 err = -ENOTTY;
2714                 break;
2715         }
2716
2717         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ) &&
2718             copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
                     /* copy_to_user() returns bytes not copied */
                     err = -EFAULT;
2719
2720         mutex_unlock(&ch->ioctl_lock);
2721
2722         gk20a_channel_put(ch);
2723
2724         gk20a_dbg_fn("end");
2725
2726         return err;
2727 }