gpu: nvgpu: unify instance block creation
[linux-3.10.git] / drivers/gpu/nvgpu/gk20a/channel_gk20a.c
1 /*
2  * GK20A Graphics channel
3  *
4  * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/nvhost.h>
20 #include <linux/list.h>
21 #include <linux/delay.h>
22 #include <linux/highmem.h> /* needed for nvmap.h */
23 #include <trace/events/gk20a.h>
24 #include <linux/scatterlist.h>
25 #include <linux/file.h>
26 #include <linux/anon_inodes.h>
27 #include <linux/dma-buf.h>
28
29 #include "debug_gk20a.h"
30
31 #include "gk20a.h"
32 #include "dbg_gpu_gk20a.h"
33 #include "fence_gk20a.h"
34 #include "semaphore_gk20a.h"
35
36 #include "hw_ram_gk20a.h"
37 #include "hw_fifo_gk20a.h"
38 #include "hw_pbdma_gk20a.h"
39 #include "hw_ccsr_gk20a.h"
40 #include "hw_ltc_gk20a.h"
41
42 #define NVMAP_HANDLE_PARAM_SIZE 1
43
44 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
45 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
46
47 static void free_priv_cmdbuf(struct channel_gk20a *c,
48                              struct priv_cmd_entry *e);
49 static void recycle_priv_cmdbuf(struct channel_gk20a *c);
50
51 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
52 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
53
54 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
55 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
56
57 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
58
59 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
60                                         bool add);
61 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
62
63 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
64 {
65         struct channel_gk20a *ch = NULL;
66         int chid;
67
68         mutex_lock(&f->ch_inuse_mutex);
69         for (chid = 0; chid < f->num_channels; chid++) {
70                 if (!f->channel[chid].in_use) {
71                         f->channel[chid].in_use = true;
72                         ch = &f->channel[chid];
73                         break;
74                 }
75         }
76         mutex_unlock(&f->ch_inuse_mutex);
77
78         return ch;
79 }
80
81 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
82 {
83         mutex_lock(&f->ch_inuse_mutex);
84         f->channel[c->hw_chid].in_use = false;
85         mutex_unlock(&f->ch_inuse_mutex);
86 }
87
88 int channel_gk20a_commit_va(struct channel_gk20a *c)
89 {
90         u64 addr;
91         u32 addr_lo;
92         u32 addr_hi;
93         void *inst_ptr;
94
95         gk20a_dbg_fn("");
96
97         inst_ptr = c->inst_block.cpuva;
98         if (!inst_ptr)
99                 return -ENOMEM;
100
101         addr = gk20a_mm_iova_addr(c->g, c->vm->pdes.sgt->sgl);
102         addr_lo = u64_lo32(addr >> 12);
103         addr_hi = u64_hi32(addr);
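        /*
         * The page directory base is programmed as two words: the low word
         * appears to carry the page-aligned address in 4 KiB units (hence
         * the >> 12), the high word the upper 32 bits of the IOVA.
         */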
104
105         gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
106                    (u64)addr, addr_lo, addr_hi);
107
108         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
109                 ram_in_page_dir_base_target_vid_mem_f() |
110                 ram_in_page_dir_base_vol_true_f() |
111                 ram_in_page_dir_base_lo_f(addr_lo));
112
113         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
114                 ram_in_page_dir_base_hi_f(addr_hi));
115
116         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
117                  u64_lo32(c->vm->va_limit) | 0xFFF);
118
119         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
120                 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
121
122         if (c->g->ops.mm.set_big_page_size)
123                 c->g->ops.mm.set_big_page_size(c->g, inst_ptr,
124                                                c->vm->gmmu_page_sizes[gmmu_page_size_big]);
125
126         return 0;
127 }
128
129 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
130 {
131         u32 addr_lo;
132         u32 addr_hi;
133         void *inst_ptr;
134
135         gk20a_dbg_fn("");
136
137         inst_ptr = c->inst_block.cpuva;
138         if (!inst_ptr)
139                 return -ENOMEM;
140
141         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
142         addr_hi = u64_hi32(c->userd_iova);
143
144         gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
145                 c->hw_chid, (u64)c->userd_iova);
146
147         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
148                  pbdma_userd_target_vid_mem_f() |
149                  pbdma_userd_addr_f(addr_lo));
150
151         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
152                  pbdma_userd_target_vid_mem_f() |
153                  pbdma_userd_hi_addr_f(addr_hi));
154
155         return 0;
156 }
157
158 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
159                                 u32 timeslice_timeout)
160 {
161         void *inst_ptr;
162         int shift = 3;
163         int value = timeslice_timeout;
164
165         inst_ptr = c->inst_block.cpuva;
166         if (!inst_ptr)
167                 return -ENOMEM;
168
169         /* disable channel */
170         c->g->ops.fifo.disable_channel(c);
171
172         /* preempt the channel */
173         WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
174
175         /* value field is 8 bits long */
176         while (value >= 1 << 8) {
177                 value >>= 1;
178                 shift++;
179         }
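        /*
         * The loop keeps (value << shift) roughly constant while squeezing
         * value into 8 bits. Illustrative example (numbers not from a real
         * config): timeslice_timeout = 1000 enters as value 1000 / shift 3
         * and leaves as value 250 / shift 5.
         */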
180
181         /* time slice register is only 18 bits long */
182         if ((value << shift) >= 1<<19) {
183                 pr_err("Requested timeslice value is clamped to 18 bits\n");
184                 value = 255;
185                 shift = 10;
186         }
187
188         /* set new timeslice */
189         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
190                 value | (shift << 12) |
191                 fifo_runlist_timeslice_enable_true_f());
192
193         /* enable channel */
194         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
195                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
196                 ccsr_channel_enable_set_true_f());
197
198         return 0;
199 }
200
201 int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
202                         u64 gpfifo_base, u32 gpfifo_entries)
203 {
204         void *inst_ptr;
205
206         gk20a_dbg_fn("");
207
208         inst_ptr = c->inst_block.cpuva;
209         if (!inst_ptr)
210                 return -ENOMEM;
211
212         memset(inst_ptr, 0, ram_fc_size_val_v());
213
214         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
215                 pbdma_gp_base_offset_f(
216                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
217
218         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
219                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
220                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
221
222         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
223                  pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
224
225         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
226                 pbdma_formats_gp_fermi0_f() |
227                 pbdma_formats_pb_fermi1_f() |
228                 pbdma_formats_mp_fermi0_f());
229
230         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
231                 pbdma_pb_header_priv_user_f() |
232                 pbdma_pb_header_method_zero_f() |
233                 pbdma_pb_header_subchannel_zero_f() |
234                 pbdma_pb_header_level_main_f() |
235                 pbdma_pb_header_first_true_f() |
236                 pbdma_pb_header_type_inc_f());
237
238         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
239                 pbdma_subdevice_id_f(1) |
240                 pbdma_subdevice_status_active_f() |
241                 pbdma_subdevice_channel_dma_enable_f());
242
243         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
244
245         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
246                 pbdma_acquire_retry_man_2_f() |
247                 pbdma_acquire_retry_exp_2_f() |
248                 pbdma_acquire_timeout_exp_max_f() |
249                 pbdma_acquire_timeout_man_max_f() |
250                 pbdma_acquire_timeout_en_disable_f());
251
252         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
253                 fifo_runlist_timeslice_timeout_128_f() |
254                 fifo_runlist_timeslice_timescale_3_f() |
255                 fifo_runlist_timeslice_enable_true_f());
256
257         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
258                 fifo_pb_timeslice_timeout_16_f() |
259                 fifo_pb_timeslice_timescale_0_f() |
260                 fifo_pb_timeslice_enable_true_f());
261
262         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
263
264         return channel_gk20a_commit_userd(c);
265 }
266
267 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
268 {
269         BUG_ON(!c->userd_cpu_va);
270
271         gk20a_dbg_fn("");
272
273         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
274         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
275         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
276         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
277         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
278         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
279         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
280         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
281         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
282         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
283
284         return 0;
285 }
286
287 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
288 {
289         struct gk20a *g = ch_gk20a->g;
290         struct fifo_gk20a *f = &g->fifo;
291         struct fifo_engine_info_gk20a *engine_info =
292                 f->engine_info + ENGINE_GR_GK20A;
293
294         u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
295                 >> ram_in_base_shift_v();
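        /*
         * The inst pointer field takes the instance block base shifted down
         * by ram_in_base_shift_v(), which amounts to 4 KiB granularity here.
         */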
296
297         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
298                 ch_gk20a->hw_chid, inst_ptr);
299
300         ch_gk20a->bound = true;
301
302         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
303                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
304                  ~ccsr_channel_runlist_f(~0)) |
305                  ccsr_channel_runlist_f(engine_info->runlist_id));
306
307         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
308                 ccsr_channel_inst_ptr_f(inst_ptr) |
309                 ccsr_channel_inst_target_vid_mem_f() |
310                 ccsr_channel_inst_bind_true_f());
311
312         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
313                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
314                  ~ccsr_channel_enable_set_f(~0)) |
315                  ccsr_channel_enable_set_true_f());
316 }
317
318 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
319 {
320         struct gk20a *g = ch_gk20a->g;
321
322         gk20a_dbg_fn("");
323
324         if (ch_gk20a->bound)
325                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
326                         ccsr_channel_inst_ptr_f(0) |
327                         ccsr_channel_inst_bind_false_f());
328
329         ch_gk20a->bound = false;
330
331         /*
332          * If we are aggressive then we can destroy the syncpt
333          * resource at this point;
334          * if not, then it will be destroyed at channel_free().
335          */
336         if (ch_gk20a->sync && ch_gk20a->sync->aggressive_destroy) {
337                 ch_gk20a->sync->destroy(ch_gk20a->sync);
338                 ch_gk20a->sync = NULL;
339         }
340 }
341
342 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
343 {
344         int err;
345
346         gk20a_dbg_fn("");
347
348         err = gk20a_alloc_inst_block(g, &ch->inst_block);
349         if (err)
350                 return err;
351
352         gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
353                 ch->hw_chid, (u64)ch->inst_block.cpu_pa);
354
355         gk20a_dbg_fn("done");
356         return 0;
357 }
358
359 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
360 {
361         gk20a_free_inst_block(g, &ch->inst_block);
362 }
363
364 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
365 {
366         return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
367 }
368
369 void channel_gk20a_enable(struct channel_gk20a *ch)
370 {
371         /* enable channel */
372         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
373                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
374                 ccsr_channel_enable_set_true_f());
375 }
376
377 void channel_gk20a_disable(struct channel_gk20a *ch)
378 {
379         /* disable channel */
380         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
381                 gk20a_readl(ch->g,
382                         ccsr_channel_r(ch->hw_chid)) |
383                         ccsr_channel_enable_clr_true_f());
384 }
385
386 void gk20a_channel_abort(struct channel_gk20a *ch)
387 {
388         struct channel_gk20a_job *job, *n;
389         bool released_job_semaphore = false;
390
391         /* ensure no fences are pending */
392         mutex_lock(&ch->submit_lock);
393         if (ch->sync)
394                 ch->sync->set_min_eq_max(ch->sync);
395         mutex_unlock(&ch->submit_lock);
396
397         /* release all job semaphores (applies only to jobs that use
398            semaphore synchronization) */
399         mutex_lock(&ch->jobs_lock);
400         list_for_each_entry_safe(job, n, &ch->jobs, list) {
401                 if (job->post_fence->semaphore) {
402                         gk20a_semaphore_release(job->post_fence->semaphore);
403                         released_job_semaphore = true;
404                 }
405         }
406         mutex_unlock(&ch->jobs_lock);
407
408         ch->g->ops.fifo.disable_channel(ch);
409
410         if (released_job_semaphore) {
411                 wake_up_interruptible_all(&ch->semaphore_wq);
412                 gk20a_channel_update(ch, 0);
413         }
414 }
415
416 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
417 {
418         bool channel_idle = false;
419         unsigned long end_jiffies = jiffies +
420                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
421
422         do {
423                 mutex_lock(&ch->jobs_lock);
424                 channel_idle = list_empty(&ch->jobs);
425                 mutex_unlock(&ch->jobs_lock);
426                 if (channel_idle)
427                         break;
428
429                 usleep_range(1000, 3000);
430         } while (time_before(jiffies, end_jiffies)
431                         || !tegra_platform_is_silicon());
432
433         if (!channel_idle) {
434                 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
435                                 ch->hw_chid);
436                 return -EBUSY;
437         }
438
439         return 0;
440 }
441
442 void gk20a_disable_channel(struct channel_gk20a *ch,
443                            bool finish,
444                            unsigned long finish_timeout)
445 {
446         if (finish) {
447                 int err = gk20a_channel_finish(ch, finish_timeout);
448                 WARN_ON(err);
449         }
450
451         /* disable the channel from hw and increment syncpoints */
452         gk20a_channel_abort(ch);
453
454         gk20a_wait_channel_idle(ch);
455
456         /* preempt the channel */
457         ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
458
459         /* remove channel from runlist */
460         channel_gk20a_update_runlist(ch, false);
461 }
462
463 #if defined(CONFIG_GK20A_CYCLE_STATS)
464
465 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
466 {
467         /* disable existing cyclestats buffer */
468         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
469         if (ch->cyclestate.cyclestate_buffer_handler) {
470                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
471                                 ch->cyclestate.cyclestate_buffer);
472                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
473                 ch->cyclestate.cyclestate_buffer_handler = NULL;
474                 ch->cyclestate.cyclestate_buffer = NULL;
475                 ch->cyclestate.cyclestate_buffer_size = 0;
476         }
477         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
478 }
479
480 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
481                        struct nvgpu_cycle_stats_args *args)
482 {
483         struct dma_buf *dmabuf;
484         void *virtual_address;
485
486         if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {
487
488                 /* set up new cyclestats buffer */
489                 dmabuf = dma_buf_get(args->dmabuf_fd);
490                 if (IS_ERR(dmabuf))
491                         return PTR_ERR(dmabuf);
492                 virtual_address = dma_buf_vmap(dmabuf);
493                 if (!virtual_address)
494                         return -ENOMEM;
495
496                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
497                 ch->cyclestate.cyclestate_buffer = virtual_address;
498                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
499                 return 0;
500
501         } else if (!args->dmabuf_fd &&
502                         ch->cyclestate.cyclestate_buffer_handler) {
503                 gk20a_free_cycle_stats_buffer(ch);
504                 return 0;
505
506         } else if (!args->dmabuf_fd &&
507                         !ch->cyclestate.cyclestate_buffer_handler) {
508                 /* no request from GL */
509                 return 0;
510
511         } else {
512                 pr_err("channel already has cyclestats buffer\n");
513                 return -EINVAL;
514         }
515 }
516 #endif
517
518 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
519                 struct nvgpu_set_error_notifier *args) {
520         void *va;
521
522         struct dma_buf *dmabuf;
523
524         if (!args->mem) {
525                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
526                 return -EINVAL;
527         }
528
529         dmabuf = dma_buf_get(args->mem);
530
531         if (ch->error_notifier_ref)
532                 gk20a_free_error_notifiers(ch);
533
534         if (IS_ERR(dmabuf)) {
535                 pr_err("Invalid handle: %d\n", args->mem);
536                 return -EINVAL;
537         }
538         /* map handle */
539         va = dma_buf_vmap(dmabuf);
540         if (!va) {
541                 dma_buf_put(dmabuf);
542                 pr_err("Cannot map notifier handle\n");
543                 return -ENOMEM;
544         }
545
546         /* set channel notifiers pointer */
547         ch->error_notifier_ref = dmabuf;
548         ch->error_notifier = va + args->offset;
549         ch->error_notifier_va = va;
550         memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
551         return 0;
552 }
553
554 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
555 {
556         if (ch->error_notifier_ref) {
557                 struct timespec time_data;
558                 u64 nsec;
559                 getnstimeofday(&time_data);
560                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
561                                 (u64)time_data.tv_nsec;
562                 ch->error_notifier->time_stamp.nanoseconds[0] =
563                                 (u32)nsec;
564                 ch->error_notifier->time_stamp.nanoseconds[1] =
565                                 (u32)(nsec >> 32);
566                 ch->error_notifier->info32 = error;
567                 ch->error_notifier->status = 0xffff;
568                 gk20a_err(dev_from_gk20a(ch->g),
569                     "error notifier set to %d for ch %d\n", error, ch->hw_chid);
570         }
571 }
572
573 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
574 {
575         if (ch->error_notifier_ref) {
576                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
577                 dma_buf_put(ch->error_notifier_ref);
578                 ch->error_notifier_ref = NULL;
579                 ch->error_notifier = NULL;
580                 ch->error_notifier_va = NULL;
581         }
582 }
583
584 void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
585 {
586         struct gk20a *g = ch->g;
587         struct device *d = dev_from_gk20a(g);
588         struct fifo_gk20a *f = &g->fifo;
589         struct gr_gk20a *gr = &g->gr;
590         struct vm_gk20a *ch_vm = ch->vm;
591         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
592         struct dbg_session_gk20a *dbg_s;
593
594         gk20a_dbg_fn("");
595
596         /* if engine reset was deferred, perform it now */
597         mutex_lock(&f->deferred_reset_mutex);
598         if (g->fifo.deferred_reset_pending) {
599                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
600                            " deferred, running now");
601                 fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
602                 g->fifo.mmu_fault_engines = 0;
603                 g->fifo.deferred_reset_pending = false;
604         }
605         mutex_unlock(&f->deferred_reset_mutex);
606
607         if (!ch->bound)
608                 return;
609
610         if (!gk20a_channel_as_bound(ch))
611                 goto unbind;
612
613         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
614                         timeout);
615
616         gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);
617
618         gk20a_free_error_notifiers(ch);
619
620         /* release channel ctx */
621         g->ops.gr.free_channel_ctx(ch);
622
623         gk20a_gr_flush_channel_tlb(gr);
624
625         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
626
627         /* free gpfifo */
628         if (ch->gpfifo.gpu_va)
629                 gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
630                         ch->gpfifo.size, gk20a_mem_flag_none);
631         if (ch->gpfifo.cpu_va)
632                 dma_free_coherent(d, ch->gpfifo.size,
633                         ch->gpfifo.cpu_va, ch->gpfifo.iova);
634         ch->gpfifo.cpu_va = NULL;
635         ch->gpfifo.iova = 0;
636
637         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
638
639 #if defined(CONFIG_GK20A_CYCLE_STATS)
640         gk20a_free_cycle_stats_buffer(ch);
641 #endif
642
643         channel_gk20a_free_priv_cmdbuf(ch);
644
645         /* sync must be destroyed before releasing channel vm */
646         if (ch->sync) {
647                 ch->sync->destroy(ch->sync);
648                 ch->sync = NULL;
649         }
650
651         /* release channel binding to the as_share */
652         if (ch_vm->as_share)
653                 gk20a_as_release_share(ch_vm->as_share);
654         else
655                 gk20a_vm_put(ch_vm);
656
657         spin_lock(&ch->update_fn_lock);
658         ch->update_fn = NULL;
659         ch->update_fn_data = NULL;
660         spin_unlock(&ch->update_fn_lock);
661         cancel_work_sync(&ch->update_fn_work);
662
663 unbind:
664         if (gk20a_is_channel_marked_as_tsg(ch))
665                 gk20a_tsg_unbind_channel(ch);
666
667         g->ops.fifo.unbind_channel(ch);
668         g->ops.fifo.free_inst(g, ch);
669
670         ch->vpr = false;
671         ch->vm = NULL;
672
673         mutex_lock(&ch->submit_lock);
674         gk20a_fence_put(ch->last_submit.pre_fence);
675         gk20a_fence_put(ch->last_submit.post_fence);
676         ch->last_submit.pre_fence = NULL;
677         ch->last_submit.post_fence = NULL;
678         mutex_unlock(&ch->submit_lock);
679         WARN_ON(ch->sync);
680
681         /* unlink all debug sessions */
682         mutex_lock(&ch->dbg_s_lock);
683
684         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
685                 dbg_s->ch = NULL;
686                 list_del_init(&dbg_s->dbg_s_list_node);
687         }
688
689         mutex_unlock(&ch->dbg_s_lock);
690
691         /* ALWAYS last */
692         release_used_channel(f, ch);
693 }
694
695 int gk20a_channel_release(struct inode *inode, struct file *filp)
696 {
697         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
698         struct gk20a *g = ch ? ch->g : NULL;
699         int err;
700
701         if (!ch)
702                 return 0;
703
704         trace_gk20a_channel_release(dev_name(&g->dev->dev));
705
706         err = gk20a_busy(ch->g->dev);
707         if (err) {
708                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
709                         ch->hw_chid);
710                 return err;
711         }
712         gk20a_free_channel(ch, true);
713         gk20a_idle(ch->g->dev);
714
715         filp->private_data = NULL;
716         return 0;
717 }
718
719 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
720 {
721         struct channel_gk20a *ch =
722                 container_of(work, struct channel_gk20a, update_fn_work);
723         void (*update_fn)(struct channel_gk20a *, void *);
724         void *update_fn_data;
725
726         spin_lock(&ch->update_fn_lock);
727         update_fn = ch->update_fn;
728         update_fn_data = ch->update_fn_data;
729         spin_unlock(&ch->update_fn_lock);
730
731         if (update_fn)
732                 update_fn(ch, update_fn_data);
733 }
734
735 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
736                 void (*update_fn)(struct channel_gk20a *, void *),
737                 void *update_fn_data)
738 {
739         struct channel_gk20a *ch = gk20a_open_new_channel(g);
740
741         if (ch) {
742                 spin_lock(&ch->update_fn_lock);
743                 ch->update_fn = update_fn;
744                 ch->update_fn_data = update_fn_data;
745                 spin_unlock(&ch->update_fn_lock);
746         }
747
748         return ch;
749 }
750
751 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
752 {
753         struct fifo_gk20a *f = &g->fifo;
754         struct channel_gk20a *ch;
755
756         ch = acquire_unused_channel(f);
757         if (ch == NULL) {
758                 /* TBD: we want to make this virtualizable */
759                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
760                 return NULL;
761         }
762
763         ch->g = g;
764
765         if (g->ops.fifo.alloc_inst(g, ch)) {
766                 ch->in_use = false;
767                 gk20a_err(dev_from_gk20a(g),
768                            "failed to open gk20a channel, out of inst mem");
769
770                 return NULL;
771         }
772         g->ops.fifo.bind_channel(ch);
773         ch->pid = current->pid;
774
775         /* By default, channel is regular (non-TSG) channel */
776         ch->tsgid = NVGPU_INVALID_TSG_ID;
777
778         /* reset timeout counter and update timestamp */
779         ch->timeout_accumulated_ms = 0;
780         ch->timeout_gpfifo_get = 0;
781         /* set gr host default timeout */
782         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
783         ch->timeout_debug_dump = true;
784         ch->has_timedout = false;
785         ch->obj_class = 0;
786
787         /* The channel is *not* runnable at this point. It still needs to have
788          * an address space bound and a gpfifo and grctx allocated. */
789
790         init_waitqueue_head(&ch->notifier_wq);
791         init_waitqueue_head(&ch->semaphore_wq);
792         init_waitqueue_head(&ch->submit_wq);
793
794         mutex_init(&ch->poll_events.lock);
795         ch->poll_events.events_enabled = false;
796         ch->poll_events.num_pending_events = 0;
797
798         ch->update_fn = NULL;
799         ch->update_fn_data = NULL;
800         spin_lock_init(&ch->update_fn_lock);
801         INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
802
803         return ch;
804 }
805
806 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
807 {
808         int err;
809         struct channel_gk20a *ch;
810
811         trace_gk20a_channel_open(dev_name(&g->dev->dev));
812
813         err = gk20a_busy(g->dev);
814         if (err) {
815                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
816                 return err;
817         }
818         ch = gk20a_open_new_channel(g);
819         gk20a_idle(g->dev);
820         if (!ch) {
821                 gk20a_err(dev_from_gk20a(g),
822                         "failed to get f");
823                 return -ENOMEM;
824         }
825
826         filp->private_data = ch;
827         return 0;
828 }
829
830 int gk20a_channel_open(struct inode *inode, struct file *filp)
831 {
832         struct gk20a *g = container_of(inode->i_cdev,
833                         struct gk20a, channel.cdev);
834         int ret;
835
836         gk20a_dbg_fn("start");
837         ret = __gk20a_channel_open(g, filp);
838
839         gk20a_dbg_fn("end");
840         return ret;
841 }
842
843 /* Allocate a private cmd buffer,
844    used for inserting commands before/after user-submitted buffers. */
845 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
846 {
847         struct device *d = dev_from_gk20a(c->g);
848         struct vm_gk20a *ch_vm = c->vm;
849         struct priv_cmd_queue *q = &c->priv_cmd_q;
850         struct priv_cmd_entry *e;
851         u32 i = 0, size;
852         int err = 0;
853         struct sg_table *sgt;
854         dma_addr_t iova;
855
856         /* Kernel can insert gpfifos before and after user gpfifos.
857            Before user gpfifos, kernel inserts fence_wait, which takes
858            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
859            After user gpfifos, kernel inserts fence_get, which takes
860            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
861            = 6 dwords.
862            Worst case, if the kernel adds both of them for every user gpfifo,
863            the max size of priv_cmdbuf is:
864            (gpfifo entry number) * (2 / 3) * (4 + 6) * 4 bytes. */
865         size = roundup_pow_of_two(
866                 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
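        /*
         * Illustrative sizing example (numbers not from a real config): a
         * gpfifo of 4096 entries gives 4096 * 2 * 10 * 4 / 3 = 109226 bytes,
         * rounded up to the next power of two, 128 KiB.
         */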
867
868         q->mem.base_cpuva = dma_alloc_coherent(d, size,
869                                         &iova,
870                                         GFP_KERNEL);
871         if (!q->mem.base_cpuva) {
872                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
873                 err = -ENOMEM;
874                 goto clean_up;
875         }
876
877         q->mem.base_iova = iova;
878         q->mem.size = size;
879
880         err = gk20a_get_sgtable(d, &sgt,
881                         q->mem.base_cpuva, q->mem.base_iova, size);
882         if (err) {
883                 gk20a_err(d, "%s: failed to create sg table\n", __func__);
884                 goto clean_up;
885         }
886
887         memset(q->mem.base_cpuva, 0, size);
888
889         q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
890                                         size,
891                                         0, /* flags */
892                                         gk20a_mem_flag_none);
893         if (!q->base_gpuva) {
894                 gk20a_err(d, "ch %d : failed to map gpu va "
895                            "for priv cmd buffer", c->hw_chid);
896                 err = -ENOMEM;
897                 goto clean_up_sgt;
898         }
899
900         q->size = q->mem.size / sizeof (u32);
901
902         INIT_LIST_HEAD(&q->head);
903         INIT_LIST_HEAD(&q->free);
904
905         /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
906         for (i = 0; i < q->size / 4; i++) {
907                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
908                 if (!e) {
909                         gk20a_err(d, "ch %d: fail to pre-alloc cmd entry",
910                                 c->hw_chid);
911                         err = -ENOMEM;
912                         goto clean_up_sgt;
913                 }
914                 e->pre_alloc = true;
915                 list_add(&e->list, &q->free);
916         }
917
918         gk20a_free_sgtable(&sgt);
919
920         return 0;
921
922 clean_up_sgt:
923         gk20a_free_sgtable(&sgt);
924 clean_up:
925         channel_gk20a_free_priv_cmdbuf(c);
926         return err;
927 }
928
929 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
930 {
931         struct device *d = dev_from_gk20a(c->g);
932         struct vm_gk20a *ch_vm = c->vm;
933         struct priv_cmd_queue *q = &c->priv_cmd_q;
934         struct priv_cmd_entry *e;
935         struct list_head *pos, *tmp, *head;
936
937         if (q->size == 0)
938                 return;
939
940         if (q->base_gpuva)
941                 gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
942                                 q->mem.size, gk20a_mem_flag_none);
943         if (q->mem.base_cpuva)
944                 dma_free_coherent(d, q->mem.size,
945                         q->mem.base_cpuva, q->mem.base_iova);
946         q->mem.base_cpuva = NULL;
947         q->mem.base_iova = 0;
948
949         /* free used list */
950         head = &q->head;
951         list_for_each_safe(pos, tmp, head) {
952                 e = container_of(pos, struct priv_cmd_entry, list);
953                 free_priv_cmdbuf(c, e);
954         }
955
956         /* free free list */
957         head = &q->free;
958         list_for_each_safe(pos, tmp, head) {
959                 e = container_of(pos, struct priv_cmd_entry, list);
960                 e->pre_alloc = false;
961                 free_priv_cmdbuf(c, e);
962         }
963
964         memset(q, 0, sizeof(struct priv_cmd_queue));
965 }
966
967 /* allocate a cmd buffer with given size. size is number of u32 entries */
968 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
969                              struct priv_cmd_entry **entry)
970 {
971         struct priv_cmd_queue *q = &c->priv_cmd_q;
972         struct priv_cmd_entry *e;
973         struct list_head *node;
974         u32 free_count;
975         u32 size = orig_size;
976         bool no_retry = false;
977
978         gk20a_dbg_fn("size %d", orig_size);
979
980         *entry = NULL;
981
982         /* if the free space at the end is less than requested, increase the size
983          * so that the real allocated space starts from the beginning. */
984         if (q->put + size > q->size)
985                 size = orig_size + (q->size - q->put);
986
987         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
988                         c->hw_chid, q->get, q->put);
989
990 TRY_AGAIN:
991         free_count = (q->size - (q->put - q->get) - 1) % q->size;
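        /*
         * Standard ring-buffer accounting: one slot is kept unused so that
         * put == get can only mean "empty". Illustrative example: size 1024,
         * put 1000, get 100 gives (1024 - 900 - 1) % 1024 = 123 free entries.
         */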
992
993         if (size > free_count) {
994                 if (!no_retry) {
995                         recycle_priv_cmdbuf(c);
996                         no_retry = true;
997                         goto TRY_AGAIN;
998                 } else
999                         return -EAGAIN;
1000         }
1001
1002         if (unlikely(list_empty(&q->free))) {
1003
1004                 gk20a_dbg_info("ch %d: run out of pre-alloc entries",
1005                         c->hw_chid);
1006
1007                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1008                 if (!e) {
1009                         gk20a_err(dev_from_gk20a(c->g),
1010                                 "ch %d: fail to allocate priv cmd entry",
1011                                 c->hw_chid);
1012                         return -ENOMEM;
1013                 }
1014         } else  {
1015                 node = q->free.next;
1016                 list_del(node);
1017                 e = container_of(node, struct priv_cmd_entry, list);
1018         }
1019
1020         e->size = orig_size;
1021         e->gp_get = c->gpfifo.get;
1022         e->gp_put = c->gpfifo.put;
1023         e->gp_wrap = c->gpfifo.wrap;
1024
1025         /* if we have increased size to skip free space at the end, set put
1026            to the beginning of the cmd buffer (0) + orig_size */
1027         if (size != orig_size) {
1028                 e->ptr = q->mem.base_cpuva;
1029                 e->gva = q->base_gpuva;
1030                 q->put = orig_size;
1031         } else {
1032                 e->ptr = q->mem.base_cpuva + q->put;
1033                 e->gva = q->base_gpuva + q->put * sizeof(u32);
1034                 q->put = (q->put + orig_size) & (q->size - 1);
1035         }
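        /*
         * The mask is a cheap modulo: q->size is a power of two, since the
         * queue was sized with roundup_pow_of_two() in
         * channel_gk20a_alloc_priv_cmdbuf().
         */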
1036
1037         /* we already handled q->put + size > q->size so BUG_ON this */
1038         BUG_ON(q->put > q->size);
1039
1040         /* add new entry to head since we free from head */
1041         list_add(&e->list, &q->head);
1042
1043         *entry = e;
1044
1045         gk20a_dbg_fn("done");
1046
1047         return 0;
1048 }
1049
1050 /* Don't call this to free an explicit cmd entry.
1051  * It doesn't update priv_cmd_queue get/put */
1052 static void free_priv_cmdbuf(struct channel_gk20a *c,
1053                              struct priv_cmd_entry *e)
1054 {
1055         struct priv_cmd_queue *q = &c->priv_cmd_q;
1056
1057         if (!e)
1058                 return;
1059
1060         list_del(&e->list);
1061
1062         if (unlikely(!e->pre_alloc))
1063                 kfree(e);
1064         else {
1065                 memset(e, 0, sizeof(struct priv_cmd_entry));
1066                 e->pre_alloc = true;
1067                 list_add(&e->list, &q->free);
1068         }
1069 }
1070
1071 /* free entries if they're no longer being used */
1072 static void recycle_priv_cmdbuf(struct channel_gk20a *c)
1073 {
1074         struct priv_cmd_queue *q = &c->priv_cmd_q;
1075         struct priv_cmd_entry *e, *tmp;
1076         struct list_head *head = &q->head;
1077         bool wrap_around, found = false;
1078
1079         gk20a_dbg_fn("");
1080
1081         /* Find the most recent free entry. Free it and everything before it */
1082         list_for_each_entry(e, head, list) {
1083
1084                 gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
1085                         "curr get:put:wrap %d:%d:%d",
1086                         c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
1087                         c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
1088
1089                 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
1090                 if (e->gp_get < e->gp_put) {
1091                         if (c->gpfifo.get >= e->gp_put ||
1092                             wrap_around) {
1093                                 found = true;
1094                                 break;
1095                         } else
1096                                 e->gp_get = c->gpfifo.get;
1097                 } else if (e->gp_get > e->gp_put) {
1098                         if (wrap_around &&
1099                             c->gpfifo.get >= e->gp_put) {
1100                                 found = true;
1101                                 break;
1102                         } else
1103                                 e->gp_get = c->gpfifo.get;
1104                 }
1105         }
1106
1107         if (found)
1108                 q->get = (e->ptr - q->mem.base_cpuva) + e->size;
1109         else {
1110                 gk20a_dbg_info("no free entry recycled");
1111                 return;
1112         }
1113
1114         list_for_each_entry_safe_continue(e, tmp, head, list) {
1115                 free_priv_cmdbuf(c, e);
1116         }
1117
1118         gk20a_dbg_fn("done");
1119 }
1120
1121
1122 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1123                 struct nvgpu_alloc_gpfifo_args *args)
1124 {
1125         struct gk20a *g = c->g;
1126         struct device *d = dev_from_gk20a(g);
1127         struct vm_gk20a *ch_vm;
1128         u32 gpfifo_size;
1129         int err = 0;
1130         struct sg_table *sgt;
1131         dma_addr_t iova;
1132
1133         /* Kernel can insert one extra gpfifo entry before user submitted gpfifos
1134            and another one after, for internal usage. Triple the requested size. */
1135         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
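        /*
         * Illustrative example: a request for 1024 entries becomes 3072 and
         * is rounded up to 4096. The power-of-two size is what later allows
         * put/get to wrap with a simple "& (entry_num - 1)" mask.
         */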
1136
1137         if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1138                 c->vpr = true;
1139
1140         /* an address space needs to have been bound at this point. */
1141         if (!gk20a_channel_as_bound(c)) {
1142                 gk20a_err(d,
1143                             "not bound to an address space at time of gpfifo"
1144                             " allocation.");
1145                 return -EINVAL;
1146         }
1147         ch_vm = c->vm;
1148
1149         c->cmds_pending = false;
1150         mutex_lock(&c->submit_lock);
1151         gk20a_fence_put(c->last_submit.pre_fence);
1152         gk20a_fence_put(c->last_submit.post_fence);
1153         c->last_submit.pre_fence = NULL;
1154         c->last_submit.post_fence = NULL;
1155         mutex_unlock(&c->submit_lock);
1156
1157         c->ramfc.offset = 0;
1158         c->ramfc.size = ram_in_ramfc_s() / 8;
1159
1160         if (c->gpfifo.cpu_va) {
1161                 gk20a_err(d, "channel %d : "
1162                            "gpfifo already allocated", c->hw_chid);
1163                 return -EEXIST;
1164         }
1165
1166         c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
1167         c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
1168                                                 c->gpfifo.size,
1169                                                 &iova,
1170                                                 GFP_KERNEL);
1171         if (!c->gpfifo.cpu_va) {
1172                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1173                 err = -ENOMEM;
1174                 goto clean_up;
1175         }
1176
1177         c->gpfifo.iova = iova;
1178         c->gpfifo.entry_num = gpfifo_size;
1179
1180         c->gpfifo.get = c->gpfifo.put = 0;
1181
1182         err = gk20a_get_sgtable(d, &sgt,
1183                         c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
1184         if (err) {
1185                 gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
1186                 goto clean_up;
1187         }
1188
1189         c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
1190                                         &sgt,
1191                                         c->gpfifo.size,
1192                                         0, /* flags */
1193                                         gk20a_mem_flag_none);
1194         if (!c->gpfifo.gpu_va) {
1195                 gk20a_err(d, "channel %d : failed to map"
1196                            " gpu_va for gpfifo", c->hw_chid);
1197                 err = -ENOMEM;
1198                 goto clean_up_sgt;
1199         }
1200
1201         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1202                 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1203
1204         channel_gk20a_setup_userd(c);
1205
1206         err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1207         if (err)
1208                 goto clean_up_unmap;
1209
1210         /* TBD: setup engine contexts */
1211
1212         err = channel_gk20a_alloc_priv_cmdbuf(c);
1213         if (err)
1214                 goto clean_up_unmap;
1215
1216         err = channel_gk20a_update_runlist(c, true);
1217         if (err)
1218                 goto clean_up_unmap;
1219
1220         gk20a_free_sgtable(&sgt);
1221
1222         gk20a_dbg_fn("done");
1223         return 0;
1224
1225 clean_up_unmap:
1226         gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
1227                 c->gpfifo.size, gk20a_mem_flag_none);
1228 clean_up_sgt:
1229         gk20a_free_sgtable(&sgt);
1230 clean_up:
1231         dma_free_coherent(d, c->gpfifo.size,
1232                 c->gpfifo.cpu_va, c->gpfifo.iova);
1233         c->gpfifo.cpu_va = NULL;
1234         c->gpfifo.iova = 0;
1235         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1236         gk20a_err(d, "fail");
1237         return err;
1238 }
1239
1240 static inline bool check_gp_put(struct gk20a *g,
1241                                 struct channel_gk20a *c)
1242 {
1243         u32 put;
1244         /* gp_put changed unexpectedly since last update? */
1245         put = gk20a_bar1_readl(g,
1246                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1247         if (c->gpfifo.put != put) {
1248                 /*TBD: BUG_ON/teardown on this*/
1249                 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1250                            "since last update");
1251                 c->gpfifo.put = put;
1252                 return false; /* surprise! */
1253         }
1254         return true; /* checked out ok */
1255 }
1256
1257 /* Call this periodically to refresh the cached get and see how the gpfifo is draining. */
1258 static inline u32 update_gp_get(struct gk20a *g,
1259                                 struct channel_gk20a *c)
1260 {
1261         u32 new_get = gk20a_bar1_readl(g,
1262                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
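        /*
         * If get went backwards, the hardware must have wrapped past the end
         * of the gpfifo, so toggle the wrap flag; recycle_priv_cmdbuf()
         * compares it against the wrap value saved in each cmd entry.
         */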
1263         if (new_get < c->gpfifo.get)
1264                 c->gpfifo.wrap = !c->gpfifo.wrap;
1265         c->gpfifo.get = new_get;
1266         return new_get;
1267 }
1268
1269 static inline u32 gp_free_count(struct channel_gk20a *c)
1270 {
1271         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1272                 c->gpfifo.entry_num;
1273 }
1274
1275 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1276                 u32 timeout_delta_ms)
1277 {
1278         u32 gpfifo_get = update_gp_get(ch->g, ch);
1279         /* Count consecutive timeout ISRs */
1280         if (gpfifo_get == ch->timeout_gpfifo_get) {
1281                 /* we didn't advance since previous channel timeout check */
1282                 ch->timeout_accumulated_ms += timeout_delta_ms;
1283         } else {
1284                 /* first timeout isr encountered */
1285                 ch->timeout_accumulated_ms = timeout_delta_ms;
1286         }
1287
1288         ch->timeout_gpfifo_get = gpfifo_get;
1289
1290         return ch->g->timeouts_enabled &&
1291                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1292 }
1293
1294
1295 /* Issue a syncpoint increment *preceded* by a wait-for-idle
1296  * command.  All commands on the channel will have been
1297  * consumed at the time the fence syncpoint increment occurs.
1298  */
1299 static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
1300 {
1301         struct priv_cmd_entry *cmd = NULL;
1302         struct gk20a *g = c->g;
1303         u32 free_count;
1304         int err;
1305
1306         if (c->has_timedout)
1307                 return -ETIMEDOUT;
1308
1309         update_gp_get(g, c);
1310         free_count = gp_free_count(c);
1311         if (unlikely(!free_count)) {
1312                 gk20a_err(dev_from_gk20a(g),
1313                            "not enough gpfifo space");
1314                 return -EAGAIN;
1315         }
1316
1317         mutex_lock(&c->submit_lock);
1318
1319         if (!c->sync) {
1320                 c->sync = gk20a_channel_sync_create(c);
1321                 if (!c->sync) {
1322                         mutex_unlock(&c->submit_lock);
1323                         return -ENOMEM;
1324                 }
1325         }
1326
1327         gk20a_fence_put(c->last_submit.pre_fence);
1328         gk20a_fence_put(c->last_submit.post_fence);
1329         c->last_submit.pre_fence = NULL;
1330         c->last_submit.post_fence = NULL;
1331
1332         err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence);
1333         if (unlikely(err)) {
1334                 mutex_unlock(&c->submit_lock);
1335                 return err;
1336         }
1337
1338         WARN_ON(!c->last_submit.post_fence->wfi);
1339
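        /*
         * A gpfifo entry packs the GPU VA of the pushbuffer segment into
         * entry0/entry1 together with its length in 32-bit words, here the
         * wfi + increment commands built by incr_wfi() above.
         */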
1340         c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1341         c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
1342                 pbdma_gp_entry1_length_f(cmd->size);
1343
1344         c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
1345
1346         /* save gp_put */
1347         cmd->gp_put = c->gpfifo.put;
1348
1349         gk20a_bar1_writel(g,
1350                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1351                 c->gpfifo.put);
1352
1353         mutex_unlock(&c->submit_lock);
1354
1355         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1356                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1357
1358         return 0;
1359 }
1360
1361 static u32 get_gp_free_count(struct channel_gk20a *c)
1362 {
1363         update_gp_get(c->g, c);
1364         return gp_free_count(c);
1365 }
1366
1367 static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
1368 {
1369         void *mem = NULL;
1370         unsigned int words;
1371         u64 offset;
1372         struct dma_buf *dmabuf = NULL;
1373
1374         if (gk20a_debug_trace_cmdbuf) {
1375                 u64 gpu_va = (u64)g->entry0 |
1376                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1377                 int err;
1378
1379                 words = pbdma_gp_entry1_length_v(g->entry1);
1380                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1381                 if (!err)
1382                         mem = dma_buf_vmap(dmabuf);
1383         }
1384
1385         if (mem) {
1386                 u32 i;
1387                 /*
1388                  * Write in batches of 128 as there seems to be a limit
1389                  * of how much you can output to ftrace at once.
1390                  */
1391                 for (i = 0; i < words; i += 128U) {
1392                         trace_gk20a_push_cmdbuf(
1393                                 c->g->dev->name,
1394                                 0,
1395                                 min(words - i, 128U),
1396                                 offset + i * sizeof(u32),
1397                                 mem);
1398                 }
1399                 dma_buf_vunmap(dmabuf, mem);
1400         }
1401 }
1402
1403 static int gk20a_channel_add_job(struct channel_gk20a *c,
1404                                  struct gk20a_fence *pre_fence,
1405                                  struct gk20a_fence *post_fence)
1406 {
1407         struct vm_gk20a *vm = c->vm;
1408         struct channel_gk20a_job *job = NULL;
1409         struct mapped_buffer_node **mapped_buffers = NULL;
1410         int err = 0, num_mapped_buffers;
1411
1412         /* job needs reference to this vm */
1413         gk20a_vm_get(vm);
1414
1415         err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
1416         if (err) {
1417                 gk20a_vm_put(vm);
1418                 return err;
1419         }
1420
1421         job = kzalloc(sizeof(*job), GFP_KERNEL);
1422         if (!job) {
1423                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1424                 gk20a_vm_put(vm);
1425                 return -ENOMEM;
1426         }
1427
1428         job->num_mapped_buffers = num_mapped_buffers;
1429         job->mapped_buffers = mapped_buffers;
1430         job->pre_fence = gk20a_fence_get(pre_fence);
1431         job->post_fence = gk20a_fence_get(post_fence);
1432
1433         mutex_lock(&c->jobs_lock);
1434         list_add_tail(&job->list, &c->jobs);
1435         mutex_unlock(&c->jobs_lock);
1436
1437         return 0;
1438 }
1439
1440 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1441 {
1442         struct vm_gk20a *vm = c->vm;
1443         struct channel_gk20a_job *job, *n;
1444
1445         trace_gk20a_channel_update(c);
1446
1447         wake_up(&c->submit_wq);
1448
1449         mutex_lock(&c->submit_lock);
1450         mutex_lock(&c->jobs_lock);
1451         list_for_each_entry_safe(job, n, &c->jobs, list) {
1452                 bool completed = gk20a_fence_is_expired(job->post_fence);
1453                 if (!completed)
1454                         break;
1455
1456                 c->sync->signal_timeline(c->sync);
1457
1458                 gk20a_vm_put_buffers(vm, job->mapped_buffers,
1459                                 job->num_mapped_buffers);
1460
1461                 /* Close the fences (this will unref the semaphores and release
1462                  * them to the pool). */
1463                 gk20a_fence_put(job->pre_fence);
1464                 gk20a_fence_put(job->post_fence);
1465
1466                 /* job is done. release its reference to vm */
1467                 gk20a_vm_put(vm);
1468
1469                 list_del_init(&job->list);
1470                 kfree(job);
1471                 gk20a_idle(c->g->dev);
1472         }
1473
1474         /*
1475          * If job list is empty then channel is idle and we can free
1476          * the syncpt here (given aggressive_destroy flag is set)
1477          * Note: check if last submit is complete before destroying
1478          * the sync resource
1479          */
1480         if (list_empty(&c->jobs)) {
1481                 if (c->sync && c->sync->aggressive_destroy &&
1482                           gk20a_fence_is_expired(c->last_submit.post_fence)) {
1483                         c->sync->destroy(c->sync);
1484                         c->sync = NULL;
1485                 }
1486         }
1487         mutex_unlock(&c->jobs_lock);
1488         mutex_unlock(&c->submit_lock);
1489
1490         if (c->update_fn)
1491                 schedule_work(&c->update_fn_work);
1492 }
1493
1494 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1495                                 struct nvgpu_gpfifo *gpfifo,
1496                                 u32 num_entries,
1497                                 u32 flags,
1498                                 struct nvgpu_fence *fence,
1499                                 struct gk20a_fence **fence_out)
1500 {
1501         struct gk20a *g = c->g;
1502         struct device *d = dev_from_gk20a(g);
1503         int err = 0;
1504         int i;
1505         int wait_fence_fd = -1;
1506         struct priv_cmd_entry *wait_cmd = NULL;
1507         struct priv_cmd_entry *incr_cmd = NULL;
1508         struct gk20a_fence *pre_fence = NULL;
1509         struct gk20a_fence *post_fence = NULL;
1510         /* we might need two extra gpfifo entries - one for pre fence
1511          * and one for post fence. */
1512         const int extra_entries = 2;
1513         bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1514
1515         if (c->has_timedout)
1516                 return -ETIMEDOUT;
1517
1518         /* fifo not large enough for request. Return error immediately */
1519         if (c->gpfifo.entry_num < num_entries) {
1520                 gk20a_err(d, "not enough gpfifo space allocated");
1521                 return -ENOMEM;
1522         }
1523
1524         if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1525                       NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1526             !fence)
1527                 return -EINVAL;
1528
1529         /* an address space needs to have been bound at this point. */
1530         if (!gk20a_channel_as_bound(c)) {
1531                 gk20a_err(d,
1532                             "not bound to an address space at time of gpfifo"
1533                             " submission.");
1534                 return -EINVAL;
1535         }
1536
1537 #ifdef CONFIG_DEBUG_FS
1538         /* update debug settings */
1539         if (g->ops.ltc.sync_debugfs)
1540                 g->ops.ltc.sync_debugfs(g);
1541 #endif
1542
1543         gk20a_dbg_info("channel %d", c->hw_chid);
1544
1545         /* gk20a_channel_update releases this ref. */
1546         err = gk20a_busy(g->dev);
1547         if (err) {
1548                 gk20a_err(d, "failed to host gk20a to submit gpfifo");
1549                 return err;
1550         }
1551
1552         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1553                                           c->hw_chid,
1554                                           num_entries,
1555                                           flags,
1556                                           fence ? fence->id : 0,
1557                                           fence ? fence->value : 0);
1558         check_gp_put(g, c);
1559         update_gp_get(g, c);
1560
1561         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1562                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1563
1564         /* Invalidate the TLB if it's dirty. */
1565         /* TBD: this should be done in the cmd stream, not with PRIs. */
1566         /* We don't know what context is currently running. */
1567         /* Note also: there can be more than one context associated */
1568         /* with the address space (vm). */
1569         g->ops.mm.tlb_invalidate(c->vm);
1570
1571         /* Make sure we have enough space for gpfifo entries. If not,
1572          * wait for signals from completed submits */
1573         if (gp_free_count(c) < num_entries + extra_entries) {
1574                 err = wait_event_interruptible(c->submit_wq,
1575                         get_gp_free_count(c) >= num_entries + extra_entries ||
1576                         c->has_timedout);
1577         }
1578
1579         if (c->has_timedout) {
1580                 err = -ETIMEDOUT;
1581                 goto clean_up;
1582         }
1583
1584         if (err) {
1585                 gk20a_err(d, "timeout waiting for gpfifo space");
1586                 err = -EAGAIN;
1587                 goto clean_up;
1588         }
1589
1590         mutex_lock(&c->submit_lock);
1591
1592         if (!c->sync) {
1593                 c->sync = gk20a_channel_sync_create(c);
1594                 if (!c->sync) {
1595                         err = -ENOMEM;
1596                         mutex_unlock(&c->submit_lock);
1597                         goto clean_up;
1598                 }
1599         }
1600
1601         /*
1602          * Optionally insert a syncpt wait at the beginning of the gpfifo
1603          * submission when the user requested one and the wait hasn't expired.
1604          * Validate that the id makes sense and elide the wait if it doesn't;
1605          * the only reason an invalid id isn't rejected outright is to keep
1606          * running some tests which trigger this condition.
1607          */
1608         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1609                 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1610                         wait_fence_fd = fence->id;
1611                         err = c->sync->wait_fd(c->sync, wait_fence_fd,
1612                                         &wait_cmd, &pre_fence);
1613                 } else {
1614                         err = c->sync->wait_syncpt(c->sync, fence->id,
1615                                         fence->value, &wait_cmd, &pre_fence);
1616                 }
1617         }
1618         if (err) {
1619                 mutex_unlock(&c->submit_lock);
1620                 goto clean_up;
1621         }
1622
1623
1624         /* Always insert a syncpt increment at the end of the gpfifo
1625          * submission to keep track of method completion for idle railgating. */
1626         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1627                 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1628                                          &post_fence, need_wfi);
1629         else
1630                 err = c->sync->incr(c->sync, &incr_cmd,
1631                                     &post_fence);
1632         if (err) {
1633                 mutex_unlock(&c->submit_lock);
1634                 goto clean_up;
1635         }
1636
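        /*
         * Push entries into the gpfifo ring: the optional wait command first,
         * then the caller's entries, then the increment command.  The put
         * pointer advances as (put + 1) & (entry_num - 1), which only wraps
         * correctly if entry_num is a power of two (assumed here).
         */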
1637         if (wait_cmd) {
1638                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1639                         u64_lo32(wait_cmd->gva);
1640                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1641                         u64_hi32(wait_cmd->gva) |
1642                         pbdma_gp_entry1_length_f(wait_cmd->size);
1643                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1644                         0, wait_cmd->size, 0, wait_cmd->ptr);
1645
1646                 c->gpfifo.put = (c->gpfifo.put + 1) &
1647                         (c->gpfifo.entry_num - 1);
1648
1649                 /* save gp_put */
1650                 wait_cmd->gp_put = c->gpfifo.put;
1651         }
1652
1653         for (i = 0; i < num_entries; i++) {
1654                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1655                         gpfifo[i].entry0; /* cmd buf va low 32 */
1656                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1657                         gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
1658                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1659                 c->gpfifo.put = (c->gpfifo.put + 1) &
1660                         (c->gpfifo.entry_num - 1);
1661         }
1662
1663         if (incr_cmd) {
1664                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1665                         u64_lo32(incr_cmd->gva);
1666                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1667                         u64_hi32(incr_cmd->gva) |
1668                         pbdma_gp_entry1_length_f(incr_cmd->size);
1669                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1670                         0, incr_cmd->size, 0, incr_cmd->ptr);
1671
1672                 c->gpfifo.put = (c->gpfifo.put + 1) &
1673                         (c->gpfifo.entry_num - 1);
1674
1675                 /* save gp_put */
1676                 incr_cmd->gp_put = c->gpfifo.put;
1677         }
1678
1679         gk20a_fence_put(c->last_submit.pre_fence);
1680         gk20a_fence_put(c->last_submit.post_fence);
1681         c->last_submit.pre_fence = pre_fence;
1682         c->last_submit.post_fence = post_fence;
1683         if (fence_out)
1684                 *fence_out = gk20a_fence_get(post_fence);
1685
1686         /* TODO! Check for errors... */
1687         gk20a_channel_add_job(c, pre_fence, post_fence);
1688
1689         c->cmds_pending = true;
1690         gk20a_bar1_writel(g,
1691                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1692                 c->gpfifo.put);
1693
1694         mutex_unlock(&c->submit_lock);
1695
1696         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1697                                              c->hw_chid,
1698                                              num_entries,
1699                                              flags,
1700                                              post_fence->syncpt_id,
1701                                              post_fence->syncpt_value);
1702
1703         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1704                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1705
1706         gk20a_dbg_fn("done");
1707         return err;
1708
1709 clean_up:
1710         gk20a_err(d, "gpfifo submit failed, err=%d", err);
1711         free_priv_cmdbuf(c, wait_cmd);
1712         free_priv_cmdbuf(c, incr_cmd);
1713         gk20a_fence_put(pre_fence);
1714         gk20a_fence_put(post_fence);
1715         gk20a_idle(g->dev);
1716         return err;
1717 }
1718
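/*
 * Software-side initialization of one channel's bookkeeping, done once per
 * channel when the FIFO is set up.  No hardware is touched here; the channel
 * stays marked unused and unbound until it is later acquired.
 */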
1719 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1720 {
1721         struct channel_gk20a *c = g->fifo.channel+chid;
1722         c->g = g;
1723         c->in_use = false;
1724         c->hw_chid = chid;
1725         c->bound = false;
1726         mutex_init(&c->jobs_lock);
1727         mutex_init(&c->submit_lock);
1728         INIT_LIST_HEAD(&c->jobs);
1729 #if defined(CONFIG_GK20A_CYCLE_STATS)
1730         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1731 #endif
1732         INIT_LIST_HEAD(&c->dbg_s_list);
1733         mutex_init(&c->dbg_s_lock);
1734
1735         return 0;
1736 }
1737
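/*
 * Wait up to 'timeout' for the channel's last submitted work to complete.
 * If the last post-fence was created without a WFI and the channel is not
 * using the KEPLER_C class, a WFI + increment is submitted first so that the
 * fence only expires once the preceding methods have finished.
 */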
1738 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
1739 {
1740         int err = 0;
1741         struct gk20a_fence *fence = ch->last_submit.post_fence;
1742
1743         if (!ch->cmds_pending)
1744                 return 0;
1745
1746         /* Do not wait for a timed-out channel */
1747         if (ch->has_timedout)
1748                 return -ETIMEDOUT;
1749
1750         if (!(fence && fence->wfi) && ch->obj_class != KEPLER_C) {
1751                 gk20a_dbg_fn("issuing wfi, incr to finish the channel");
1752                 err = gk20a_channel_submit_wfi(ch);
1753                 fence = ch->last_submit.post_fence;
1754         }
1755         if (err)
1756                 return err;
1757
1758         BUG_ON(!(fence && fence->wfi) && ch->obj_class != KEPLER_C);
1759
1760         gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
1761                      fence->syncpt_value, fence->semaphore);
1762
1763         err = gk20a_fence_wait(fence, timeout);
1764         if (WARN_ON(err))
1765                 dev_warn(dev_from_gk20a(ch->g),
1766                        "timed out waiting for gk20a channel to finish");
1767         else
1768                 ch->cmds_pending = false;
1769
1770         return err;
1771 }
1772
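/*
 * Wait until the u32 at 'offset' inside the dma-buf identified by 'id'
 * equals 'payload', the channel times out, or 'timeout' expires.  The page
 * containing the semaphore is kmapped for the duration of the wait and the
 * waiter is woken through the channel's semaphore_wq.
 */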
1773 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
1774                                         ulong id, u32 offset,
1775                                         u32 payload, long timeout)
1776 {
1777         struct platform_device *pdev = ch->g->dev;
1778         struct dma_buf *dmabuf;
1779         void *data;
1780         u32 *semaphore;
1781         int ret = 0;
1782         long remain;
1783
1784         /* do not wait if channel has timed out */
1785         if (ch->has_timedout)
1786                 return -ETIMEDOUT;
1787
1788         dmabuf = dma_buf_get(id);
1789         if (IS_ERR(dmabuf)) {
1790                 gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
1791                            id);
1792                 return -EINVAL;
1793         }
1794
1795         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
1796         if (!data) {
1797                 gk20a_err(&pdev->dev, "failed to map notifier memory");
1798                 ret = -EINVAL;
1799                 goto cleanup_put;
1800         }
1801
1802         semaphore = data + (offset & ~PAGE_MASK);
1803
1804         remain = wait_event_interruptible_timeout(
1805                         ch->semaphore_wq,
1806                         *semaphore == payload || ch->has_timedout,
1807                         timeout);
1808
1809         if (remain == 0 && *semaphore != payload)
1810                 ret = -ETIMEDOUT;
1811         else if (remain < 0)
1812                 ret = remain;
1813
1814         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
1815 cleanup_put:
1816         dma_buf_put(dmabuf);
1817         return ret;
1818 }
1819
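/*
 * Backend for the channel WAIT ioctl.  NOTIFIER waits map the user's
 * notification buffer and sleep until its status field reads zero, filling
 * in a timestamp on success; SEMAPHORE waits delegate to
 * gk20a_channel_wait_semaphore().  Both honor the caller's timeout and bail
 * out early if the channel has already timed out.
 */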
1820 static int gk20a_channel_wait(struct channel_gk20a *ch,
1821                               struct nvgpu_wait_args *args)
1822 {
1823         struct device *d = dev_from_gk20a(ch->g);
1824         struct dma_buf *dmabuf;
1825         struct notification *notif;
1826         struct timespec tv;
1827         u64 jiffies;
1828         ulong id;
1829         u32 offset;
1830         unsigned long timeout;
1831         int remain, ret = 0;
1832
1833         gk20a_dbg_fn("");
1834
1835         if (ch->has_timedout)
1836                 return -ETIMEDOUT;
1837
1838         if (args->timeout == NVGPU_NO_TIMEOUT)
1839                 timeout = MAX_SCHEDULE_TIMEOUT;
1840         else
1841                 timeout = (u32)msecs_to_jiffies(args->timeout);
1842
1843         switch (args->type) {
1844         case NVGPU_WAIT_TYPE_NOTIFIER:
1845                 id = args->condition.notifier.dmabuf_fd;
1846                 offset = args->condition.notifier.offset;
1847
1848                 dmabuf = dma_buf_get(id);
1849                 if (IS_ERR(dmabuf)) {
1850                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
1851                                    id);
1852                         return -EINVAL;
1853                 }
1854
1855                 notif = dma_buf_vmap(dmabuf);
1856                 if (!notif) {
1857                         gk20a_err(d, "failed to map notifier memory");
1858                         return -ENOMEM;
1859                 }
1860
1861                 notif = (struct notification *)((uintptr_t)notif + offset);
1862
1863                 /* user should set status pending before
1864                  * calling this ioctl */
1865                 remain = wait_event_interruptible_timeout(
1866                                 ch->notifier_wq,
1867                                 notif->status == 0 || ch->has_timedout,
1868                                 timeout);
1869
1870                 if (remain == 0 && notif->status != 0) {
1871                         ret = -ETIMEDOUT;
1872                         goto notif_clean_up;
1873                 } else if (remain < 0) {
1874                         ret = -EINTR;
1875                         goto notif_clean_up;
1876                 }
1877
1878                 /* TBD: fill in correct information */
1879                 jiffies = get_jiffies_64();
1880                 jiffies_to_timespec(jiffies, &tv);
1881                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
1882                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
1883                 notif->info32 = 0xDEADBEEF; /* should be object name */
1884                 notif->info16 = ch->hw_chid; /* should be method offset */
1885
1886 notif_clean_up:
1887                 dma_buf_vunmap(dmabuf, notif);
1888                 return ret;
1889
1890         case NVGPU_WAIT_TYPE_SEMAPHORE:
1891                 ret = gk20a_channel_wait_semaphore(ch,
1892                                 args->condition.semaphore.dmabuf_fd,
1893                                 args->condition.semaphore.offset,
1894                                 args->condition.semaphore.payload,
1895                                 timeout);
1896
1897                 break;
1898
1899         default:
1900                 ret = -EINVAL;
1901                 break;
1902         }
1903
1904         return ret;
1905 }
1906
1907 /* poll events for semaphores */
1908
1909 static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
1910 {
1911         gk20a_dbg_fn("");
1912
1913         mutex_lock(&ev->lock);
1914
1915         ev->events_enabled = true;
1916         ev->num_pending_events = 0;
1917
1918         mutex_unlock(&ev->lock);
1919 }
1920
1921 static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
1922 {
1923         gk20a_dbg_fn("");
1924
1925         mutex_lock(&ev->lock);
1926
1927         ev->events_enabled = false;
1928         ev->num_pending_events = 0;
1929
1930         mutex_unlock(&ev->lock);
1931 }
1932
1933 static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
1934 {
1935         gk20a_dbg_fn("");
1936
1937         mutex_lock(&ev->lock);
1938
1939         if (ev->events_enabled &&
1940                         ev->num_pending_events > 0)
1941                 ev->num_pending_events--;
1942
1943         mutex_unlock(&ev->lock);
1944 }
1945
1946 static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
1947                           struct nvgpu_channel_events_ctrl_args *args)
1948 {
1949         int ret = 0;
1950
1951         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
1952                         "channel events ctrl cmd %d", args->cmd);
1953
1954         switch (args->cmd) {
1955         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
1956                 gk20a_channel_events_enable(&ch->poll_events);
1957                 break;
1958
1959         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
1960                 gk20a_channel_events_disable(&ch->poll_events);
1961                 break;
1962
1963         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
1964                 gk20a_channel_events_clear(&ch->poll_events);
1965                 break;
1966
1967         default:
1968                 gk20a_err(dev_from_gk20a(ch->g),
1969                            "unrecognized channel events ctrl cmd: 0x%x",
1970                            args->cmd);
1971                 ret = -EINVAL;
1972                 break;
1973         }
1974
1975         return ret;
1976 }
1977
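/*
 * Post one pending event on the channel if event reporting is enabled.
 * gk20a_channel_poll() reports POLLPRI | POLLIN while the pending count is
 * non-zero, and the EVENTS_CTRL CLEAR command consumes one pending event.
 * Waking pollers is left to the caller.
 */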
1978 void gk20a_channel_event(struct channel_gk20a *ch)
1979 {
1980         mutex_lock(&ch->poll_events.lock);
1981
1982         if (ch->poll_events.events_enabled) {
1983                 gk20a_dbg_info("posting event on channel id %d",
1984                                 ch->hw_chid);
1985                 gk20a_dbg_info("%d channel events pending",
1986                                 ch->poll_events.num_pending_events);
1987
1988                 ch->poll_events.num_pending_events++;
1989                 /* not waking up here, caller does that */
1990         }
1991
1992         mutex_unlock(&ch->poll_events.lock);
1993 }
1994
1995 unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
1996 {
1997         unsigned int mask = 0;
1998         struct channel_gk20a *ch = filep->private_data;
1999
2000         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");
2001
2002         poll_wait(filep, &ch->semaphore_wq, wait);
2003
2004         mutex_lock(&ch->poll_events.lock);
2005
2006         if (ch->poll_events.events_enabled &&
2007                         ch->poll_events.num_pending_events > 0) {
2008                 gk20a_dbg_info("found pending event on channel id %d",
2009                                 ch->hw_chid);
2010                 gk20a_dbg_info("%d channel events pending",
2011                                 ch->poll_events.num_pending_events);
2012                 mask = (POLLPRI | POLLIN);
2013         }
2014
2015         mutex_unlock(&ch->poll_events.lock);
2016
2017         return mask;
2018 }
2019
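/*
 * Translate the NVGPU_PRIORITY_* levels into a raw timeslice value for
 * channel_gk20a_set_schedule_params(); per the per-case comments below, the
 * value is effectively scaled by a left shift of 3 into microseconds.
 */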
2020 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2021                 u32 priority)
2022 {
2023         u32 timeslice_timeout;
2024         /* set priority of graphics channel */
2025         switch (priority) {
2026         case NVGPU_PRIORITY_LOW:
2027                 /* 64 << 3 = 512us */
2028                 timeslice_timeout = 64;
2029                 break;
2030         case NVGPU_PRIORITY_MEDIUM:
2031                 /* 128 << 3 = 1024us */
2032                 timeslice_timeout = 128;
2033                 break;
2034         case NVGPU_PRIORITY_HIGH:
2035                 /* 255 << 3 = 2040us */
2036                 timeslice_timeout = 255;
2037                 break;
2038         default:
2039                 pr_err("Unsupported priority");
2040                 return -EINVAL;
2041         }
2042         channel_gk20a_set_schedule_params(ch,
2043                         timeslice_timeout);
2044         return 0;
2045 }
2046
2047 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
2048                             struct nvgpu_zcull_bind_args *args)
2049 {
2050         struct gk20a *g = ch->g;
2051         struct gr_gk20a *gr = &g->gr;
2052
2053         gk20a_dbg_fn("");
2054
2055         return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
2056                                 args->gpu_va, args->mode);
2057 }
2058
2059 /* in this context the "channel" is the host1x channel which
2060  * maps to *all* gk20a channels */
2061 int gk20a_channel_suspend(struct gk20a *g)
2062 {
2063         struct fifo_gk20a *f = &g->fifo;
2064         u32 chid;
2065         bool channels_in_use = false;
2066         int err;
2067
2068         gk20a_dbg_fn("");
2069
2070         /* wait for engine idle */
2071         err = g->ops.fifo.wait_engine_idle(g);
2072         if (err)
2073                 return err;
2074
2075         for (chid = 0; chid < f->num_channels; chid++) {
2076                 struct channel_gk20a *ch = &f->channel[chid];
2077                 if (ch->in_use) {
2078
2079                         gk20a_dbg_info("suspend channel %d", chid);
2080                         /* disable channel */
2081                         g->ops.fifo.disable_channel(ch);
2082                         /* preempt the channel */
2083                         g->ops.fifo.preempt_channel(g, chid);
2084                         /* wait for channel update notifiers */
2085                         if (ch->update_fn &&
2086                                         work_pending(&ch->update_fn_work))
2087                                 flush_work(&ch->update_fn_work);
2088
2089                         channels_in_use = true;
2090                 }
2091         }
2092
2093         if (channels_in_use) {
2094                 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
2095
2096                 for (chid = 0; chid < f->num_channels; chid++) {
2097                         if (f->channel[chid].in_use)
2098                                 g->ops.fifo.unbind_channel(&f->channel[chid]);
2099                 }
2100         }
2101
2102         gk20a_dbg_fn("done");
2103         return 0;
2104 }
2105
2106 /* in this context the "channel" is the host1x channel which
2107  * maps to *all* gk20a channels */
2108 int gk20a_channel_resume(struct gk20a *g)
2109 {
2110         struct fifo_gk20a *f = &g->fifo;
2111         u32 chid;
2112         bool channels_in_use = false;
2113
2114         gk20a_dbg_fn("");
2115
2116         for (chid = 0; chid < f->num_channels; chid++) {
2117                 if (f->channel[chid].in_use) {
2118                         gk20a_dbg_info("resume channel %d", chid);
2119                         g->ops.fifo.bind_channel(&f->channel[chid]);
2120                         channels_in_use = true;
2121                 }
2122         }
2123
2124         if (channels_in_use)
2125                 g->ops.fifo.update_runlist(g, 0, ~0, true, true);
2126
2127         gk20a_dbg_fn("done");
2128         return 0;
2129 }
2130
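/*
 * Wake every in-use channel's semaphore waiters and run
 * gk20a_channel_update() so that jobs whose fences have now expired are
 * cleaned up.
 */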
2131 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2132 {
2133         struct fifo_gk20a *f = &g->fifo;
2134         u32 chid;
2135
2136         gk20a_dbg_fn("");
2137
2138         for (chid = 0; chid < f->num_channels; chid++) {
2139                 struct channel_gk20a *c = g->fifo.channel+chid;
2140                 if (c->in_use) {
2141                         wake_up_interruptible_all(&c->semaphore_wq);
2142                         gk20a_channel_update(c, 0);
2143                 }
2144         }
2145 }
2146
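/*
 * Backend for the SUBMIT_GPFIFO ioctl: copy the user's gpfifo entries into a
 * temporary kernel buffer, submit them, and when FENCE_GET was requested
 * return the post-fence either as a sync-fence fd (SYNC_FENCE flag) or as a
 * raw syncpoint id/value pair.
 */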
2147 static int gk20a_ioctl_channel_submit_gpfifo(
2148         struct channel_gk20a *ch,
2149         struct nvgpu_submit_gpfifo_args *args)
2150 {
2151         struct gk20a_fence *fence_out;
2152         void *gpfifo;
2153         u32 size;
2154         int ret = 0;
2155
2156         gk20a_dbg_fn("");
2157
2158         if (ch->has_timedout)
2159                 return -ETIMEDOUT;
2160
2161         size = args->num_entries * sizeof(struct nvgpu_gpfifo);
2162
2163         gpfifo = kzalloc(size, GFP_KERNEL);
2164         if (!gpfifo)
2165                 return -ENOMEM;
2166
2167         if (copy_from_user(gpfifo,
2168                            (void __user *)(uintptr_t)args->gpfifo, size)) {
2169                 ret = -EINVAL;
2170                 goto clean_up;
2171         }
2172
2173         ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
2174                                           args->flags, &args->fence,
2175                                           &fence_out);
2176
2177         if (ret)
2178                 goto clean_up;
2179
2180         /* Convert fence_out to something we can pass back to user space. */
2181         if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
2182                 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2183                         int fd = gk20a_fence_install_fd(fence_out);
2184                         if (fd < 0)
2185                                 ret = fd;
2186                         else
2187                                 args->fence.id = fd;
2188                 } else {
2189                         args->fence.id = fence_out->syncpt_id;
2190                         args->fence.value = fence_out->syncpt_value;
2191                 }
2192         }
2193         gk20a_fence_put(fence_out);
2194
2195 clean_up:
2196         kfree(gpfifo);
2197         return ret;
2198 }
2199
2200 void gk20a_init_channel(struct gpu_ops *gops)
2201 {
2202         gops->fifo.bind_channel = channel_gk20a_bind;
2203         gops->fifo.unbind_channel = channel_gk20a_unbind;
2204         gops->fifo.disable_channel = channel_gk20a_disable;
2205         gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
2206         gops->fifo.free_inst = channel_gk20a_free_inst;
2207         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
2208 }
2209
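/*
 * Channel ioctl dispatcher.  Arguments are staged in an on-stack buffer:
 * copied in before the switch for _IOC_WRITE commands and copied back to
 * user space afterwards for _IOC_READ commands that succeeded.  Most
 * commands that touch hardware take a gk20a_busy()/gk20a_idle() power
 * reference around the work.
 */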
2210 long gk20a_channel_ioctl(struct file *filp,
2211         unsigned int cmd, unsigned long arg)
2212 {
2213         struct channel_gk20a *ch = filp->private_data;
2214         struct platform_device *dev = ch->g->dev;
2215         u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
2216         int err = 0;
2217
2218         gk20a_dbg_fn("start %d", _IOC_NR(cmd));
2219
2220         if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
2221                 (_IOC_NR(cmd) == 0) ||
2222                 (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
2223                 (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
2224                 return -EINVAL;
2225
2226         if (_IOC_DIR(cmd) & _IOC_WRITE) {
2227                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2228                         return -EFAULT;
2229         }
2230
2231         switch (cmd) {
2232         case NVGPU_IOCTL_CHANNEL_OPEN:
2233         {
2234                 int fd;
2235                 struct file *file;
2236                 char *name;
2237
2238                 err = get_unused_fd_flags(O_RDWR);
2239                 if (err < 0)
2240                         break;
2241                 fd = err;
2242
2243                 name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
2244                                 dev_name(&dev->dev), fd);
2245                 if (!name) {
2246                         err = -ENOMEM;
2247                         put_unused_fd(fd);
2248                         break;
2249                 }
2250
2251                 file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR);
2252                 kfree(name);
2253                 if (IS_ERR(file)) {
2254                         err = PTR_ERR(file);
2255                         put_unused_fd(fd);
2256                         break;
2257                 }
2258                 fd_install(fd, file);
2259
2260                 err = __gk20a_channel_open(ch->g, file);
2261                 if (err) {
2262                         put_unused_fd(fd);
2263                         fput(file);
2264                         break;
2265                 }
2266
2267                 ((struct nvgpu_channel_open_args *)buf)->channel_fd = fd;
2268                 break;
2269         }
2270         case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
2271                 break;
2272         case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2273                 err = gk20a_busy(dev);
2274                 if (err) {
2275                         dev_err(&dev->dev,
2276                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2277                                 __func__, cmd);
2278                         return err;
2279                 }
2280                 err = ch->g->ops.gr.alloc_obj_ctx(ch,
2281                                 (struct nvgpu_alloc_obj_ctx_args *)buf);
2282                 gk20a_idle(dev);
2283                 break;
2284         case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
2285                 err = gk20a_busy(dev);
2286                 if (err) {
2287                         dev_err(&dev->dev,
2288                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2289                                 __func__, cmd);
2290                         return err;
2291                 }
2292                 err = ch->g->ops.gr.free_obj_ctx(ch,
2293                                 (struct nvgpu_free_obj_ctx_args *)buf);
2294                 gk20a_idle(dev);
2295                 break;
2296         case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
2297                 err = gk20a_busy(dev);
2298                 if (err) {
2299                         dev_err(&dev->dev,
2300                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2301                                 __func__, cmd);
2302                         return err;
2303                 }
2304                 err = gk20a_alloc_channel_gpfifo(ch,
2305                                 (struct nvgpu_alloc_gpfifo_args *)buf);
2306                 gk20a_idle(dev);
2307                 break;
2308         case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2309                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2310                                 (struct nvgpu_submit_gpfifo_args *)buf);
2311                 break;
2312         case NVGPU_IOCTL_CHANNEL_WAIT:
2313                 err = gk20a_busy(dev);
2314                 if (err) {
2315                         dev_err(&dev->dev,
2316                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2317                                 __func__, cmd);
2318                         return err;
2319                 }
2320                 err = gk20a_channel_wait(ch,
2321                                 (struct nvgpu_wait_args *)buf);
2322                 gk20a_idle(dev);
2323                 break;
2324         case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
2325                 err = gk20a_busy(dev);
2326                 if (err) {
2327                         dev_err(&dev->dev,
2328                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2329                                 __func__, cmd);
2330                         return err;
2331                 }
2332                 err = gk20a_channel_zcull_bind(ch,
2333                                 (struct nvgpu_zcull_bind_args *)buf);
2334                 gk20a_idle(dev);
2335                 break;
2336         case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2337                 err = gk20a_busy(dev);
2338                 if (err) {
2339                         dev_err(&dev->dev,
2340                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2341                                 __func__, cmd);
2342                         return err;
2343                 }
2344                 err = gk20a_init_error_notifier(ch,
2345                                 (struct nvgpu_set_error_notifier *)buf);
2346                 gk20a_idle(dev);
2347                 break;
2348 #ifdef CONFIG_GK20A_CYCLE_STATS
2349         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
2350                 err = gk20a_busy(dev);
2351                 if (err) {
2352                         dev_err(&dev->dev,
2353                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2354                                 __func__, cmd);
2355                         return err;
2356                 }
2357                 err = gk20a_channel_cycle_stats(ch,
2358                                 (struct nvgpu_cycle_stats_args *)buf);
2359                 gk20a_idle(dev);
2360                 break;
2361 #endif
2362         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
2363         {
2364                 u32 timeout =
2365                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2366                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2367                            timeout, ch->hw_chid);
2368                 ch->timeout_ms_max = timeout;
2369                 break;
2370         }
2371         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2372         {
2373                 u32 timeout =
2374                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2375                 bool timeout_debug_dump = !((u32)
2376                         ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
2377                         (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
2378                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2379                            timeout, ch->hw_chid);
2380                 ch->timeout_ms_max = timeout;
2381                 ch->timeout_debug_dump = timeout_debug_dump;
2382                 break;
2383         }
2384         case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
2385                 ((struct nvgpu_get_param_args *)buf)->value =
2386                         ch->has_timedout;
2387                 break;
2388         case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
2389                 err = gk20a_busy(dev);
2390                 if (err) {
2391                         dev_err(&dev->dev,
2392                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2393                                 __func__, cmd);
2394                         return err;
2395                 }
2396                 gk20a_channel_set_priority(ch,
2397                         ((struct nvgpu_set_priority_args *)buf)->priority);
2398                 gk20a_idle(dev);
2399                 break;
2400         case NVGPU_IOCTL_CHANNEL_ENABLE:
2401                 err = gk20a_busy(dev);
2402                 if (err) {
2403                         dev_err(&dev->dev,
2404                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2405                                 __func__, cmd);
2406                         return err;
2407                 }
2408                 /* enable channel */
2409                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2410                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2411                         ccsr_channel_enable_set_true_f());
2412                 gk20a_idle(dev);
2413                 break;
2414         case NVGPU_IOCTL_CHANNEL_DISABLE:
2415                 err = gk20a_busy(dev);
2416                 if (err) {
2417                         dev_err(&dev->dev,
2418                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2419                                 __func__, cmd);
2420                         return err;
2421                 }
2422                 /* disable channel */
2423                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2424                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2425                         ccsr_channel_enable_clr_true_f());
2426                 gk20a_idle(dev);
2427                 break;
2428         case NVGPU_IOCTL_CHANNEL_PREEMPT:
2429                 err = gk20a_busy(dev);
2430                 if (err) {
2431                         dev_err(&dev->dev,
2432                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2433                                 __func__, cmd);
2434                         return err;
2435                 }
2436                 err = gk20a_fifo_preempt(ch->g, ch);
2437                 gk20a_idle(dev);
2438                 break;
2439         case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
2440                 err = gk20a_busy(dev);
2441                 if (err) {
2442                         dev_err(&dev->dev,
2443                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2444                                 __func__, cmd);
2445                         return err;
2446                 }
2447                 err = gk20a_fifo_force_reset_ch(ch, true);
2448                 gk20a_idle(dev);
2449                 break;
2450         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
2451                 err = gk20a_channel_events_ctrl(ch,
2452                            (struct nvgpu_channel_events_ctrl_args *)buf);
2453                 break;
2454         default:
2455                 dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2456                 err = -ENOTTY;
2457                 break;
2458         }
2459
2460         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2461                 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)) ? -EFAULT : 0;
2462
2463         gk20a_dbg_fn("end");
2464
2465         return err;
2466 }