gpu: nvgpu: add open channel ioctl to ctrl node
[linux-3.10.git] drivers/gpu/nvgpu/gk20a/channel_gk20a.c
1 /*
2  * GK20A Graphics channel
3  *
4  * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/nvhost.h>
20 #include <linux/list.h>
21 #include <linux/delay.h>
22 #include <linux/highmem.h> /* needed for nvmap.h */
23 #include <trace/events/gk20a.h>
24 #include <linux/scatterlist.h>
25 #include <linux/file.h>
26 #include <linux/anon_inodes.h>
27 #include <linux/dma-buf.h>
28
29 #include "debug_gk20a.h"
30
31 #include "gk20a.h"
32 #include "dbg_gpu_gk20a.h"
33 #include "fence_gk20a.h"
34 #include "semaphore_gk20a.h"
35
36 #include "hw_ram_gk20a.h"
37 #include "hw_fifo_gk20a.h"
38 #include "hw_pbdma_gk20a.h"
39 #include "hw_ccsr_gk20a.h"
40 #include "hw_ltc_gk20a.h"
41
42 #define NVMAP_HANDLE_PARAM_SIZE 1
43
44 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
45 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
46
47 static void free_priv_cmdbuf(struct channel_gk20a *c,
48                              struct priv_cmd_entry *e);
49 static void recycle_priv_cmdbuf(struct channel_gk20a *c);
50
51 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
52 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
53
54 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
55 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
56
57 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
58
59 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
60                                         bool add);
61 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
62
63 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
64 {
65         struct channel_gk20a *ch = NULL;
66         int chid;
67
68         mutex_lock(&f->ch_inuse_mutex);
69         for (chid = 0; chid < f->num_channels; chid++) {
70                 if (!f->channel[chid].in_use) {
71                         f->channel[chid].in_use = true;
72                         ch = &f->channel[chid];
73                         break;
74                 }
75         }
76         mutex_unlock(&f->ch_inuse_mutex);
77
78         return ch;
79 }
80
81 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
82 {
83         mutex_lock(&f->ch_inuse_mutex);
84         f->channel[c->hw_chid].in_use = false;
85         mutex_unlock(&f->ch_inuse_mutex);
86 }
87
88 int channel_gk20a_commit_va(struct channel_gk20a *c)
89 {
90         gk20a_dbg_fn("");
91
92         if (!c->inst_block.cpuva)
93                 return -ENOMEM;
94
95         gk20a_init_inst_block(&c->inst_block, c->vm,
96                         c->vm->gmmu_page_sizes[gmmu_page_size_big]);
97
98         return 0;
99 }
100
101 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
102 {
103         u32 addr_lo;
104         u32 addr_hi;
105         void *inst_ptr;
106
107         gk20a_dbg_fn("");
108
109         inst_ptr = c->inst_block.cpuva;
110         if (!inst_ptr)
111                 return -ENOMEM;
112
113         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
114         addr_hi = u64_hi32(c->userd_iova);
115
116         gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
117                 c->hw_chid, (u64)c->userd_iova);
118
119         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
120                  pbdma_userd_target_vid_mem_f() |
121                  pbdma_userd_addr_f(addr_lo));
122
123         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
124                  pbdma_userd_target_vid_mem_f() |
125                  pbdma_userd_hi_addr_f(addr_hi));
126
127         return 0;
128 }
129
130 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
131                                 u32 timeslice_timeout)
132 {
133         void *inst_ptr;
134         int shift = 3;
135         int value = timeslice_timeout;
136
137         inst_ptr = c->inst_block.cpuva;
138         if (!inst_ptr)
139                 return -ENOMEM;
140
141         /* disable channel */
142         c->g->ops.fifo.disable_channel(c);
143
144         /* preempt the channel */
145         WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
146
147         /* value field is 8 bits long */
148         while (value >= 1 << 8) {
149                 value >>= 1;
150                 shift++;
151         }
152
153         /* time slice register is only 18 bits long */
154         if ((value << shift) >= 1<<19) {
155                 pr_err("Requested timeslice value is clamped to 18 bits\n");
156                 value = 255;
157                 shift = 10;
158         }
159
160         /* set new timeslice */
161         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
162                 value | (shift << 12) |
163                 fifo_runlist_timeslice_enable_true_f());
164
165         /* enable channel */
166         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
167                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
168                 ccsr_channel_enable_set_true_f());
169
170         return 0;
171 }
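/*
 * Worked example of the timeslice encoding above (illustrative numbers,
 * not taken from the original source): timeslice_timeout = 1000 gives
 * value = 1000 -> 500 -> 250 and shift = 3 -> 4 -> 5, since the value
 * field only holds 8 bits.  250 << 5 = 8000 stays under the clamp, so the
 * RAMFC timeslice word is written as 250 | (5 << 12) | enable.
 */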
172
173 int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
174                         u64 gpfifo_base, u32 gpfifo_entries)
175 {
176         void *inst_ptr;
177
178         gk20a_dbg_fn("");
179
180         inst_ptr = c->inst_block.cpuva;
181         if (!inst_ptr)
182                 return -ENOMEM;
183
184         memset(inst_ptr, 0, ram_fc_size_val_v());
185
186         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
187                 pbdma_gp_base_offset_f(
188                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
189
190         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
191                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
192                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
193
194         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
195                  pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
196
197         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
198                 pbdma_formats_gp_fermi0_f() |
199                 pbdma_formats_pb_fermi1_f() |
200                 pbdma_formats_mp_fermi0_f());
201
202         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
203                 pbdma_pb_header_priv_user_f() |
204                 pbdma_pb_header_method_zero_f() |
205                 pbdma_pb_header_subchannel_zero_f() |
206                 pbdma_pb_header_level_main_f() |
207                 pbdma_pb_header_first_true_f() |
208                 pbdma_pb_header_type_inc_f());
209
210         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
211                 pbdma_subdevice_id_f(1) |
212                 pbdma_subdevice_status_active_f() |
213                 pbdma_subdevice_channel_dma_enable_f());
214
215         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
216
217         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
218                 pbdma_acquire_retry_man_2_f() |
219                 pbdma_acquire_retry_exp_2_f() |
220                 pbdma_acquire_timeout_exp_max_f() |
221                 pbdma_acquire_timeout_man_max_f() |
222                 pbdma_acquire_timeout_en_disable_f());
223
224         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
225                 fifo_runlist_timeslice_timeout_128_f() |
226                 fifo_runlist_timeslice_timescale_3_f() |
227                 fifo_runlist_timeslice_enable_true_f());
228
229         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
230                 fifo_pb_timeslice_timeout_16_f() |
231                 fifo_pb_timeslice_timescale_0_f() |
232                 fifo_pb_timeslice_enable_true_f());
233
234         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
235
236         return channel_gk20a_commit_userd(c);
237 }
238
239 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
240 {
241         BUG_ON(!c->userd_cpu_va);
242
243         gk20a_dbg_fn("");
244
245         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
246         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
247         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
248         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
249         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
250         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
251         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
252         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
253         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
254         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
255
256         return 0;
257 }
258
259 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
260 {
261         struct gk20a *g = ch_gk20a->g;
262         struct fifo_gk20a *f = &g->fifo;
263         struct fifo_engine_info_gk20a *engine_info =
264                 f->engine_info + ENGINE_GR_GK20A;
265
266         u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
267                 >> ram_in_base_shift_v();
268
269         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
270                 ch_gk20a->hw_chid, inst_ptr);
271
272         ch_gk20a->bound = true;
273
274         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
275                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
276                  ~ccsr_channel_runlist_f(~0)) |
277                  ccsr_channel_runlist_f(engine_info->runlist_id));
278
279         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
280                 ccsr_channel_inst_ptr_f(inst_ptr) |
281                 ccsr_channel_inst_target_vid_mem_f() |
282                 ccsr_channel_inst_bind_true_f());
283
284         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
285                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
286                  ~ccsr_channel_enable_set_f(~0)) |
287                  ccsr_channel_enable_set_true_f());
288 }
289
290 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
291 {
292         struct gk20a *g = ch_gk20a->g;
293
294         gk20a_dbg_fn("");
295
296         if (ch_gk20a->bound)
297                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
298                         ccsr_channel_inst_ptr_f(0) |
299                         ccsr_channel_inst_bind_false_f());
300
301         ch_gk20a->bound = false;
302
303         /*
304          * If we are aggressive then we can destroy the syncpoint
305          * resource at this point;
306          * if not, it will be destroyed at channel_free().
307          */
308         if (ch_gk20a->sync && ch_gk20a->sync->aggressive_destroy) {
309                 ch_gk20a->sync->destroy(ch_gk20a->sync);
310                 ch_gk20a->sync = NULL;
311         }
312 }
313
314 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
315 {
316         int err;
317
318         gk20a_dbg_fn("");
319
320         err = gk20a_alloc_inst_block(g, &ch->inst_block);
321         if (err)
322                 return err;
323
324         gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
325                 ch->hw_chid, (u64)ch->inst_block.cpu_pa);
326
327         gk20a_dbg_fn("done");
328         return 0;
329 }
330
331 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
332 {
333         gk20a_free_inst_block(g, &ch->inst_block);
334 }
335
336 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
337 {
338         return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
339 }
340
341 void channel_gk20a_enable(struct channel_gk20a *ch)
342 {
343         /* enable channel */
344         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
345                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
346                 ccsr_channel_enable_set_true_f());
347 }
348
349 void channel_gk20a_disable(struct channel_gk20a *ch)
350 {
351         /* disable channel */
352         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
353                 gk20a_readl(ch->g,
354                         ccsr_channel_r(ch->hw_chid)) |
355                         ccsr_channel_enable_clr_true_f());
356 }
357
358 void gk20a_channel_abort(struct channel_gk20a *ch)
359 {
360         struct channel_gk20a_job *job, *n;
361         bool released_job_semaphore = false;
362
363         /* ensure no fences are pending */
364         mutex_lock(&ch->submit_lock);
365         if (ch->sync)
366                 ch->sync->set_min_eq_max(ch->sync);
367         mutex_unlock(&ch->submit_lock);
368
369         /* release all job semaphores (applies only to jobs that use
370            semaphore synchronization) */
371         mutex_lock(&ch->jobs_lock);
372         list_for_each_entry_safe(job, n, &ch->jobs, list) {
373                 if (job->post_fence->semaphore) {
374                         gk20a_semaphore_release(job->post_fence->semaphore);
375                         released_job_semaphore = true;
376                 }
377         }
378         mutex_unlock(&ch->jobs_lock);
379
380         ch->g->ops.fifo.disable_channel(ch);
381
382         if (released_job_semaphore) {
383                 wake_up_interruptible_all(&ch->semaphore_wq);
384                 gk20a_channel_update(ch, 0);
385         }
386 }
387
388 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
389 {
390         bool channel_idle = false;
391         unsigned long end_jiffies = jiffies +
392                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
393
394         do {
395                 mutex_lock(&ch->jobs_lock);
396                 channel_idle = list_empty(&ch->jobs);
397                 mutex_unlock(&ch->jobs_lock);
398                 if (channel_idle)
399                         break;
400
401                 usleep_range(1000, 3000);
402         } while (time_before(jiffies, end_jiffies)
403                         || !tegra_platform_is_silicon());
404
405         if (!channel_idle) {
406                 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
407                                 ch->hw_chid);
408                 return -EBUSY;
409         }
410
411         return 0;
412 }
413
414 void gk20a_disable_channel(struct channel_gk20a *ch,
415                            bool finish,
416                            unsigned long finish_timeout)
417 {
418         if (finish) {
419                 int err = gk20a_channel_finish(ch, finish_timeout);
420                 WARN_ON(err);
421         }
422
423         /* disable the channel from hw and increment syncpoints */
424         gk20a_channel_abort(ch);
425
426         gk20a_wait_channel_idle(ch);
427
428         /* preempt the channel */
429         ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
430
431         /* remove channel from runlist */
432         channel_gk20a_update_runlist(ch, false);
433 }
434
435 #if defined(CONFIG_GK20A_CYCLE_STATS)
436
437 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
438 {
439         /* disable existing cyclestats buffer */
440         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
441         if (ch->cyclestate.cyclestate_buffer_handler) {
442                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
443                                 ch->cyclestate.cyclestate_buffer);
444                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
445                 ch->cyclestate.cyclestate_buffer_handler = NULL;
446                 ch->cyclestate.cyclestate_buffer = NULL;
447                 ch->cyclestate.cyclestate_buffer_size = 0;
448         }
449         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
450 }
451
452 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
453                        struct nvgpu_cycle_stats_args *args)
454 {
455         struct dma_buf *dmabuf;
456         void *virtual_address;
457
458         if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {
459
460                 /* set up new cyclestats buffer */
461                 dmabuf = dma_buf_get(args->dmabuf_fd);
462                 if (IS_ERR(dmabuf))
463                         return PTR_ERR(dmabuf);
464                 virtual_address = dma_buf_vmap(dmabuf);
465                 if (!virtual_address)
466                         return -ENOMEM;
467
468                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
469                 ch->cyclestate.cyclestate_buffer = virtual_address;
470                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
471                 return 0;
472
473         } else if (!args->dmabuf_fd &&
474                         ch->cyclestate.cyclestate_buffer_handler) {
475                 gk20a_free_cycle_stats_buffer(ch);
476                 return 0;
477
478         } else if (!args->dmabuf_fd &&
479                         !ch->cyclestate.cyclestate_buffer_handler) {
480                 /* no request from GL */
481                 return 0;
482
483         } else {
484                 pr_err("channel already has cyclestats buffer\n");
485                 return -EINVAL;
486         }
487 }
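/*
 * The four cases handled above, as a decision table:
 *
 *   dmabuf_fd   existing buffer   action
 *   non-zero    none              map and install new cyclestats buffer
 *   zero        present           free the current buffer
 *   zero        none              nothing requested, return 0
 *   non-zero    present           -EINVAL (buffer already installed)
 */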
488 #endif
489
490 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
491                 struct nvgpu_set_error_notifier *args) {
492         void *va;
493
494         struct dma_buf *dmabuf;
495
496         if (!args->mem) {
497                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
498                 return -EINVAL;
499         }
500
501         dmabuf = dma_buf_get(args->mem);
502
503         if (ch->error_notifier_ref)
504                 gk20a_free_error_notifiers(ch);
505
506         if (IS_ERR(dmabuf)) {
507                 pr_err("Invalid handle: %d\n", args->mem);
508                 return -EINVAL;
509         }
510         /* map handle */
511         va = dma_buf_vmap(dmabuf);
512         if (!va) {
513                 dma_buf_put(dmabuf);
514                 pr_err("Cannot map notifier handle\n");
515                 return -ENOMEM;
516         }
517
518         /* set channel notifiers pointer */
519         ch->error_notifier_ref = dmabuf;
520         ch->error_notifier = va + args->offset;
521         ch->error_notifier_va = va;
522         memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
523         return 0;
524 }
525
526 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
527 {
528         if (ch->error_notifier_ref) {
529                 struct timespec time_data;
530                 u64 nsec;
531                 getnstimeofday(&time_data);
532                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
533                                 (u64)time_data.tv_nsec;
534                 ch->error_notifier->time_stamp.nanoseconds[0] =
535                                 (u32)nsec;
536                 ch->error_notifier->time_stamp.nanoseconds[1] =
537                                 (u32)(nsec >> 32);
538                 ch->error_notifier->info32 = error;
539                 ch->error_notifier->status = 0xffff;
540                 gk20a_err(dev_from_gk20a(ch->g),
541                     "error notifier set to %d for ch %d\n", error, ch->hw_chid);
542         }
543 }
544
545 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
546 {
547         if (ch->error_notifier_ref) {
548                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
549                 dma_buf_put(ch->error_notifier_ref);
550                 ch->error_notifier_ref = NULL;
551                 ch->error_notifier = NULL;
552                 ch->error_notifier_va = NULL;
553         }
554 }
555
556 void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
557 {
558         struct gk20a *g = ch->g;
559         struct device *d = dev_from_gk20a(g);
560         struct fifo_gk20a *f = &g->fifo;
561         struct gr_gk20a *gr = &g->gr;
562         struct vm_gk20a *ch_vm = ch->vm;
563         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
564         struct dbg_session_gk20a *dbg_s;
565
566         gk20a_dbg_fn("");
567
568         /* if engine reset was deferred, perform it now */
569         mutex_lock(&f->deferred_reset_mutex);
570         if (g->fifo.deferred_reset_pending) {
571                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
572                            " deferred, running now");
573                 fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
574                 g->fifo.mmu_fault_engines = 0;
575                 g->fifo.deferred_reset_pending = false;
576         }
577         mutex_unlock(&f->deferred_reset_mutex);
578
579         if (!ch->bound)
580                 return;
581
582         if (!gk20a_channel_as_bound(ch))
583                 goto unbind;
584
585         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
586                         timeout);
587
588         gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);
589
590         gk20a_free_error_notifiers(ch);
591
592         /* release channel ctx */
593         g->ops.gr.free_channel_ctx(ch);
594
595         gk20a_gr_flush_channel_tlb(gr);
596
597         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
598
599         /* free gpfifo */
600         if (ch->gpfifo.gpu_va)
601                 gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
602                         ch->gpfifo.size, gk20a_mem_flag_none);
603         if (ch->gpfifo.cpu_va)
604                 dma_free_coherent(d, ch->gpfifo.size,
605                         ch->gpfifo.cpu_va, ch->gpfifo.iova);
606         ch->gpfifo.cpu_va = NULL;
607         ch->gpfifo.iova = 0;
608
609         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
610
611 #if defined(CONFIG_GK20A_CYCLE_STATS)
612         gk20a_free_cycle_stats_buffer(ch);
613 #endif
614
615         channel_gk20a_free_priv_cmdbuf(ch);
616
617         /* sync must be destroyed before releasing channel vm */
618         if (ch->sync) {
619                 ch->sync->destroy(ch->sync);
620                 ch->sync = NULL;
621         }
622
623         /* release channel binding to the as_share */
624         if (ch_vm->as_share)
625                 gk20a_as_release_share(ch_vm->as_share);
626         else
627                 gk20a_vm_put(ch_vm);
628
629         spin_lock(&ch->update_fn_lock);
630         ch->update_fn = NULL;
631         ch->update_fn_data = NULL;
632         spin_unlock(&ch->update_fn_lock);
633         cancel_work_sync(&ch->update_fn_work);
634
635 unbind:
636         if (gk20a_is_channel_marked_as_tsg(ch))
637                 gk20a_tsg_unbind_channel(ch);
638
639         g->ops.fifo.unbind_channel(ch);
640         g->ops.fifo.free_inst(g, ch);
641
642         ch->vpr = false;
643         ch->vm = NULL;
644
645         mutex_lock(&ch->submit_lock);
646         gk20a_fence_put(ch->last_submit.pre_fence);
647         gk20a_fence_put(ch->last_submit.post_fence);
648         ch->last_submit.pre_fence = NULL;
649         ch->last_submit.post_fence = NULL;
650         mutex_unlock(&ch->submit_lock);
651         WARN_ON(ch->sync);
652
653         /* unlink all debug sessions */
654         mutex_lock(&ch->dbg_s_lock);
655
656         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
657                 dbg_s->ch = NULL;
658                 list_del_init(&dbg_s->dbg_s_list_node);
659         }
660
661         mutex_unlock(&ch->dbg_s_lock);
662
663         /* ALWAYS last */
664         release_used_channel(f, ch);
665 }
666
667 int gk20a_channel_release(struct inode *inode, struct file *filp)
668 {
669         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
670         struct gk20a *g = ch ? ch->g : NULL;
671         int err;
672
673         if (!ch)
674                 return 0;
675
676         trace_gk20a_channel_release(dev_name(&g->dev->dev));
677
678         err = gk20a_busy(ch->g->dev);
679         if (err) {
680                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
681                         ch->hw_chid);
682                 return err;
683         }
684         gk20a_free_channel(ch, true);
685         gk20a_idle(ch->g->dev);
686
687         filp->private_data = NULL;
688         return 0;
689 }
690
691 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
692 {
693         struct channel_gk20a *ch =
694                 container_of(work, struct channel_gk20a, update_fn_work);
695         void (*update_fn)(struct channel_gk20a *, void *);
696         void *update_fn_data;
697
698         spin_lock(&ch->update_fn_lock);
699         update_fn = ch->update_fn;
700         update_fn_data = ch->update_fn_data;
701         spin_unlock(&ch->update_fn_lock);
702
703         if (update_fn)
704                 update_fn(ch, update_fn_data);
705 }
706
707 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
708                 void (*update_fn)(struct channel_gk20a *, void *),
709                 void *update_fn_data)
710 {
711         struct channel_gk20a *ch = gk20a_open_new_channel(g);
712
713         if (ch) {
714                 spin_lock(&ch->update_fn_lock);
715                 ch->update_fn = update_fn;
716                 ch->update_fn_data = update_fn_data;
717                 spin_unlock(&ch->update_fn_lock);
718         }
719
720         return ch;
721 }
722
723 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
724 {
725         struct fifo_gk20a *f = &g->fifo;
726         struct channel_gk20a *ch;
727
728         ch = acquire_unused_channel(f);
729         if (ch == NULL) {
730                 /* TBD: we want to make this virtualizable */
731                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
732                 return NULL;
733         }
734
735         ch->g = g;
736
737         if (g->ops.fifo.alloc_inst(g, ch)) {
738                 ch->in_use = false;
739                 gk20a_err(dev_from_gk20a(g),
740                            "failed to open gk20a channel, out of inst mem");
741
742                 return NULL;
743         }
744         ch->pid = current->pid;
745
746         /* By default, channel is regular (non-TSG) channel */
747         ch->tsgid = NVGPU_INVALID_TSG_ID;
748
749         /* reset timeout counter and update timestamp */
750         ch->timeout_accumulated_ms = 0;
751         ch->timeout_gpfifo_get = 0;
752         /* set gr host default timeout */
753         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
754         ch->timeout_debug_dump = true;
755         ch->has_timedout = false;
756         ch->obj_class = 0;
757
758         /* The channel is *not* runnable at this point. It still needs to have
759          * an address space bound and a gpfifo and grctx allocated. */
760
761         init_waitqueue_head(&ch->notifier_wq);
762         init_waitqueue_head(&ch->semaphore_wq);
763         init_waitqueue_head(&ch->submit_wq);
764
765         mutex_init(&ch->poll_events.lock);
766         ch->poll_events.events_enabled = false;
767         ch->poll_events.num_pending_events = 0;
768
769         ch->update_fn = NULL;
770         ch->update_fn_data = NULL;
771         spin_lock_init(&ch->update_fn_lock);
772         INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
773
774         return ch;
775 }
776
777 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
778 {
779         int err;
780         struct channel_gk20a *ch;
781
782         trace_gk20a_channel_open(dev_name(&g->dev->dev));
783
784         err = gk20a_busy(g->dev);
785         if (err) {
786                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
787                 return err;
788         }
789         ch = gk20a_open_new_channel(g);
790         gk20a_idle(g->dev);
791         if (!ch) {
792                 gk20a_err(dev_from_gk20a(g),
793                         "failed to get f");
794                 return -ENOMEM;
795         }
796
797         filp->private_data = ch;
798         return 0;
799 }
800
801 int gk20a_channel_open(struct inode *inode, struct file *filp)
802 {
803         struct gk20a *g = container_of(inode->i_cdev,
804                         struct gk20a, channel.cdev);
805         int ret;
806
807         gk20a_dbg_fn("start");
808         ret = __gk20a_channel_open(g, filp);
809
810         gk20a_dbg_fn("end");
811         return ret;
812 }
813
814 int gk20a_channel_open_ioctl(struct gk20a *g,
815                 struct nvgpu_channel_open_args *args)
816 {
817         int err;
818         int fd;
819         struct file *file;
820         char *name;
821
822         err = get_unused_fd_flags(O_RDWR);
823         if (err < 0)
824                 return err;
825         fd = err;
826
827         name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
828                         dev_name(&g->dev->dev), fd);
829         if (!name) {
830                 err = -ENOMEM;
831                 goto clean_up;
832         }
833
834         file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
835         kfree(name);
836         if (IS_ERR(file)) {
837                 err = PTR_ERR(file);
838                 goto clean_up;
839         }
840         fd_install(fd, file);
841
842         err = __gk20a_channel_open(g, file);
843         if (err)
844                 goto clean_up_file;
845
846         args->channel_fd = fd;
847         return 0;
848
849 clean_up_file:
850         fput(file);
851 clean_up:
852         put_unused_fd(fd);
853         return err;
854 }
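/*
 * Sketch of how the ctrl-node ioctl path is expected to use the helper
 * above (the handler itself lives outside this file, so the surrounding
 * details are assumptions for illustration):
 *
 *   struct nvgpu_channel_open_args args;
 *   err = gk20a_channel_open_ioctl(g, &args);
 *   // on success, copy args back so userspace sees args.channel_fd
 *
 * The returned fd is an anon-inode file installed with the channel cdev's
 * fops, so the usual channel ioctls can then be issued on it directly.
 */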
855
856 /* Allocate the private command buffer,
857    used for inserting commands before/after user-submitted buffers. */
858 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
859 {
860         struct device *d = dev_from_gk20a(c->g);
861         struct vm_gk20a *ch_vm = c->vm;
862         struct priv_cmd_queue *q = &c->priv_cmd_q;
863         struct priv_cmd_entry *e;
864         u32 i = 0, size;
865         int err = 0;
866         struct sg_table *sgt;
867         dma_addr_t iova;
868
869         /* Kernel can insert gpfifos before and after user gpfifos.
870            Before user gpfifos, kernel inserts fence_wait, which takes
871            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
872            After user gpfifos, kernel inserts fence_get, which takes
873            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
874            = 6 dwords.
875            Worst case, if the kernel adds both of them for every user gpfifo,
876            the max size of priv_cmdbuf is:
877            (gpfifo entry number) * (2 / 3) * (4 + 6) * 4 bytes */
878         size = roundup_pow_of_two(
879                 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
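        /*
         * Illustrative figures (not from the original source): with
         * gpfifo.entry_num = 1536 the formula gives
         * 1536 * 2 * 10 * 4 / 3 = 40960 bytes, which roundup_pow_of_two()
         * turns into a 64 KiB allocation.
         */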
880
881         q->mem.base_cpuva = dma_alloc_coherent(d, size,
882                                         &iova,
883                                         GFP_KERNEL);
884         if (!q->mem.base_cpuva) {
885                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
886                 err = -ENOMEM;
887                 goto clean_up;
888         }
889
890         q->mem.base_iova = iova;
891         q->mem.size = size;
892
893         err = gk20a_get_sgtable(d, &sgt,
894                         q->mem.base_cpuva, q->mem.base_iova, size);
895         if (err) {
896                 gk20a_err(d, "%s: failed to create sg table\n", __func__);
897                 goto clean_up;
898         }
899
900         memset(q->mem.base_cpuva, 0, size);
901
902         q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
903                                         size,
904                                         0, /* flags */
905                                         gk20a_mem_flag_none);
906         if (!q->base_gpuva) {
907                 gk20a_err(d, "ch %d : failed to map gpu va"
908                            " for priv cmd buffer", c->hw_chid);
909                 err = -ENOMEM;
910                 goto clean_up_sgt;
911         }
912
913         q->size = q->mem.size / sizeof (u32);
914
915         INIT_LIST_HEAD(&q->head);
916         INIT_LIST_HEAD(&q->free);
917
918         /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
919         for (i = 0; i < q->size / 4; i++) {
920                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
921                 if (!e) {
922                         gk20a_err(d, "ch %d: fail to pre-alloc cmd entry",
923                                 c->hw_chid);
924                         err = -ENOMEM;
925                         goto clean_up_sgt;
926                 }
927                 e->pre_alloc = true;
928                 list_add(&e->list, &q->free);
929         }
930
931         gk20a_free_sgtable(&sgt);
932
933         return 0;
934
935 clean_up_sgt:
936         gk20a_free_sgtable(&sgt);
937 clean_up:
938         channel_gk20a_free_priv_cmdbuf(c);
939         return err;
940 }
941
942 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
943 {
944         struct device *d = dev_from_gk20a(c->g);
945         struct vm_gk20a *ch_vm = c->vm;
946         struct priv_cmd_queue *q = &c->priv_cmd_q;
947         struct priv_cmd_entry *e;
948         struct list_head *pos, *tmp, *head;
949
950         if (q->size == 0)
951                 return;
952
953         if (q->base_gpuva)
954                 gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
955                                 q->mem.size, gk20a_mem_flag_none);
956         if (q->mem.base_cpuva)
957                 dma_free_coherent(d, q->mem.size,
958                         q->mem.base_cpuva, q->mem.base_iova);
959         q->mem.base_cpuva = NULL;
960         q->mem.base_iova = 0;
961
962         /* free used list */
963         head = &q->head;
964         list_for_each_safe(pos, tmp, head) {
965                 e = container_of(pos, struct priv_cmd_entry, list);
966                 free_priv_cmdbuf(c, e);
967         }
968
969         /* free free list */
970         head = &q->free;
971         list_for_each_safe(pos, tmp, head) {
972                 e = container_of(pos, struct priv_cmd_entry, list);
973                 e->pre_alloc = false;
974                 free_priv_cmdbuf(c, e);
975         }
976
977         memset(q, 0, sizeof(struct priv_cmd_queue));
978 }
979
980 /* allocate a cmd buffer with given size. size is number of u32 entries */
981 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
982                              struct priv_cmd_entry **entry)
983 {
984         struct priv_cmd_queue *q = &c->priv_cmd_q;
985         struct priv_cmd_entry *e;
986         struct list_head *node;
987         u32 free_count;
988         u32 size = orig_size;
989         bool no_retry = false;
990
991         gk20a_dbg_fn("size %d", orig_size);
992
993         *entry = NULL;
994
995         /* If free space at the end is less than requested, increase the size
996          * so that the real allocation starts from the beginning. */
997         if (q->put + size > q->size)
998                 size = orig_size + (q->size - q->put);
999
1000         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
1001                         c->hw_chid, q->get, q->put);
1002
1003 TRY_AGAIN:
1004         free_count = (q->size - (q->put - q->get) - 1) % q->size;
1005
1006         if (size > free_count) {
1007                 if (!no_retry) {
1008                         recycle_priv_cmdbuf(c);
1009                         no_retry = true;
1010                         goto TRY_AGAIN;
1011                 } else
1012                         return -EAGAIN;
1013         }
1014
1015         if (unlikely(list_empty(&q->free))) {
1016
1017                 gk20a_dbg_info("ch %d: run out of pre-alloc entries",
1018                         c->hw_chid);
1019
1020                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1021                 if (!e) {
1022                         gk20a_err(dev_from_gk20a(c->g),
1023                                 "ch %d: fail to allocate priv cmd entry",
1024                                 c->hw_chid);
1025                         return -ENOMEM;
1026                 }
1027         } else  {
1028                 node = q->free.next;
1029                 list_del(node);
1030                 e = container_of(node, struct priv_cmd_entry, list);
1031         }
1032
1033         e->size = orig_size;
1034         e->gp_get = c->gpfifo.get;
1035         e->gp_put = c->gpfifo.put;
1036         e->gp_wrap = c->gpfifo.wrap;
1037
1038         /* if we have increased the size to skip free space at the end, set put
1039            to the beginning of the cmd buffer (0) + size */
1040         if (size != orig_size) {
1041                 e->ptr = q->mem.base_cpuva;
1042                 e->gva = q->base_gpuva;
1043                 q->put = orig_size;
1044         } else {
1045                 e->ptr = q->mem.base_cpuva + q->put;
1046                 e->gva = q->base_gpuva + q->put * sizeof(u32);
1047                 q->put = (q->put + orig_size) & (q->size - 1);
1048         }
1049
1050         /* we already handled q->put + size > q->size so BUG_ON this */
1051         BUG_ON(q->put > q->size);
1052
1053         /* add new entry to head since we free from head */
1054         list_add(&e->list, &q->head);
1055
1056         *entry = e;
1057
1058         gk20a_dbg_fn("done");
1059
1060         return 0;
1061 }
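/*
 * Example of the wrap handling above (illustrative numbers): with
 * q->size = 256 words, q->put = 250 and a request for 10 words, the
 * requested size is grown to 10 + (256 - 250) = 16 so it no longer fits
 * at the tail; the entry is therefore placed at the start of the buffer
 * and q->put becomes orig_size = 10.
 */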
1062
1063 /* Don't call this to free an explicit cmd entry.
1064  * It doesn't update priv_cmd_queue get/put */
1065 static void free_priv_cmdbuf(struct channel_gk20a *c,
1066                              struct priv_cmd_entry *e)
1067 {
1068         struct priv_cmd_queue *q = &c->priv_cmd_q;
1069
1070         if (!e)
1071                 return;
1072
1073         list_del(&e->list);
1074
1075         if (unlikely(!e->pre_alloc))
1076                 kfree(e);
1077         else {
1078                 memset(e, 0, sizeof(struct priv_cmd_entry));
1079                 e->pre_alloc = true;
1080                 list_add(&e->list, &q->free);
1081         }
1082 }
1083
1084 /* free entries if they're no longer being used */
1085 static void recycle_priv_cmdbuf(struct channel_gk20a *c)
1086 {
1087         struct priv_cmd_queue *q = &c->priv_cmd_q;
1088         struct priv_cmd_entry *e, *tmp;
1089         struct list_head *head = &q->head;
1090         bool wrap_around, found = false;
1091
1092         gk20a_dbg_fn("");
1093
1094         /* Find the most recent free entry. Free it and everything before it */
1095         list_for_each_entry(e, head, list) {
1096
1097                 gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
1098                         "curr get:put:wrap %d:%d:%d",
1099                         c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
1100                         c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
1101
1102                 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
1103                 if (e->gp_get < e->gp_put) {
1104                         if (c->gpfifo.get >= e->gp_put ||
1105                             wrap_around) {
1106                                 found = true;
1107                                 break;
1108                         } else
1109                                 e->gp_get = c->gpfifo.get;
1110                 } else if (e->gp_get > e->gp_put) {
1111                         if (wrap_around &&
1112                             c->gpfifo.get >= e->gp_put) {
1113                                 found = true;
1114                                 break;
1115                         } else
1116                                 e->gp_get = c->gpfifo.get;
1117                 }
1118         }
1119
1120         if (found)
1121                 q->get = (e->ptr - q->mem.base_cpuva) + e->size;
1122         else {
1123                 gk20a_dbg_info("no free entry recycled");
1124                 return;
1125         }
1126
1127         list_for_each_entry_safe_continue(e, tmp, head, list) {
1128                 free_priv_cmdbuf(c, e);
1129         }
1130
1131         gk20a_dbg_fn("done");
1132 }
1133
1134
1135 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1136                 struct nvgpu_alloc_gpfifo_args *args)
1137 {
1138         struct gk20a *g = c->g;
1139         struct device *d = dev_from_gk20a(g);
1140         struct vm_gk20a *ch_vm;
1141         u32 gpfifo_size;
1142         int err = 0;
1143         struct sg_table *sgt;
1144         dma_addr_t iova;
1145
1146         /* Kernel can insert one extra gpfifo entry before user-submitted gpfifos
1147            and another one after, for internal usage. Triple the requested size. */
1148         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
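        /*
         * For example (illustrative request size): num_entries = 100 is
         * tripled to 300 and then rounded up, so the channel actually gets
         * a 512-entry gpfifo.
         */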
1149
1150         if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1151                 c->vpr = true;
1152
1153         /* an address space needs to have been bound at this point. */
1154         if (!gk20a_channel_as_bound(c)) {
1155                 gk20a_err(d,
1156                             "not bound to an address space at time of gpfifo"
1157                             " allocation.");
1158                 return -EINVAL;
1159         }
1160         ch_vm = c->vm;
1161
1162         c->cmds_pending = false;
1163         mutex_lock(&c->submit_lock);
1164         gk20a_fence_put(c->last_submit.pre_fence);
1165         gk20a_fence_put(c->last_submit.post_fence);
1166         c->last_submit.pre_fence = NULL;
1167         c->last_submit.post_fence = NULL;
1168         mutex_unlock(&c->submit_lock);
1169
1170         c->ramfc.offset = 0;
1171         c->ramfc.size = ram_in_ramfc_s() / 8;
1172
1173         if (c->gpfifo.cpu_va) {
1174                 gk20a_err(d, "channel %d :"
1175                            " gpfifo already allocated", c->hw_chid);
1176                 return -EEXIST;
1177         }
1178
1179         c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
1180         c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
1181                                                 c->gpfifo.size,
1182                                                 &iova,
1183                                                 GFP_KERNEL);
1184         if (!c->gpfifo.cpu_va) {
1185                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1186                 err = -ENOMEM;
1187                 goto clean_up;
1188         }
1189
1190         c->gpfifo.iova = iova;
1191         c->gpfifo.entry_num = gpfifo_size;
1192
1193         c->gpfifo.get = c->gpfifo.put = 0;
1194
1195         err = gk20a_get_sgtable(d, &sgt,
1196                         c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
1197         if (err) {
1198                 gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
1199                 goto clean_up;
1200         }
1201
1202         c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
1203                                         &sgt,
1204                                         c->gpfifo.size,
1205                                         0, /* flags */
1206                                         gk20a_mem_flag_none);
1207         if (!c->gpfifo.gpu_va) {
1208                 gk20a_err(d, "channel %d : failed to map"
1209                            " gpu_va for gpfifo", c->hw_chid);
1210                 err = -ENOMEM;
1211                 goto clean_up_sgt;
1212         }
1213
1214         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1215                 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1216
1217         channel_gk20a_setup_userd(c);
1218
1219         err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1220         if (err)
1221                 goto clean_up_unmap;
1222
1223         /* TBD: setup engine contexts */
1224
1225         err = channel_gk20a_alloc_priv_cmdbuf(c);
1226         if (err)
1227                 goto clean_up_unmap;
1228
1229         err = channel_gk20a_update_runlist(c, true);
1230         if (err)
1231                 goto clean_up_unmap;
1232
1233         g->ops.fifo.bind_channel(c);
1234
1235         gk20a_free_sgtable(&sgt);
1236
1237         gk20a_dbg_fn("done");
1238         return 0;
1239
1240 clean_up_unmap:
1241         gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
1242                 c->gpfifo.size, gk20a_mem_flag_none);
1243 clean_up_sgt:
1244         gk20a_free_sgtable(&sgt);
1245 clean_up:
1246         dma_free_coherent(d, c->gpfifo.size,
1247                 c->gpfifo.cpu_va, c->gpfifo.iova);
1248         c->gpfifo.cpu_va = NULL;
1249         c->gpfifo.iova = 0;
1250         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1251         gk20a_err(d, "fail");
1252         return err;
1253 }
1254
1255 static inline bool check_gp_put(struct gk20a *g,
1256                                 struct channel_gk20a *c)
1257 {
1258         u32 put;
1259         /* gp_put changed unexpectedly since last update? */
1260         put = gk20a_bar1_readl(g,
1261                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1262         if (c->gpfifo.put != put) {
1263                 /*TBD: BUG_ON/teardown on this*/
1264                 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1265                            "since last update");
1266                 c->gpfifo.put = put;
1267                 return false; /* surprise! */
1268         }
1269         return true; /* checked out ok */
1270 }
1271
1272 /* Call this periodically to determine how the gpfifo is draining. */
1273 static inline u32 update_gp_get(struct gk20a *g,
1274                                 struct channel_gk20a *c)
1275 {
1276         u32 new_get = gk20a_bar1_readl(g,
1277                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
1278         if (new_get < c->gpfifo.get)
1279                 c->gpfifo.wrap = !c->gpfifo.wrap;
1280         c->gpfifo.get = new_get;
1281         return new_get;
1282 }
1283
1284 static inline u32 gp_free_count(struct channel_gk20a *c)
1285 {
1286         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1287                 c->gpfifo.entry_num;
1288 }
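/*
 * The free-count arithmetic above is the usual "one slot kept empty" ring
 * formula.  For instance (illustrative numbers), with entry_num = 8,
 * put = 5 and get = 2, three entries are in flight and
 * (8 - (5 - 2) - 1) % 8 = 4 entries remain available.
 */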
1289
1290 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1291                 u32 timeout_delta_ms)
1292 {
1293         u32 gpfifo_get = update_gp_get(ch->g, ch);
1294         /* Count consecutive timeout ISRs */
1295         if (gpfifo_get == ch->timeout_gpfifo_get) {
1296                 /* we didn't advance since previous channel timeout check */
1297                 ch->timeout_accumulated_ms += timeout_delta_ms;
1298         } else {
1299                 /* first timeout isr encountered */
1300                 ch->timeout_accumulated_ms = timeout_delta_ms;
1301         }
1302
1303         ch->timeout_gpfifo_get = gpfifo_get;
1304
1305         return ch->g->timeouts_enabled &&
1306                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1307 }
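/*
 * In other words, the accumulated time only grows while gp_get stays put.
 * For example (illustrative values), with timeout_ms_max = 3000 and a
 * timeout ISR arriving every 500 ms for a stuck channel, the function
 * starts returning true on the seventh consecutive check (3500 ms > 3000 ms),
 * provided timeouts are enabled for the GPU.
 */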
1308
1309
1310 /* Issue a syncpoint increment *preceded* by a wait-for-idle
1311  * command.  All commands on the channel will have been
1312  * consumed at the time the fence syncpoint increment occurs.
1313  */
1314 static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
1315 {
1316         struct priv_cmd_entry *cmd = NULL;
1317         struct gk20a *g = c->g;
1318         u32 free_count;
1319         int err;
1320
1321         if (c->has_timedout)
1322                 return -ETIMEDOUT;
1323
1324         update_gp_get(g, c);
1325         free_count = gp_free_count(c);
1326         if (unlikely(!free_count)) {
1327                 gk20a_err(dev_from_gk20a(g),
1328                            "not enough gpfifo space");
1329                 return -EAGAIN;
1330         }
1331
1332         mutex_lock(&c->submit_lock);
1333
1334         if (!c->sync) {
1335                 c->sync = gk20a_channel_sync_create(c);
1336                 if (!c->sync) {
1337                         mutex_unlock(&c->submit_lock);
1338                         return -ENOMEM;
1339                 }
1340         }
1341
1342         gk20a_fence_put(c->last_submit.pre_fence);
1343         gk20a_fence_put(c->last_submit.post_fence);
1344         c->last_submit.pre_fence = NULL;
1345         c->last_submit.post_fence = NULL;
1346
1347         err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence);
1348         if (unlikely(err)) {
1349                 mutex_unlock(&c->submit_lock);
1350                 return err;
1351         }
1352
1353         WARN_ON(!c->last_submit.post_fence->wfi);
1354
1355         c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1356         c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
1357                 pbdma_gp_entry1_length_f(cmd->size);
1358
1359         c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
1360
1361         /* save gp_put */
1362         cmd->gp_put = c->gpfifo.put;
1363
1364         gk20a_bar1_writel(g,
1365                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1366                 c->gpfifo.put);
1367
1368         mutex_unlock(&c->submit_lock);
1369
1370         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1371                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1372
1373         return 0;
1374 }
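/*
 * The gpfifo entry written above packs the command buffer's GPU VA and
 * length into two words.  As a sketch (illustrative address), a 6-word
 * WFI/increment buffer at GPU VA 0x1_2345_6000 becomes
 * entry0 = 0x23456000 and entry1 = 0x1 | pbdma_gp_entry1_length_f(6).
 */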
1375
1376 static u32 get_gp_free_count(struct channel_gk20a *c)
1377 {
1378         update_gp_get(c->g, c);
1379         return gp_free_count(c);
1380 }
1381
1382 static void trace_write_pushbuffer(struct channel_gk20a *c,
1383                                    struct nvgpu_gpfifo *g)
1384 {
1385         void *mem = NULL;
1386         unsigned int words;
1387         u64 offset;
1388         struct dma_buf *dmabuf = NULL;
1389
1390         if (gk20a_debug_trace_cmdbuf) {
1391                 u64 gpu_va = (u64)g->entry0 |
1392                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1393                 int err;
1394
1395                 words = pbdma_gp_entry1_length_v(g->entry1);
1396                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1397                 if (!err)
1398                         mem = dma_buf_vmap(dmabuf);
1399         }
1400
1401         if (mem) {
1402                 u32 i;
1403                 /*
1404                  * Write in batches of 128 as there seems to be a limit
1405                  * of how much you can output to ftrace at once.
1406                  */
1407                 for (i = 0; i < words; i += 128U) {
1408                         trace_gk20a_push_cmdbuf(
1409                                 c->g->dev->name,
1410                                 0,
1411                                 min(words - i, 128U),
1412                                 offset + i * sizeof(u32),
1413                                 mem);
1414                 }
1415                 dma_buf_vunmap(dmabuf, mem);
1416         }
1417 }
1418
1419 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
1420                                          struct nvgpu_gpfifo *g,
1421                                          int count)
1422 {
1423         if (gk20a_debug_trace_cmdbuf) {
1424                 int i;
1425                 struct nvgpu_gpfifo *gp = g;
1426                 for (i = 0; i < count; i++, gp++)
1427                         trace_write_pushbuffer(c, gp);
1428         }
1429 }
1430
1431 static int gk20a_channel_add_job(struct channel_gk20a *c,
1432                                  struct gk20a_fence *pre_fence,
1433                                  struct gk20a_fence *post_fence)
1434 {
1435         struct vm_gk20a *vm = c->vm;
1436         struct channel_gk20a_job *job = NULL;
1437         struct mapped_buffer_node **mapped_buffers = NULL;
1438         int err = 0, num_mapped_buffers;
1439
1440         /* job needs reference to this vm */
1441         gk20a_vm_get(vm);
1442
1443         err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
1444         if (err) {
1445                 gk20a_vm_put(vm);
1446                 return err;
1447         }
1448
1449         job = kzalloc(sizeof(*job), GFP_KERNEL);
1450         if (!job) {
1451                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1452                 gk20a_vm_put(vm);
1453                 return -ENOMEM;
1454         }
1455
1456         job->num_mapped_buffers = num_mapped_buffers;
1457         job->mapped_buffers = mapped_buffers;
1458         job->pre_fence = gk20a_fence_get(pre_fence);
1459         job->post_fence = gk20a_fence_get(post_fence);
1460
1461         mutex_lock(&c->jobs_lock);
1462         list_add_tail(&job->list, &c->jobs);
1463         mutex_unlock(&c->jobs_lock);
1464
1465         return 0;
1466 }
1467
1468 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1469 {
1470         struct vm_gk20a *vm = c->vm;
1471         struct channel_gk20a_job *job, *n;
1472
1473         trace_gk20a_channel_update(c);
1474
1475         wake_up(&c->submit_wq);
1476
1477         mutex_lock(&c->submit_lock);
1478         mutex_lock(&c->jobs_lock);
1479         list_for_each_entry_safe(job, n, &c->jobs, list) {
1480                 bool completed = gk20a_fence_is_expired(job->post_fence);
1481                 if (!completed)
1482                         break;
1483
1484                 c->sync->signal_timeline(c->sync);
1485
1486                 gk20a_vm_put_buffers(vm, job->mapped_buffers,
1487                                 job->num_mapped_buffers);
1488
1489                 /* Close the fences (this will unref the semaphores and release
1490                  * them to the pool). */
1491                 gk20a_fence_put(job->pre_fence);
1492                 gk20a_fence_put(job->post_fence);
1493
1494                 /* job is done. release its reference to vm */
1495                 gk20a_vm_put(vm);
1496
1497                 list_del_init(&job->list);
1498                 kfree(job);
1499                 gk20a_idle(c->g->dev);
1500         }
1501
1502         /*
1503          * If job list is empty then channel is idle and we can free
1504          * the syncpt here (given aggressive_destroy flag is set)
1505          * Note: check if last submit is complete before destroying
1506          * the sync resource
1507          */
1508         if (list_empty(&c->jobs)) {
1509                 if (c->sync && c->sync->aggressive_destroy &&
1510                           gk20a_fence_is_expired(c->last_submit.post_fence)) {
1511                         c->sync->destroy(c->sync);
1512                         c->sync = NULL;
1513                 }
1514         }
1515         mutex_unlock(&c->jobs_lock);
1516         mutex_unlock(&c->submit_lock);
1517
1518         if (c->update_fn)
1519                 schedule_work(&c->update_fn_work);
1520 }
1521
1522 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1523                                 struct nvgpu_gpfifo *gpfifo,
1524                                 u32 num_entries,
1525                                 u32 flags,
1526                                 struct nvgpu_fence *fence,
1527                                 struct gk20a_fence **fence_out)
1528 {
1529         struct gk20a *g = c->g;
1530         struct device *d = dev_from_gk20a(g);
1531         int err = 0;
1532         int start, end;
1533         int wait_fence_fd = -1;
1534         struct priv_cmd_entry *wait_cmd = NULL;
1535         struct priv_cmd_entry *incr_cmd = NULL;
1536         struct gk20a_fence *pre_fence = NULL;
1537         struct gk20a_fence *post_fence = NULL;
1538         /* we might need two extra gpfifo entries - one for pre fence
1539          * and one for post fence. */
1540         const int extra_entries = 2;
1541         bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1542
1543         if (c->has_timedout)
1544                 return -ETIMEDOUT;
1545
1546         /* fifo not large enough for request. Return error immediately */
1547         if (c->gpfifo.entry_num < num_entries) {
1548                 gk20a_err(d, "not enough gpfifo space allocated");
1549                 return -ENOMEM;
1550         }
1551
1552         if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1553                       NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1554             !fence)
1555                 return -EINVAL;
1556
1557         /* an address space needs to have been bound at this point. */
1558         if (!gk20a_channel_as_bound(c)) {
1559                 gk20a_err(d,
1560                             "not bound to an address space at time of gpfifo"
1561                             " submission.");
1562                 return -EINVAL;
1563         }
1564
1565 #ifdef CONFIG_DEBUG_FS
1566         /* update debug settings */
1567         if (g->ops.ltc.sync_debugfs)
1568                 g->ops.ltc.sync_debugfs(g);
1569 #endif
1570
1571         gk20a_dbg_info("channel %d", c->hw_chid);
1572
1573         /* gk20a_channel_update releases this ref. */
1574         err = gk20a_busy(g->dev);
1575         if (err) {
1576                 gk20a_err(d, "failed to host gk20a to submit gpfifo");
1577                 return err;
1578         }
1579
1580         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1581                                           c->hw_chid,
1582                                           num_entries,
1583                                           flags,
1584                                           fence ? fence->id : 0,
1585                                           fence ? fence->value : 0);
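        /*
         * check_gp_put() and update_gp_get() (defined elsewhere in this
         * driver) appear to sanity-check the shadow GP_PUT against USERD
         * and refresh the cached GET pointer from hardware before free
         * space is computed below.
         */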
1586         check_gp_put(g, c);
1587         update_gp_get(g, c);
1588
1589         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1590                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1591
1592         /*
1593          * Invalidate the TLB if it is dirty. TBD: do this in the command
1594          * stream, not with PRIs; we do not know which context is currently
1595          * running, and more than one context can share this vm.
1596          */
1597         g->ops.mm.tlb_invalidate(c->vm);
1598
1599         /* Make sure we have enough space for gpfifo entries. If not,
1600          * wait for signals from completed submits */
1601         if (gp_free_count(c) < num_entries + extra_entries) {
1602                 trace_gk20a_gpfifo_submit_wait_for_space(c->g->dev->name);
1603                 err = wait_event_interruptible(c->submit_wq,
1604                         get_gp_free_count(c) >= num_entries + extra_entries ||
1605                         c->has_timedout);
1606                 trace_gk20a_gpfifo_submit_wait_for_space_done(c->g->dev->name);
1607         }
1608
1609         if (c->has_timedout) {
1610                 err = -ETIMEDOUT;
1611                 goto clean_up;
1612         }
1613
1614         if (err) {
1615                 gk20a_err(d, "timeout waiting for gpfifo space");
1616                 err = -EAGAIN;
1617                 goto clean_up;
1618         }
1619
1620         mutex_lock(&c->submit_lock);
1621
1622         if (!c->sync) {
1623                 c->sync = gk20a_channel_sync_create(c);
1624                 if (!c->sync) {
1625                         err = -ENOMEM;
1626                         mutex_unlock(&c->submit_lock);
1627                         goto clean_up;
1628                 }
1629         }
1630
1631         /*
1632          * Optionally insert a syncpoint wait at the start of the gpfifo
1633          * submission when the user requested one and the wait has not yet
1634          * expired. The id is validated and the wait elided if it makes no
1635          * sense; the only reason this path has not been removed outright is
1636          * that some tests still rely on triggering it.
1637          */
1638         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1639                 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1640                         wait_fence_fd = fence->id;
1641                         err = c->sync->wait_fd(c->sync, wait_fence_fd,
1642                                         &wait_cmd, &pre_fence);
1643                 } else {
1644                         err = c->sync->wait_syncpt(c->sync, fence->id,
1645                                         fence->value, &wait_cmd, &pre_fence);
1646                 }
1647         }
1648         if (err) {
1649                 mutex_unlock(&c->submit_lock);
1650                 goto clean_up;
1651         }
1652
1653
1654         /* Always insert a syncpoint increment at the end of the gpfifo
1655          * submission to track method completion for idle railgating. */
1656         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1657                 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1658                                          &post_fence, need_wfi);
1659         else
1660                 err = c->sync->incr(c->sync, &incr_cmd,
1661                                     &post_fence);
1662         if (err) {
1663                 mutex_unlock(&c->submit_lock);
1664                 goto clean_up;
1665         }
1666
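        /*
         * If a wait command was generated above, push it as its own gpfifo
         * entry ahead of the user's entries. entry0/entry1 hold the GPU VA
         * and encoded length of the priv command buffer.
         */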
1667         if (wait_cmd) {
1668                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1669                         u64_lo32(wait_cmd->gva);
1670                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1671                         u64_hi32(wait_cmd->gva) |
1672                         pbdma_gp_entry1_length_f(wait_cmd->size);
1673                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1674                         0, wait_cmd->size, 0, wait_cmd->ptr);
1675
1676                 c->gpfifo.put = (c->gpfifo.put + 1) &
1677                         (c->gpfifo.entry_num - 1);
1678
1679                 /* save gp_put */
1680                 wait_cmd->gp_put = c->gpfifo.put;
1681         }
1682
1683         /*
1684          * Copy source gpfifo entries into the gpfifo ring buffer,
1685          * potentially splitting into two memcpies to handle the
1686          * ring buffer wrap-around case.
1687          */
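        /*
         * Illustrative example: with entry_num = 1024, put = 1020 and
         * num_entries = 10, end = 1030 exceeds entry_num, so length0 = 4
         * entries go at the tail of the ring and length1 = 6 entries wrap
         * to the head.
         */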
1688         start = c->gpfifo.put;
1689         end = start + num_entries;
1690
1691         if (end > c->gpfifo.entry_num) {
1692                 int length0 = c->gpfifo.entry_num - start;
1693                 int length1 = num_entries - length0;
1694
1695                 memcpy(c->gpfifo.cpu_va + start, gpfifo,
1696                        length0 * sizeof(*gpfifo));
1697
1698                 memcpy(c->gpfifo.cpu_va, gpfifo + length0,
1699                        length1 * sizeof(*gpfifo));
1700
1701                 trace_write_pushbuffer_range(c, gpfifo, length0);
1702                 trace_write_pushbuffer_range(c, gpfifo + length0, length1);
1703         } else {
1704                 memcpy(c->gpfifo.cpu_va + start, gpfifo,
1705                        num_entries * sizeof(*gpfifo));
1706
1707                 trace_write_pushbuffer_range(c, gpfifo, num_entries);
1708         }
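        /*
         * Advance put modulo the ring size. The mask below (and the ones
         * used for the wait/incr entries) relies on gpfifo.entry_num being
         * a power of two, which the gpfifo allocation is presumed to
         * guarantee.
         */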
1709         c->gpfifo.put = (c->gpfifo.put + num_entries) &
1710                 (c->gpfifo.entry_num - 1);
1711
1712         if (incr_cmd) {
1713                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1714                         u64_lo32(incr_cmd->gva);
1715                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1716                         u64_hi32(incr_cmd->gva) |
1717                         pbdma_gp_entry1_length_f(incr_cmd->size);
1718                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1719                         0, incr_cmd->size, 0, incr_cmd->ptr);
1720
1721                 c->gpfifo.put = (c->gpfifo.put + 1) &
1722                         (c->gpfifo.entry_num - 1);
1723
1724                 /* save gp_put */
1725                 incr_cmd->gp_put = c->gpfifo.put;
1726         }
1727
1728         gk20a_fence_put(c->last_submit.pre_fence);
1729         gk20a_fence_put(c->last_submit.post_fence);
1730         c->last_submit.pre_fence = pre_fence;
1731         c->last_submit.post_fence = post_fence;
1732         if (fence_out)
1733                 *fence_out = gk20a_fence_get(post_fence);
1734
1735         /* TODO! Check for errors... */
1736         gk20a_channel_add_job(c, pre_fence, post_fence);
1737
1738         c->cmds_pending = true;
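        /*
         * Kick the hardware: write the updated GP_PUT into this channel's
         * USERD through the BAR1 aperture so the PBDMA can start fetching
         * the new entries.
         */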
1739         gk20a_bar1_writel(g,
1740                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1741                 c->gpfifo.put);
1742
1743         mutex_unlock(&c->submit_lock);
1744
1745         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1746                                              c->hw_chid,
1747                                              num_entries,
1748                                              flags,
1749                                              post_fence->syncpt_id,
1750                                              post_fence->syncpt_value);
1751
1752         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1753                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1754
1755         gk20a_dbg_fn("done");
1756         return err;
1757
1758 clean_up:
1759         gk20a_err(d, "fail");
1760         free_priv_cmdbuf(c, wait_cmd);
1761         free_priv_cmdbuf(c, incr_cmd);
1762         gk20a_fence_put(pre_fence);
1763         gk20a_fence_put(post_fence);
1764         gk20a_idle(g->dev);
1765         return err;
1766 }
1767
1768 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1769 {
1770         struct channel_gk20a *c = g->fifo.channel+chid;
1771         c->g = g;
1772         c->in_use = false;
1773         c->hw_chid = chid;
1774         c->bound = false;
1775         mutex_init(&c->jobs_lock);
1776         mutex_init(&c->submit_lock);
1777         INIT_LIST_HEAD(&c->jobs);
1778 #if defined(CONFIG_GK20A_CYCLE_STATS)
1779         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1780 #endif
1781         INIT_LIST_HEAD(&c->dbg_s_list);
1782         mutex_init(&c->dbg_s_lock);
1783
1784         return 0;
1785 }
1786
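/*
 * Wait for all previously submitted work on the channel to complete. If
 * the last post fence was not generated with a WFI (and the channel is not
 * a KEPLER_C class channel), a WFI plus increment is submitted first so
 * that there is something definitive to wait on.
 */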
1787 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
1788 {
1789         int err = 0;
1790         struct gk20a_fence *fence = ch->last_submit.post_fence;
1791
1792         if (!ch->cmds_pending)
1793                 return 0;
1794
1795         /* Do not wait on a channel that has timed out. */
1796         if (ch->has_timedout)
1797                 return -ETIMEDOUT;
1798
1799         if (!(fence && fence->wfi) && ch->obj_class != KEPLER_C) {
1800                 gk20a_dbg_fn("issuing wfi, incr to finish the channel");
1801                 err = gk20a_channel_submit_wfi(ch);
1802                 fence = ch->last_submit.post_fence;
1803         }
1804         if (err)
1805                 return err;
1806
1807         BUG_ON(!(fence && fence->wfi) && ch->obj_class != KEPLER_C);
1808
1809         gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
1810                      fence->syncpt_value, fence->semaphore);
1811
1812         err = gk20a_fence_wait(fence, timeout);
1813         if (WARN_ON(err))
1814                 dev_warn(dev_from_gk20a(ch->g),
1815                        "timed out waiting for gk20a channel to finish");
1816         else
1817                 ch->cmds_pending = false;
1818
1819         return err;
1820 }
1821
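/*
 * Wait for a semaphore value in user-provided memory. The dma-buf backing
 * the semaphore is kmapped, the word at the given offset is watched via
 * the channel's semaphore_wq, and the wait ends when the value equals the
 * payload, the channel times out, or the timeout expires.
 */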
1822 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
1823                                         ulong id, u32 offset,
1824                                         u32 payload, long timeout)
1825 {
1826         struct platform_device *pdev = ch->g->dev;
1827         struct dma_buf *dmabuf;
1828         void *data;
1829         u32 *semaphore;
1830         int ret = 0;
1831         long remain;
1832
1833         /* do not wait if channel has timed out */
1834         if (ch->has_timedout)
1835                 return -ETIMEDOUT;
1836
1837         dmabuf = dma_buf_get(id);
1838         if (IS_ERR(dmabuf)) {
1839                 gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
1840                            id);
1841                 return -EINVAL;
1842         }
1843
1844         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
1845         if (!data) {
1846                 gk20a_err(&pdev->dev, "failed to map notifier memory");
1847                 ret = -EINVAL;
1848                 goto cleanup_put;
1849         }
1850
1851         semaphore = data + (offset & ~PAGE_MASK);
1852
1853         remain = wait_event_interruptible_timeout(
1854                         ch->semaphore_wq,
1855                         *semaphore == payload || ch->has_timedout,
1856                         timeout);
1857
1858         if (remain == 0 && *semaphore != payload)
1859                 ret = -ETIMEDOUT;
1860         else if (remain < 0)
1861                 ret = remain;
1862
1863         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
1864 cleanup_put:
1865         dma_buf_put(dmabuf);
1866         return ret;
1867 }
1868
1869 static int gk20a_channel_wait(struct channel_gk20a *ch,
1870                               struct nvgpu_wait_args *args)
1871 {
1872         struct device *d = dev_from_gk20a(ch->g);
1873         struct dma_buf *dmabuf;
1874         struct notification *notif;
1875         struct timespec tv;
1876         u64 jiffies;
1877         ulong id;
1878         u32 offset;
1879         unsigned long timeout;
1880         int remain, ret = 0;
1881
1882         gk20a_dbg_fn("");
1883
1884         if (ch->has_timedout)
1885                 return -ETIMEDOUT;
1886
1887         if (args->timeout == NVGPU_NO_TIMEOUT)
1888                 timeout = MAX_SCHEDULE_TIMEOUT;
1889         else
1890                 timeout = (u32)msecs_to_jiffies(args->timeout);
1891
1892         switch (args->type) {
1893         case NVGPU_WAIT_TYPE_NOTIFIER:
1894                 id = args->condition.notifier.dmabuf_fd;
1895                 offset = args->condition.notifier.offset;
1896
1897                 dmabuf = dma_buf_get(id);
1898                 if (IS_ERR(dmabuf)) {
1899                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
1900                                    id);
1901                         return -EINVAL;
1902                 }
1903
1904                 notif = dma_buf_vmap(dmabuf);
1905                 if (!notif) {
1906                         gk20a_err(d, "failed to map notifier memory");
1907                         return -ENOMEM;
1908                 }
1909
1910                 notif = (struct notification *)((uintptr_t)notif + offset);
1911
1912                 /* The user is expected to set the notifier status to
1913                  * pending before calling this ioctl. */
1914                 remain = wait_event_interruptible_timeout(
1915                                 ch->notifier_wq,
1916                                 notif->status == 0 || ch->has_timedout,
1917                                 timeout);
1918
1919                 if (remain == 0 && notif->status != 0) {
1920                         ret = -ETIMEDOUT;
1921                         goto notif_clean_up;
1922                 } else if (remain < 0) {
1923                         ret = -EINTR;
1924                         goto notif_clean_up;
1925                 }
1926
1927                 /* TBD: fill in correct information */
1928                 jiffies = get_jiffies_64();
1929                 jiffies_to_timespec(jiffies, &tv);
1930                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
1931                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
1932                 notif->info32 = 0xDEADBEEF; /* should be object name */
1933                 notif->info16 = ch->hw_chid; /* should be method offset */
1934
1935 notif_clean_up:
1936                 dma_buf_vunmap(dmabuf, notif);
                dma_buf_put(dmabuf); /* balance dma_buf_get() above */
1937                 return ret;
1938
1939         case NVGPU_WAIT_TYPE_SEMAPHORE:
1940                 ret = gk20a_channel_wait_semaphore(ch,
1941                                 args->condition.semaphore.dmabuf_fd,
1942                                 args->condition.semaphore.offset,
1943                                 args->condition.semaphore.payload,
1944                                 timeout);
1945
1946                 break;
1947
1948         default:
1949                 ret = -EINVAL;
1950                 break;
1951         }
1952
1953         return ret;
1954 }
1955
1956 /* poll events for semaphores */
1957
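/*
 * Event accounting model used below: ENABLE arms counting and resets the
 * pending count, gk20a_channel_event() increments it when an event is
 * posted, poll() reports POLLPRI | POLLIN while the count is non-zero, and
 * CLEAR consumes one pending event.
 */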
1958 static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
1959 {
1960         gk20a_dbg_fn("");
1961
1962         mutex_lock(&ev->lock);
1963
1964         ev->events_enabled = true;
1965         ev->num_pending_events = 0;
1966
1967         mutex_unlock(&ev->lock);
1968 }
1969
1970 static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
1971 {
1972         gk20a_dbg_fn("");
1973
1974         mutex_lock(&ev->lock);
1975
1976         ev->events_enabled = false;
1977         ev->num_pending_events = 0;
1978
1979         mutex_unlock(&ev->lock);
1980 }
1981
1982 static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
1983 {
1984         gk20a_dbg_fn("");
1985
1986         mutex_lock(&ev->lock);
1987
1988         if (ev->events_enabled &&
1989                         ev->num_pending_events > 0)
1990                 ev->num_pending_events--;
1991
1992         mutex_unlock(&ev->lock);
1993 }
1994
1995 static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
1996                           struct nvgpu_channel_events_ctrl_args *args)
1997 {
1998         int ret = 0;
1999
2000         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
2001                         "channel events ctrl cmd %d", args->cmd);
2002
2003         switch (args->cmd) {
2004         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
2005                 gk20a_channel_events_enable(&ch->poll_events);
2006                 break;
2007
2008         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
2009                 gk20a_channel_events_disable(&ch->poll_events);
2010                 break;
2011
2012         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
2013                 gk20a_channel_events_clear(&ch->poll_events);
2014                 break;
2015
2016         default:
2017                 gk20a_err(dev_from_gk20a(ch->g),
2018                            "unrecognized channel events ctrl cmd: 0x%x",
2019                            args->cmd);
2020                 ret = -EINVAL;
2021                 break;
2022         }
2023
2024         return ret;
2025 }
2026
2027 void gk20a_channel_event(struct channel_gk20a *ch)
2028 {
2029         mutex_lock(&ch->poll_events.lock);
2030
2031         if (ch->poll_events.events_enabled) {
2032                 gk20a_dbg_info("posting event on channel id %d",
2033                                 ch->hw_chid);
2034                 gk20a_dbg_info("%d channel events pending",
2035                                 ch->poll_events.num_pending_events);
2036
2037                 ch->poll_events.num_pending_events++;
2038                 /* not waking up here, caller does that */
2039         }
2040
2041         mutex_unlock(&ch->poll_events.lock);
2042 }
2043
2044 unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
2045 {
2046         unsigned int mask = 0;
2047         struct channel_gk20a *ch = filep->private_data;
2048
2049         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");
2050
2051         poll_wait(filep, &ch->semaphore_wq, wait);
2052
2053         mutex_lock(&ch->poll_events.lock);
2054
2055         if (ch->poll_events.events_enabled &&
2056                         ch->poll_events.num_pending_events > 0) {
2057                 gk20a_dbg_info("found pending event on channel id %d",
2058                                 ch->hw_chid);
2059                 gk20a_dbg_info("%d channel events pending",
2060                                 ch->poll_events.num_pending_events);
2061                 mask = (POLLPRI | POLLIN);
2062         }
2063
2064         mutex_unlock(&ch->poll_events.lock);
2065
2066         return mask;
2067 }
2068
2069 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2070                 u32 priority)
2071 {
2072         u32 timeslice_timeout;
2073         /* set priority of graphics channel */
2074         switch (priority) {
2075         case NVGPU_PRIORITY_LOW:
2076                 /* 64 << 3 = 512us */
2077                 timeslice_timeout = 64;
2078                 break;
2079         case NVGPU_PRIORITY_MEDIUM:
2080                 /* 128 << 3 = 1024us */
2081                 timeslice_timeout = 128;
2082                 break;
2083         case NVGPU_PRIORITY_HIGH:
2084                 /* 255 << 3 = 2040us */
2085                 timeslice_timeout = 255;
2086                 break;
2087         default:
2088                 pr_err("Unsupported priority");
2089                 return -EINVAL;
2090         }
2091         channel_gk20a_set_schedule_params(ch,
2092                         timeslice_timeout);
2093         return 0;
2094 }
2095
2096 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
2097                             struct nvgpu_zcull_bind_args *args)
2098 {
2099         struct gk20a *g = ch->g;
2100         struct gr_gk20a *gr = &g->gr;
2101
2102         gk20a_dbg_fn("");
2103
2104         return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
2105                                 args->gpu_va, args->mode);
2106 }
2107
2108 /* in this context the "channel" is the host1x channel which
2109  * maps to *all* gk20a channels */
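/*
 * Suspend sequence for each in-use channel: disable it, preempt it off the
 * engine, and flush any pending channel-update work. Once all channels are
 * quiesced they are removed from the runlist and unbound.
 */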
2110 int gk20a_channel_suspend(struct gk20a *g)
2111 {
2112         struct fifo_gk20a *f = &g->fifo;
2113         u32 chid;
2114         bool channels_in_use = false;
2115         int err;
2116
2117         gk20a_dbg_fn("");
2118
2119         /* wait for engine idle */
2120         err = g->ops.fifo.wait_engine_idle(g);
2121         if (err)
2122                 return err;
2123
2124         for (chid = 0; chid < f->num_channels; chid++) {
2125                 struct channel_gk20a *ch = &f->channel[chid];
2126                 if (ch->in_use) {
2127
2128                         gk20a_dbg_info("suspend channel %d", chid);
2129                         /* disable channel */
2130                         g->ops.fifo.disable_channel(ch);
2131                         /* preempt the channel */
2132                         g->ops.fifo.preempt_channel(g, chid);
2133                         /* wait for channel update notifiers */
2134                         if (ch->update_fn &&
2135                                         work_pending(&ch->update_fn_work))
2136                                 flush_work(&ch->update_fn_work);
2137
2138                         channels_in_use = true;
2139                 }
2140         }
2141
2142         if (channels_in_use) {
2143                 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
2144
2145                 for (chid = 0; chid < f->num_channels; chid++) {
2146                         if (f->channel[chid].in_use)
2147                                 g->ops.fifo.unbind_channel(&f->channel[chid]);
2148                 }
2149         }
2150
2151         gk20a_dbg_fn("done");
2152         return 0;
2153 }
2154
2155 /* in this context the "channel" is the host1x channel which
2156  * maps to *all* gk20a channels */
2157 int gk20a_channel_resume(struct gk20a *g)
2158 {
2159         struct fifo_gk20a *f = &g->fifo;
2160         u32 chid;
2161         bool channels_in_use = false;
2162
2163         gk20a_dbg_fn("");
2164
2165         for (chid = 0; chid < f->num_channels; chid++) {
2166                 if (f->channel[chid].in_use) {
2167                         gk20a_dbg_info("resume channel %d", chid);
2168                         g->ops.fifo.bind_channel(&f->channel[chid]);
2169                         channels_in_use = true;
2170                 }
2171         }
2172
2173         if (channels_in_use)
2174                 g->ops.fifo.update_runlist(g, 0, ~0, true, true);
2175
2176         gk20a_dbg_fn("done");
2177         return 0;
2178 }
2179
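/*
 * Called when the GPU signals a semaphore release (presumably from the
 * non-stalling interrupt path): wake every waiter on each in-use channel's
 * semaphore_wq and run gk20a_channel_update() to reap any jobs whose
 * fences have now expired.
 */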
2180 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2181 {
2182         struct fifo_gk20a *f = &g->fifo;
2183         u32 chid;
2184
2185         gk20a_dbg_fn("");
2186
2187         for (chid = 0; chid < f->num_channels; chid++) {
2188                 struct channel_gk20a *c = g->fifo.channel+chid;
2189                 if (c->in_use) {
2190                         wake_up_interruptible_all(&c->semaphore_wq);
2191                         gk20a_channel_update(c, 0);
2192                 }
2193         }
2194 }
2195
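/*
 * SUBMIT_GPFIFO ioctl handler: copy the user's gpfifo entries into a
 * temporary kernel buffer, hand them to gk20a_submit_channel_gpfifo(), and
 * translate the resulting post fence back to user space as either a sync
 * fence fd or a raw syncpoint id/value pair, depending on the submit
 * flags.
 */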
2196 static int gk20a_ioctl_channel_submit_gpfifo(
2197         struct channel_gk20a *ch,
2198         struct nvgpu_submit_gpfifo_args *args)
2199 {
2200         struct gk20a_fence *fence_out;
2201         void *gpfifo;
2202         u32 size;
2203         int ret = 0;
2204
2205         gk20a_dbg_fn("");
2206
2207         if (ch->has_timedout)
2208                 return -ETIMEDOUT;
2209
2210         size = args->num_entries * sizeof(struct nvgpu_gpfifo);
2211
2212         gpfifo = kzalloc(size, GFP_KERNEL);
2213         if (!gpfifo)
2214                 return -ENOMEM;
2215
2216         if (copy_from_user(gpfifo,
2217                            (void __user *)(uintptr_t)args->gpfifo, size)) {
2218                 ret = -EINVAL;
2219                 goto clean_up;
2220         }
2221
2222         ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
2223                                           args->flags, &args->fence,
2224                                           &fence_out);
2225
2226         if (ret)
2227                 goto clean_up;
2228
2229         /* Convert fence_out to something we can pass back to user space. */
2230         if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
2231                 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2232                         int fd = gk20a_fence_install_fd(fence_out);
2233                         if (fd < 0)
2234                                 ret = fd;
2235                         else
2236                                 args->fence.id = fd;
2237                 } else {
2238                         args->fence.id = fence_out->syncpt_id;
2239                         args->fence.value = fence_out->syncpt_value;
2240                 }
2241         }
2242         gk20a_fence_put(fence_out);
2243
2244 clean_up:
2245         kfree(gpfifo);
2246         return ret;
2247 }
2248
2249 void gk20a_init_channel(struct gpu_ops *gops)
2250 {
2251         gops->fifo.bind_channel = channel_gk20a_bind;
2252         gops->fifo.unbind_channel = channel_gk20a_unbind;
2253         gops->fifo.disable_channel = channel_gk20a_disable;
2254         gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
2255         gops->fifo.free_inst = channel_gk20a_free_inst;
2256         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
2257 }
2258
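/*
 * Channel ioctl dispatcher. Arguments are staged in a fixed-size buffer:
 * copied in when the ioctl direction includes _IOC_WRITE and copied back
 * out on success when it includes _IOC_READ. Commands that touch hardware
 * are bracketed with gk20a_busy()/gk20a_idle() to keep the GPU powered for
 * the duration of the call.
 */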
2259 long gk20a_channel_ioctl(struct file *filp,
2260         unsigned int cmd, unsigned long arg)
2261 {
2262         struct channel_gk20a *ch = filp->private_data;
2263         struct platform_device *dev = ch->g->dev;
2264         u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
2265         int err = 0;
2266
2267         gk20a_dbg_fn("start %d", _IOC_NR(cmd));
2268
2269         if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
2270                 (_IOC_NR(cmd) == 0) ||
2271                 (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
2272                 (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
2273                 return -EINVAL;
2274
2275         if (_IOC_DIR(cmd) & _IOC_WRITE) {
2276                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2277                         return -EFAULT;
2278         }
2279
2280         switch (cmd) {
2281         case NVGPU_IOCTL_CHANNEL_OPEN:
2282                 err = gk20a_channel_open_ioctl(ch->g,
2283                         (struct nvgpu_channel_open_args *)buf);
2284                 break;
2285         case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
2286                 break;
2287         case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2288                 err = gk20a_busy(dev);
2289                 if (err) {
2290                         dev_err(&dev->dev,
2291                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2292                                 __func__, cmd);
2293                         return err;
2294                 }
2295                 err = ch->g->ops.gr.alloc_obj_ctx(ch,
2296                                 (struct nvgpu_alloc_obj_ctx_args *)buf);
2297                 gk20a_idle(dev);
2298                 break;
2299         case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
2300                 err = gk20a_busy(dev);
2301                 if (err) {
2302                         dev_err(&dev->dev,
2303                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2304                                 __func__, cmd);
2305                         return err;
2306                 }
2307                 err = ch->g->ops.gr.free_obj_ctx(ch,
2308                                 (struct nvgpu_free_obj_ctx_args *)buf);
2309                 gk20a_idle(dev);
2310                 break;
2311         case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
2312                 err = gk20a_busy(dev);
2313                 if (err) {
2314                         dev_err(&dev->dev,
2315                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2316                                 __func__, cmd);
2317                         return err;
2318                 }
2319                 err = gk20a_alloc_channel_gpfifo(ch,
2320                                 (struct nvgpu_alloc_gpfifo_args *)buf);
2321                 gk20a_idle(dev);
2322                 break;
2323         case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2324                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2325                                 (struct nvgpu_submit_gpfifo_args *)buf);
2326                 break;
2327         case NVGPU_IOCTL_CHANNEL_WAIT:
2328                 err = gk20a_busy(dev);
2329                 if (err) {
2330                         dev_err(&dev->dev,
2331                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2332                                 __func__, cmd);
2333                         return err;
2334                 }
2335                 err = gk20a_channel_wait(ch,
2336                                 (struct nvgpu_wait_args *)buf);
2337                 gk20a_idle(dev);
2338                 break;
2339         case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
2340                 err = gk20a_busy(dev);
2341                 if (err) {
2342                         dev_err(&dev->dev,
2343                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2344                                 __func__, cmd);
2345                         return err;
2346                 }
2347                 err = gk20a_channel_zcull_bind(ch,
2348                                 (struct nvgpu_zcull_bind_args *)buf);
2349                 gk20a_idle(dev);
2350                 break;
2351         case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2352                 err = gk20a_busy(dev);
2353                 if (err) {
2354                         dev_err(&dev->dev,
2355                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2356                                 __func__, cmd);
2357                         return err;
2358                 }
2359                 err = gk20a_init_error_notifier(ch,
2360                                 (struct nvgpu_set_error_notifier *)buf);
2361                 gk20a_idle(dev);
2362                 break;
2363 #ifdef CONFIG_GK20A_CYCLE_STATS
2364         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
2365                 err = gk20a_busy(dev);
2366                 if (err) {
2367                         dev_err(&dev->dev,
2368                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2369                                 __func__, cmd);
2370                         return err;
2371                 }
2372                 err = gk20a_channel_cycle_stats(ch,
2373                                 (struct nvgpu_cycle_stats_args *)buf);
2374                 gk20a_idle(dev);
2375                 break;
2376 #endif
2377         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
2378         {
2379                 u32 timeout =
2380                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2381                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2382                            timeout, ch->hw_chid);
2383                 ch->timeout_ms_max = timeout;
2384                 break;
2385         }
2386         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2387         {
2388                 u32 timeout =
2389                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2390                 bool timeout_debug_dump = !((u32)
2391                         ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
2392                         (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
2393                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2394                            timeout, ch->hw_chid);
2395                 ch->timeout_ms_max = timeout;
2396                 ch->timeout_debug_dump = timeout_debug_dump;
2397                 break;
2398         }
2399         case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
2400                 ((struct nvgpu_get_param_args *)buf)->value =
2401                         ch->has_timedout;
2402                 break;
2403         case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
2404                 err = gk20a_busy(dev);
2405                 if (err) {
2406                         dev_err(&dev->dev,
2407                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2408                                 __func__, cmd);
2409                         return err;
2410                 }
2411                 gk20a_channel_set_priority(ch,
2412                         ((struct nvgpu_set_priority_args *)buf)->priority);
2413                 gk20a_idle(dev);
2414                 break;
2415         case NVGPU_IOCTL_CHANNEL_ENABLE:
2416                 err = gk20a_busy(dev);
2417                 if (err) {
2418                         dev_err(&dev->dev,
2419                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2420                                 __func__, cmd);
2421                         return err;
2422                 }
2423                 /* enable channel */
2424                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2425                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2426                         ccsr_channel_enable_set_true_f());
2427                 gk20a_idle(dev);
2428                 break;
2429         case NVGPU_IOCTL_CHANNEL_DISABLE:
2430                 err = gk20a_busy(dev);
2431                 if (err) {
2432                         dev_err(&dev->dev,
2433                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2434                                 __func__, cmd);
2435                         return err;
2436                 }
2437                 /* disable channel */
2438                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2439                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2440                         ccsr_channel_enable_clr_true_f());
2441                 gk20a_idle(dev);
2442                 break;
2443         case NVGPU_IOCTL_CHANNEL_PREEMPT:
2444                 err = gk20a_busy(dev);
2445                 if (err) {
2446                         dev_err(&dev->dev,
2447                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2448                                 __func__, cmd);
2449                         return err;
2450                 }
2451                 err = gk20a_fifo_preempt(ch->g, ch);
2452                 gk20a_idle(dev);
2453                 break;
2454         case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
2455                 err = gk20a_busy(dev);
2456                 if (err) {
2457                         dev_err(&dev->dev,
2458                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2459                                 __func__, cmd);
2460                         return err;
2461                 }
2462                 err = gk20a_fifo_force_reset_ch(ch, true);
2463                 gk20a_idle(dev);
2464                 break;
2465         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
2466                 err = gk20a_channel_events_ctrl(ch,
2467                            (struct nvgpu_channel_events_ctrl_args *)buf);
2468                 break;
2469         default:
2470                 dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2471                 err = -ENOTTY;
2472                 break;
2473         }
2474
2475         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2476                 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
2477
2478         gk20a_dbg_fn("end");
2479
2480         return err;
2481 }