[linux-3.10.git] drivers/gpu/nvgpu/gk20a/channel_gk20a.c (commit f554cf77c2fbeb4f7758303a9b5344f5014a7d36)
1 /*
2  * GK20A Graphics channel
3  *
4  * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18
19 #include <linux/nvhost.h>
20 #include <linux/list.h>
21 #include <linux/delay.h>
22 #include <linux/highmem.h> /* needed for nvmap.h */
23 #include <trace/events/gk20a.h>
24 #include <linux/scatterlist.h>
25 #include <linux/file.h>
26 #include <linux/anon_inodes.h>
27 #include <linux/dma-buf.h>
28
29 #include "debug_gk20a.h"
30
31 #include "gk20a.h"
32 #include "dbg_gpu_gk20a.h"
33 #include "fence_gk20a.h"
34 #include "semaphore_gk20a.h"
35
36 #include "hw_ram_gk20a.h"
37 #include "hw_fifo_gk20a.h"
38 #include "hw_pbdma_gk20a.h"
39 #include "hw_ccsr_gk20a.h"
40 #include "hw_ltc_gk20a.h"
41
42 #define NVMAP_HANDLE_PARAM_SIZE 1
43
44 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
45 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
46
47 static void free_priv_cmdbuf(struct channel_gk20a *c,
48                              struct priv_cmd_entry *e);
49 static void recycle_priv_cmdbuf(struct channel_gk20a *c);
50
51 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
52 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
53
54 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
55 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
56
57 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
58
59 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
60                                         bool add);
61 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
62
63 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
64 {
65         struct channel_gk20a *ch = NULL;
66         int chid;
67
68         mutex_lock(&f->ch_inuse_mutex);
69         for (chid = 0; chid < f->num_channels; chid++) {
70                 if (!f->channel[chid].in_use) {
71                         f->channel[chid].in_use = true;
72                         ch = &f->channel[chid];
73                         break;
74                 }
75         }
76         mutex_unlock(&f->ch_inuse_mutex);
77
78         return ch;
79 }
80
81 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
82 {
83         mutex_lock(&f->ch_inuse_mutex);
84         f->channel[c->hw_chid].in_use = false;
85         mutex_unlock(&f->ch_inuse_mutex);
86 }
87
88 int channel_gk20a_commit_va(struct channel_gk20a *c)
89 {
90         u64 addr;
91         u32 addr_lo;
92         u32 addr_hi;
93         void *inst_ptr;
94
95         gk20a_dbg_fn("");
96
97         inst_ptr = c->inst_block.cpuva;
98         if (!inst_ptr)
99                 return -ENOMEM;
100
101         addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
102         addr_lo = u64_lo32(addr >> 12);
103         addr_hi = u64_hi32(addr);
104
105         gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
106                    (u64)addr, addr_lo, addr_hi);
107
108         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
109                 ram_in_page_dir_base_target_vid_mem_f() |
110                 ram_in_page_dir_base_vol_true_f() |
111                 ram_in_page_dir_base_lo_f(addr_lo));
112
113         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
114                 ram_in_page_dir_base_hi_f(addr_hi));
115
116         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
117                  u64_lo32(c->vm->va_limit) | 0xFFF);
118
119         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
120                 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
121
122         if (c->g->ops.mm.set_big_page_size)
123                 c->g->ops.mm.set_big_page_size(c->g, inst_ptr,
124                                                c->vm->gmmu_page_sizes[gmmu_page_size_big]);
125
126         return 0;
127 }
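
/*
 * Illustrative sketch (added note, not part of the original source): how
 * channel_gk20a_commit_va() splits the page directory base for the
 * instance block.  Assuming a PDE base IOVA of 0x0000000123456000:
 *
 *   addr_lo = u64_lo32(0x123456000ULL >> 12) = 0x00123456;
 *   addr_hi = u64_hi32(0x123456000ULL)       = 0x00000001;
 *
 * Only the low word is pre-shifted by 12 bits; the high word is taken from
 * the full, unshifted 64-bit address before being packed with the
 * ram_in_page_dir_base_{lo,hi}_f() field helpers.
 */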
128
129 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
130 {
131         u32 addr_lo;
132         u32 addr_hi;
133         void *inst_ptr;
134
135         gk20a_dbg_fn("");
136
137         inst_ptr = c->inst_block.cpuva;
138         if (!inst_ptr)
139                 return -ENOMEM;
140
141         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
142         addr_hi = u64_hi32(c->userd_iova);
143
144         gk20a_dbg_info("channel %d : set ramfc userd 0x%016llx",
145                 c->hw_chid, (u64)c->userd_iova);
146
147         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
148                  pbdma_userd_target_vid_mem_f() |
149                  pbdma_userd_addr_f(addr_lo));
150
151         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
152                  pbdma_userd_target_vid_mem_f() |
153                  pbdma_userd_hi_addr_f(addr_hi));
154
155         return 0;
156 }
157
158 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
159                                 u32 timeslice_timeout)
160 {
161         void *inst_ptr;
162         int shift = 3;
163         int value = timeslice_timeout;
164
165         inst_ptr = c->inst_block.cpuva;
166         if (!inst_ptr)
167                 return -ENOMEM;
168
169         /* disable channel */
170         c->g->ops.fifo.disable_channel(c);
171
172         /* preempt the channel */
173         WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
174
175         /* value field is 8 bits long */
176         while (value >= 1 << 8) {
177                 value >>= 1;
178                 shift++;
179         }
180
181         /* time slice register is only 18 bits long */
182         if ((value << shift) >= 1<<19) {
183                 pr_err("Requested timeslice value is clamped to 18 bits\n");
184                 value = 255;
185                 shift = 10;
186         }
187
188         /* set new timeslice */
189         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
190                 value | (shift << 12) |
191                 fifo_runlist_timeslice_enable_true_f());
192
193         /* enable channel */
194         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
195                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
196                 ccsr_channel_enable_set_true_f());
197
198         return 0;
199 }
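
/*
 * Worked example (added note, not part of the original source) of the
 * timeslice encoding performed above.  For timeslice_timeout = 1000:
 *
 *   start: value = 1000, shift = 3
 *   loop:  value =  500, shift = 4
 *          value =  250, shift = 5      (250 < 256, so the loop stops)
 *
 *   register word = 250 | (5 << 12) | fifo_runlist_timeslice_enable_true_f();
 *
 * The product value << shift is preserved (1000 << 3 == 250 << 5 == 8000)
 * while the value is squeezed into its 8-bit field; products that would
 * overflow the timeslice field are clamped to value = 255, shift = 10.
 */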
200
201 int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
202                         u64 gpfifo_base, u32 gpfifo_entries)
203 {
204         void *inst_ptr;
205
206         gk20a_dbg_fn("");
207
208         inst_ptr = c->inst_block.cpuva;
209         if (!inst_ptr)
210                 return -ENOMEM;
211
212         memset(inst_ptr, 0, ram_fc_size_val_v());
213
214         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
215                 pbdma_gp_base_offset_f(
216                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
217
218         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
219                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
220                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
221
222         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
223                  pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
224
225         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
226                 pbdma_formats_gp_fermi0_f() |
227                 pbdma_formats_pb_fermi1_f() |
228                 pbdma_formats_mp_fermi0_f());
229
230         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
231                 pbdma_pb_header_priv_user_f() |
232                 pbdma_pb_header_method_zero_f() |
233                 pbdma_pb_header_subchannel_zero_f() |
234                 pbdma_pb_header_level_main_f() |
235                 pbdma_pb_header_first_true_f() |
236                 pbdma_pb_header_type_inc_f());
237
238         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
239                 pbdma_subdevice_id_f(1) |
240                 pbdma_subdevice_status_active_f() |
241                 pbdma_subdevice_channel_dma_enable_f());
242
243         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
244
245         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
246                 pbdma_acquire_retry_man_2_f() |
247                 pbdma_acquire_retry_exp_2_f() |
248                 pbdma_acquire_timeout_exp_max_f() |
249                 pbdma_acquire_timeout_man_max_f() |
250                 pbdma_acquire_timeout_en_disable_f());
251
252         gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
253                 fifo_runlist_timeslice_timeout_128_f() |
254                 fifo_runlist_timeslice_timescale_3_f() |
255                 fifo_runlist_timeslice_enable_true_f());
256
257         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
258                 fifo_pb_timeslice_timeout_16_f() |
259                 fifo_pb_timeslice_timescale_0_f() |
260                 fifo_pb_timeslice_enable_true_f());
261
262         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
263
264         return channel_gk20a_commit_userd(c);
265 }
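
/*
 * Illustrative note (added, not part of the original source): the GPFIFO
 * ring size is programmed as a power-of-two exponent.  With 512 entries
 * (entry counts are always rounded up to a power of two, see
 * gk20a_alloc_channel_gpfifo()):
 *
 *   pbdma_gp_base_hi_limit2_f(ilog2(512)) == pbdma_gp_base_hi_limit2_f(9)
 *
 * while the ring base address is stored right-shifted by
 * pbdma_gp_base_rsvd_s() bits, dropping alignment bits the hardware does
 * not need to store.
 */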
266
267 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
268 {
269         BUG_ON(!c->userd_cpu_va);
270
271         gk20a_dbg_fn("");
272
273         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
274         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
275         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
276         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
277         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
278         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
279         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
280         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
281         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
282         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
283
284         return 0;
285 }
286
287 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
288 {
289         struct gk20a *g = ch_gk20a->g;
290         struct fifo_gk20a *f = &g->fifo;
291         struct fifo_engine_info_gk20a *engine_info =
292                 f->engine_info + ENGINE_GR_GK20A;
293
294         u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
295                 >> ram_in_base_shift_v();
296
297         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
298                 ch_gk20a->hw_chid, inst_ptr);
299
300         ch_gk20a->bound = true;
301
302         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
303                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
304                  ~ccsr_channel_runlist_f(~0)) |
305                  ccsr_channel_runlist_f(engine_info->runlist_id));
306
307         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
308                 ccsr_channel_inst_ptr_f(inst_ptr) |
309                 ccsr_channel_inst_target_vid_mem_f() |
310                 ccsr_channel_inst_bind_true_f());
311
312         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
313                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
314                  ~ccsr_channel_enable_set_f(~0)) |
315                  ccsr_channel_enable_set_true_f());
316 }
317
318 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
319 {
320         struct gk20a *g = ch_gk20a->g;
321
322         gk20a_dbg_fn("");
323
324         if (ch_gk20a->bound)
325                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
326                         ccsr_channel_inst_ptr_f(0) |
327                         ccsr_channel_inst_bind_false_f());
328
329         ch_gk20a->bound = false;
330
331         /*
332          * if we are aggressive then we can destroy the syncpt
333          * resource at this point
334          * if not, then it will be destroyed at channel_free()
335          */
336         if (ch_gk20a->sync && ch_gk20a->sync->aggressive_destroy) {
337                 ch_gk20a->sync->destroy(ch_gk20a->sync);
338                 ch_gk20a->sync = NULL;
339         }
340 }
341
342 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
343 {
344         struct device *d = dev_from_gk20a(g);
345         int err = 0;
346         dma_addr_t iova;
347
348         gk20a_dbg_fn("");
349
350         ch->inst_block.size = ram_in_alloc_size_v();
351         ch->inst_block.cpuva = dma_alloc_coherent(d,
352                                         ch->inst_block.size,
353                                         &iova,
354                                         GFP_KERNEL);
355         if (!ch->inst_block.cpuva) {
356                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
357                 err = -ENOMEM;
358                 goto clean_up;
359         }
360
361         ch->inst_block.iova = iova;
362         ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
363                                                         ch->inst_block.iova);
364         if (!ch->inst_block.cpu_pa) {
365                 gk20a_err(d, "%s: failed to get physical address\n", __func__);
366                 err = -ENOMEM;
367                 goto clean_up;
368         }
369
370         gk20a_dbg_info("channel %d inst block physical addr: 0x%016llx",
371                 ch->hw_chid, (u64)ch->inst_block.cpu_pa);
372
373         gk20a_dbg_fn("done");
374         return 0;
375
376 clean_up:
377         gk20a_err(d, "fail");
378         g->ops.fifo.free_inst(g, ch);
379         return err;
380 }
381
382 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
383 {
384         struct device *d = dev_from_gk20a(g);
385
386         if (ch->inst_block.cpuva)
387                 dma_free_coherent(d, ch->inst_block.size,
388                                 ch->inst_block.cpuva, ch->inst_block.iova);
389         ch->inst_block.cpuva = NULL;
390         ch->inst_block.iova = 0;
391         memset(&ch->inst_block, 0, sizeof(struct inst_desc));
392 }
393
394 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
395 {
396         return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
397 }
398
399 void channel_gk20a_enable(struct channel_gk20a *ch)
400 {
401         /* enable channel */
402         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
403                 gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
404                 ccsr_channel_enable_set_true_f());
405 }
406
407 void channel_gk20a_disable(struct channel_gk20a *ch)
408 {
409         /* disable channel */
410         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
411                 gk20a_readl(ch->g,
412                         ccsr_channel_r(ch->hw_chid)) |
413                         ccsr_channel_enable_clr_true_f());
414 }
415
416 void gk20a_channel_abort(struct channel_gk20a *ch)
417 {
418         struct channel_gk20a_job *job, *n;
419         bool released_job_semaphore = false;
420
421         /* ensure no fences are pending */
422         if (ch->sync)
423                 ch->sync->set_min_eq_max(ch->sync);
424
425         /* release all job semaphores (applies only to jobs that use
426            semaphore synchronization) */
427         mutex_lock(&ch->jobs_lock);
428         list_for_each_entry_safe(job, n, &ch->jobs, list) {
429                 if (job->post_fence->semaphore) {
430                         gk20a_semaphore_release(job->post_fence->semaphore);
431                         released_job_semaphore = true;
432                 }
433         }
434         mutex_unlock(&ch->jobs_lock);
435
436         ch->g->ops.fifo.disable_channel(ch);
437
438         if (released_job_semaphore) {
439                 wake_up_interruptible_all(&ch->semaphore_wq);
440                 gk20a_channel_update(ch, 0);
441         }
442 }
443
444 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
445 {
446         bool channel_idle = false;
447         unsigned long end_jiffies = jiffies +
448                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
449
450         do {
451                 mutex_lock(&ch->jobs_lock);
452                 channel_idle = list_empty(&ch->jobs);
453                 mutex_unlock(&ch->jobs_lock);
454                 if (channel_idle)
455                         break;
456
457                 usleep_range(1000, 3000);
458         } while (time_before(jiffies, end_jiffies)
459                         || !tegra_platform_is_silicon());
460
461         if (!channel_idle) {
462                 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
463                                 ch->hw_chid);
464                 return -EBUSY;
465         }
466
467         return 0;
468 }
469
470 void gk20a_disable_channel(struct channel_gk20a *ch,
471                            bool finish,
472                            unsigned long finish_timeout)
473 {
474         if (finish) {
475                 int err = gk20a_channel_finish(ch, finish_timeout);
476                 WARN_ON(err);
477         }
478
479         /* disable the channel from hw and increment syncpoints */
480         gk20a_channel_abort(ch);
481
482         gk20a_wait_channel_idle(ch);
483
484         /* preempt the channel */
485         ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
486
487         /* remove channel from runlist */
488         channel_gk20a_update_runlist(ch, false);
489 }
490
491 #if defined(CONFIG_GK20A_CYCLE_STATS)
492
493 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
494 {
495         /* disable existing cyclestats buffer */
496         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
497         if (ch->cyclestate.cyclestate_buffer_handler) {
498                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
499                                 ch->cyclestate.cyclestate_buffer);
500                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
501                 ch->cyclestate.cyclestate_buffer_handler = NULL;
502                 ch->cyclestate.cyclestate_buffer = NULL;
503                 ch->cyclestate.cyclestate_buffer_size = 0;
504         }
505         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
506 }
507
508 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
509                        struct nvgpu_cycle_stats_args *args)
510 {
511         struct dma_buf *dmabuf;
512         void *virtual_address;
513
514         if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {
515
516                 /* set up new cyclestats buffer */
517                 dmabuf = dma_buf_get(args->dmabuf_fd);
518                 if (IS_ERR(dmabuf))
519                         return PTR_ERR(dmabuf);
520                 virtual_address = dma_buf_vmap(dmabuf);
521                 if (!virtual_address)
522                         return -ENOMEM;
523
524                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
525                 ch->cyclestate.cyclestate_buffer = virtual_address;
526                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
527                 return 0;
528
529         } else if (!args->dmabuf_fd &&
530                         ch->cyclestate.cyclestate_buffer_handler) {
531                 gk20a_free_cycle_stats_buffer(ch);
532                 return 0;
533
534         } else if (!args->dmabuf_fd &&
535                         !ch->cyclestate.cyclestate_buffer_handler) {
536                 /* no request from GL */
537                 return 0;
538
539         } else {
540                 pr_err("channel already has cyclestats buffer\n");
541                 return -EINVAL;
542         }
543 }
544 #endif
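
/*
 * Quick reference (added note, not part of the original source) for the
 * four cases handled by gk20a_channel_cycle_stats() above:
 *
 *   args->dmabuf_fd | existing buffer | action
 *   ----------------+-----------------+-------------------------------------
 *   non-zero        | none            | vmap the dmabuf, install as buffer
 *   zero            | present         | unmap and release the current buffer
 *   zero            | none            | nothing requested, return 0
 *   non-zero        | present         | reject with -EINVAL
 */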
545
546 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
547                 struct nvgpu_set_error_notifier *args) {
548         void *va;
549
550         struct dma_buf *dmabuf;
551
552         if (!args->mem) {
553                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
554                 return -EINVAL;
555         }
556
557         dmabuf = dma_buf_get(args->mem);
558
559         if (ch->error_notifier_ref)
560                 gk20a_free_error_notifiers(ch);
561
562         if (IS_ERR(dmabuf)) {
563                 pr_err("Invalid handle: %d\n", args->mem);
564                 return -EINVAL;
565         }
566         /* map handle */
567         va = dma_buf_vmap(dmabuf);
568         if (!va) {
569                 dma_buf_put(dmabuf);
570                 pr_err("Cannot map notifier handle\n");
571                 return -ENOMEM;
572         }
573
574         /* set channel notifiers pointer */
575         ch->error_notifier_ref = dmabuf;
576         ch->error_notifier = va + args->offset;
577         ch->error_notifier_va = va;
578         memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
579         return 0;
580 }
581
582 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
583 {
584         if (ch->error_notifier_ref) {
585                 struct timespec time_data;
586                 u64 nsec;
587                 getnstimeofday(&time_data);
588                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
589                                 (u64)time_data.tv_nsec;
590                 ch->error_notifier->time_stamp.nanoseconds[0] =
591                                 (u32)nsec;
592                 ch->error_notifier->time_stamp.nanoseconds[1] =
593                                 (u32)(nsec >> 32);
594                 ch->error_notifier->info32 = error;
595                 ch->error_notifier->status = 0xffff;
596                 gk20a_err(dev_from_gk20a(ch->g),
597                     "error notifier set to %d for ch %d\n", error, ch->hw_chid);
598         }
599 }
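
/*
 * Illustrative note (added, not part of the original source): the 64-bit
 * timestamp written by gk20a_set_error_notifier() is stored low word
 * first.  For nsec = 0x000000123456789A:
 *
 *   time_stamp.nanoseconds[0] = 0x3456789A;   low  32 bits
 *   time_stamp.nanoseconds[1] = 0x00000012;   high 32 bits
 */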
600
601 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
602 {
603         if (ch->error_notifier_ref) {
604                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
605                 dma_buf_put(ch->error_notifier_ref);
606                 ch->error_notifier_ref = 0;
607                 ch->error_notifier = 0;
608                 ch->error_notifier_va = 0;
609         }
610 }
611
612 void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
613 {
614         struct gk20a *g = ch->g;
615         struct device *d = dev_from_gk20a(g);
616         struct fifo_gk20a *f = &g->fifo;
617         struct gr_gk20a *gr = &g->gr;
618         struct vm_gk20a *ch_vm = ch->vm;
619         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
620         struct dbg_session_gk20a *dbg_s;
621
622         gk20a_dbg_fn("");
623
624         /* if engine reset was deferred, perform it now */
625         mutex_lock(&f->deferred_reset_mutex);
626         if (g->fifo.deferred_reset_pending) {
627                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
628                            " deferred, running now");
629                 fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
630                 g->fifo.mmu_fault_engines = 0;
631                 g->fifo.deferred_reset_pending = false;
632         }
633         mutex_unlock(&f->deferred_reset_mutex);
634
635         if (!ch->bound)
636                 return;
637
638         if (!gk20a_channel_as_bound(ch))
639                 goto unbind;
640
641         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
642                         timeout);
643
644         gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);
645
646         gk20a_free_error_notifiers(ch);
647
648         /* release channel ctx */
649         g->ops.gr.free_channel_ctx(ch);
650
651         gk20a_gr_flush_channel_tlb(gr);
652
653         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
654
655         /* free gpfifo */
656         if (ch->gpfifo.gpu_va)
657                 gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
658                         ch->gpfifo.size, gk20a_mem_flag_none);
659         if (ch->gpfifo.cpu_va)
660                 dma_free_coherent(d, ch->gpfifo.size,
661                         ch->gpfifo.cpu_va, ch->gpfifo.iova);
662         ch->gpfifo.cpu_va = NULL;
663         ch->gpfifo.iova = 0;
664
665         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
666
667 #if defined(CONFIG_GK20A_CYCLE_STATS)
668         gk20a_free_cycle_stats_buffer(ch);
669 #endif
670
671         channel_gk20a_free_priv_cmdbuf(ch);
672
673         /* sync must be destroyed before releasing channel vm */
674         if (ch->sync) {
675                 ch->sync->destroy(ch->sync);
676                 ch->sync = NULL;
677         }
678
679         /* release channel binding to the as_share */
680         if (ch_vm->as_share)
681                 gk20a_as_release_share(ch_vm->as_share);
682         else
683                 gk20a_vm_put(ch_vm);
684
685         ch->update_fn = NULL;
686         ch->update_fn_data = NULL;
687
688 unbind:
689         if (gk20a_is_channel_marked_as_tsg(ch))
690                 gk20a_tsg_unbind_channel(ch);
691
692         g->ops.fifo.unbind_channel(ch);
693         g->ops.fifo.free_inst(g, ch);
694
695         ch->vpr = false;
696         ch->vm = NULL;
697
698         mutex_lock(&ch->submit_lock);
699         gk20a_fence_put(ch->last_submit.pre_fence);
700         gk20a_fence_put(ch->last_submit.post_fence);
701         ch->last_submit.pre_fence = NULL;
702         ch->last_submit.post_fence = NULL;
703         mutex_unlock(&ch->submit_lock);
704         WARN_ON(ch->sync);
705
706         /* unlink all debug sessions */
707         mutex_lock(&ch->dbg_s_lock);
708
709         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
710                 dbg_s->ch = NULL;
711                 list_del_init(&dbg_s->dbg_s_list_node);
712         }
713
714         mutex_unlock(&ch->dbg_s_lock);
715
716         /* ALWAYS last */
717         release_used_channel(f, ch);
718 }
719
720 int gk20a_channel_release(struct inode *inode, struct file *filp)
721 {
722         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
723         struct gk20a *g = ch ? ch->g : NULL;
724         int err;
725
726         if (!ch)
727                 return 0;
728
729         trace_gk20a_channel_release(dev_name(&g->dev->dev));
730
731         err = gk20a_busy(ch->g->dev);
732         if (err) {
733                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
734                         ch->hw_chid);
735                 return err;
736         }
737         gk20a_free_channel(ch, true);
738         gk20a_idle(ch->g->dev);
739
740         filp->private_data = NULL;
741         return 0;
742 }
743
744 static void gk20a_channel_update_runcb_fn(struct work_struct *work)
745 {
746         struct channel_gk20a *ch =
747                 container_of(work, struct channel_gk20a, update_fn_work);
748         ch->update_fn(ch, ch->update_fn_data);
749 }
750
751 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
752                 void (*update_fn)(struct channel_gk20a *, void *),
753                 void *update_fn_data)
754 {
755         struct channel_gk20a *ch = gk20a_open_new_channel(g);
756
757         if (ch) {
758                 ch->update_fn = update_fn;
759                 ch->update_fn_data = update_fn_data;
760         }
761
762         return ch;
763 }
764
765 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
766 {
767         struct fifo_gk20a *f = &g->fifo;
768         struct channel_gk20a *ch;
769
770         ch = acquire_unused_channel(f);
771         if (ch == NULL) {
772                 /* TBD: we want to make this virtualizable */
773                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
774                 return 0;
775         }
776
777         ch->g = g;
778
779         if (g->ops.fifo.alloc_inst(g, ch)) {
780                 ch->in_use = false;
781                 gk20a_err(dev_from_gk20a(g),
782                            "failed to open gk20a channel, out of inst mem");
783
784                 return 0;
785         }
786         g->ops.fifo.bind_channel(ch);
787         ch->pid = current->pid;
788
789         /* By default, channel is regular (non-TSG) channel */
790         ch->tsgid = NVGPU_INVALID_TSG_ID;
791
792         /* reset timeout counter and update timestamp */
793         ch->timeout_accumulated_ms = 0;
794         ch->timeout_gpfifo_get = 0;
795         /* set gr host default timeout */
796         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
797         ch->timeout_debug_dump = true;
798         ch->has_timedout = false;
799         ch->obj_class = 0;
800
801         /* The channel is *not* runnable at this point. It still needs to have
802          * an address space bound and a gpfifo and grctx allocated. */
803
804         init_waitqueue_head(&ch->notifier_wq);
805         init_waitqueue_head(&ch->semaphore_wq);
806         init_waitqueue_head(&ch->submit_wq);
807
808         mutex_init(&ch->poll_events.lock);
809         ch->poll_events.events_enabled = false;
810         ch->poll_events.num_pending_events = 0;
811
812         ch->update_fn = NULL;
813         ch->update_fn_data = NULL;
814
815         INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
816
817         return ch;
818 }
819
820 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
821 {
822         int err;
823         struct channel_gk20a *ch;
824
825         trace_gk20a_channel_open(dev_name(&g->dev->dev));
826
827         err = gk20a_busy(g->dev);
828         if (err) {
829                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
830                 return err;
831         }
832         ch = gk20a_open_new_channel(g);
833         gk20a_idle(g->dev);
834         if (!ch) {
835                 gk20a_err(dev_from_gk20a(g),
836                         "failed to get a free channel");
837                 return -ENOMEM;
838         }
839
840         filp->private_data = ch;
841         return 0;
842 }
843
844 int gk20a_channel_open(struct inode *inode, struct file *filp)
845 {
846         struct gk20a *g = container_of(inode->i_cdev,
847                         struct gk20a, channel.cdev);
848         int ret;
849
850         gk20a_dbg_fn("start");
851         ret = __gk20a_channel_open(g, filp);
852
853         gk20a_dbg_fn("end");
854         return ret;
855 }
856
857 /* Allocate a private command buffer,
858    used for inserting commands before/after user-submitted buffers. */
859 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
860 {
861         struct device *d = dev_from_gk20a(c->g);
862         struct vm_gk20a *ch_vm = c->vm;
863         struct priv_cmd_queue *q = &c->priv_cmd_q;
864         struct priv_cmd_entry *e;
865         u32 i = 0, size;
866         int err = 0;
867         struct sg_table *sgt;
868         dma_addr_t iova;
869
870         /* Kernel can insert gpfifos before and after user gpfifos.
871            Before user gpfifos, kernel inserts fence_wait, which takes
872            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
873            After user gpfifos, kernel inserts fence_get, which takes
874            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
875            = 6 dwords.
876            Worst case, if the kernel adds both of them for every user gpfifo,
877            the max size of priv_cmdbuf is:
878            (gpfifo entry number * 2 / 3) * (4 + 6) * 4 bytes */
879         size = roundup_pow_of_two(
880                 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
881
882         q->mem.base_cpuva = dma_alloc_coherent(d, size,
883                                         &iova,
884                                         GFP_KERNEL);
885         if (!q->mem.base_cpuva) {
886                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
887                 err = -ENOMEM;
888                 goto clean_up;
889         }
890
891         q->mem.base_iova = iova;
892         q->mem.size = size;
893
894         err = gk20a_get_sgtable(d, &sgt,
895                         q->mem.base_cpuva, q->mem.base_iova, size);
896         if (err) {
897                 gk20a_err(d, "%s: failed to create sg table\n", __func__);
898                 goto clean_up;
899         }
900
901         memset(q->mem.base_cpuva, 0, size);
902
903         q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
904                                         size,
905                                         0, /* flags */
906                                         gk20a_mem_flag_none);
907         if (!q->base_gpuva) {
908                 gk20a_err(d, "ch %d : failed to map gpu va "
909                            "for priv cmd buffer", c->hw_chid);
910                 err = -ENOMEM;
911                 goto clean_up_sgt;
912         }
913
914         q->size = q->mem.size / sizeof(u32);
915
916         INIT_LIST_HEAD(&q->head);
917         INIT_LIST_HEAD(&q->free);
918
919         /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
920         for (i = 0; i < q->size / 4; i++) {
921                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
922                 if (!e) {
923                         gk20a_err(d, "ch %d: fail to pre-alloc cmd entry",
924                                 c->hw_chid);
925                         err = -ENOMEM;
926                         goto clean_up_sgt;
927                 }
928                 e->pre_alloc = true;
929                 list_add(&e->list, &q->free);
930         }
931
932         gk20a_free_sgtable(&sgt);
933
934         return 0;
935
936 clean_up_sgt:
937         gk20a_free_sgtable(&sgt);
938 clean_up:
939         channel_gk20a_free_priv_cmdbuf(c);
940         return err;
941 }
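
/*
 * Worked sizing example (added note, not part of the original source) for
 * channel_gk20a_alloc_priv_cmdbuf() above, assuming a 1024-entry gpfifo:
 *
 *   1024 * 2 * 10 * sizeof(u32) / 3 = 27306 bytes
 *   roundup_pow_of_two(27306)       = 32768 bytes
 *
 * which gives q->size = 32768 / sizeof(u32) = 8192 words and
 * 8192 / 4 = 2048 pre-allocated priv_cmd_entry descriptors on the free
 * list.
 */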
942
943 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
944 {
945         struct device *d = dev_from_gk20a(c->g);
946         struct vm_gk20a *ch_vm = c->vm;
947         struct priv_cmd_queue *q = &c->priv_cmd_q;
948         struct priv_cmd_entry *e;
949         struct list_head *pos, *tmp, *head;
950
951         if (q->size == 0)
952                 return;
953
954         if (q->base_gpuva)
955                 gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
956                                 q->mem.size, gk20a_mem_flag_none);
957         if (q->mem.base_cpuva)
958                 dma_free_coherent(d, q->mem.size,
959                         q->mem.base_cpuva, q->mem.base_iova);
960         q->mem.base_cpuva = NULL;
961         q->mem.base_iova = 0;
962
963         /* free used list */
964         head = &q->head;
965         list_for_each_safe(pos, tmp, head) {
966                 e = container_of(pos, struct priv_cmd_entry, list);
967                 free_priv_cmdbuf(c, e);
968         }
969
970         /* free free list */
971         head = &q->free;
972         list_for_each_safe(pos, tmp, head) {
973                 e = container_of(pos, struct priv_cmd_entry, list);
974                 e->pre_alloc = false;
975                 free_priv_cmdbuf(c, e);
976         }
977
978         memset(q, 0, sizeof(struct priv_cmd_queue));
979 }
980
981 /* allocate a cmd buffer with given size. size is number of u32 entries */
982 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
983                              struct priv_cmd_entry **entry)
984 {
985         struct priv_cmd_queue *q = &c->priv_cmd_q;
986         struct priv_cmd_entry *e;
987         struct list_head *node;
988         u32 free_count;
989         u32 size = orig_size;
990         bool no_retry = false;
991
992         gk20a_dbg_fn("size %d", orig_size);
993
994         *entry = NULL;
995         /* if free space at the end is less than requested, increase the size
996          * so that the actual allocation starts at the beginning of the buffer. */
997          * to make the real allocated space start from beginning. */
998         if (q->put + size > q->size)
999                 size = orig_size + (q->size - q->put);
1000
1001         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
1002                         c->hw_chid, q->get, q->put);
1003
1004 TRY_AGAIN:
1005         free_count = (q->size - (q->put - q->get) - 1) % q->size;
1006
1007         if (size > free_count) {
1008                 if (!no_retry) {
1009                         recycle_priv_cmdbuf(c);
1010                         no_retry = true;
1011                         goto TRY_AGAIN;
1012                 } else
1013                         return -EAGAIN;
1014         }
1015
1016         if (unlikely(list_empty(&q->free))) {
1017
1018                 gk20a_dbg_info("ch %d: run out of pre-alloc entries",
1019                         c->hw_chid);
1020
1021                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1022                 if (!e) {
1023                         gk20a_err(dev_from_gk20a(c->g),
1024                                 "ch %d: fail to allocate priv cmd entry",
1025                                 c->hw_chid);
1026                         return -ENOMEM;
1027                 }
1028         } else  {
1029                 node = q->free.next;
1030                 list_del(node);
1031                 e = container_of(node, struct priv_cmd_entry, list);
1032         }
1033
1034         e->size = orig_size;
1035         e->gp_get = c->gpfifo.get;
1036         e->gp_put = c->gpfifo.put;
1037         e->gp_wrap = c->gpfifo.wrap;
1038
1039         /* if we have increased size to skip free space at the end, set put
1040            to the beginning of the cmd buffer (0) + orig_size */
1041         if (size != orig_size) {
1042                 e->ptr = q->mem.base_cpuva;
1043                 e->gva = q->base_gpuva;
1044                 q->put = orig_size;
1045         } else {
1046                 e->ptr = q->mem.base_cpuva + q->put;
1047                 e->gva = q->base_gpuva + q->put * sizeof(u32);
1048                 q->put = (q->put + orig_size) & (q->size - 1);
1049         }
1050
1051         /* we already handled q->put + size > q->size so BUG_ON this */
1052         BUG_ON(q->put > q->size);
1053
1054         /* add new entry to head since we free from head */
1055         list_add(&e->list, &q->head);
1056
1057         *entry = e;
1058
1059         gk20a_dbg_fn("done");
1060
1061         return 0;
1062 }
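
/*
 * Illustrative sketch (added note, not part of the original source) of the
 * end-of-queue handling in gk20a_channel_alloc_priv_cmdbuf().  With
 * q->size = 8192 words, q->put = 8190 and a request of orig_size = 6:
 *
 *   q->put + 6 > q->size, so size = 6 + (8192 - 8190) = 8 words
 *   (the free-space check then also covers the 2 wasted tail words)
 *
 *   e->ptr = q->mem.base_cpuva;      entry starts at offset 0
 *   e->gva = q->base_gpuva;
 *   q->put = 6;                      i.e. orig_size, not size
 *
 * so a command buffer never straddles the wrap point of the ring.
 */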
1063
1064 /* Don't call this to free an explicit cmd entry.
1065  * It doesn't update priv_cmd_queue get/put */
1066 static void free_priv_cmdbuf(struct channel_gk20a *c,
1067                              struct priv_cmd_entry *e)
1068 {
1069         struct priv_cmd_queue *q = &c->priv_cmd_q;
1070
1071         if (!e)
1072                 return;
1073
1074         list_del(&e->list);
1075
1076         if (unlikely(!e->pre_alloc))
1077                 kfree(e);
1078         else {
1079                 memset(e, 0, sizeof(struct priv_cmd_entry));
1080                 e->pre_alloc = true;
1081                 list_add(&e->list, &q->free);
1082         }
1083 }
1084
1085 /* free entries if they're no longer being used */
1086 static void recycle_priv_cmdbuf(struct channel_gk20a *c)
1087 {
1088         struct priv_cmd_queue *q = &c->priv_cmd_q;
1089         struct priv_cmd_entry *e, *tmp;
1090         struct list_head *head = &q->head;
1091         bool wrap_around, found = false;
1092
1093         gk20a_dbg_fn("");
1094
1095         /* Find the most recent free entry. Free it and everything before it */
1096         list_for_each_entry(e, head, list) {
1097
1098                 gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
1099                         "curr get:put:wrap %d:%d:%d",
1100                         c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
1101                         c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
1102
1103                 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
1104                 if (e->gp_get < e->gp_put) {
1105                         if (c->gpfifo.get >= e->gp_put ||
1106                             wrap_around) {
1107                                 found = true;
1108                                 break;
1109                         } else
1110                                 e->gp_get = c->gpfifo.get;
1111                 } else if (e->gp_get > e->gp_put) {
1112                         if (wrap_around &&
1113                             c->gpfifo.get >= e->gp_put) {
1114                                 found = true;
1115                                 break;
1116                         } else
1117                                 e->gp_get = c->gpfifo.get;
1118                 }
1119         }
1120
1121         if (found)
1122                 q->get = (e->ptr - q->mem.base_cpuva) + e->size;
1123         else {
1124                 gk20a_dbg_info("no free entry recycled");
1125                 return;
1126         }
1127
1128         list_for_each_entry_safe_continue(e, tmp, head, list) {
1129                 free_priv_cmdbuf(c, e);
1130         }
1131
1132         gk20a_dbg_fn("done");
1133 }
1134
1135
1136 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1137                 struct nvgpu_alloc_gpfifo_args *args)
1138 {
1139         struct gk20a *g = c->g;
1140         struct device *d = dev_from_gk20a(g);
1141         struct vm_gk20a *ch_vm;
1142         u32 gpfifo_size;
1143         int err = 0;
1144         struct sg_table *sgt;
1145         dma_addr_t iova;
1146
1147         /* Kernel can insert one extra gpfifo entry before user submitted gpfifos
1148            and another one after, for internal usage. Triple the requested size. */
1149         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
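
        /*
         * Sizing example (added note, not part of the original source):
         * a request for 100 entries becomes
         *   roundup_pow_of_two(100 * 3) = roundup_pow_of_two(300) = 512
         * entries, keeping the ring a power of two as required by the
         * "& (entry_num - 1)" put/get wrap arithmetic used at submit time.
         */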
1150
1151         if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1152                 c->vpr = true;
1153
1154         /* an address space needs to have been bound at this point. */
1155         if (!gk20a_channel_as_bound(c)) {
1156                 gk20a_err(d,
1157                             "not bound to an address space at time of gpfifo"
1158                             " allocation.");
1159                 return -EINVAL;
1160         }
1161         ch_vm = c->vm;
1162
1163         c->cmds_pending = false;
1164         mutex_lock(&c->submit_lock);
1165         gk20a_fence_put(c->last_submit.pre_fence);
1166         gk20a_fence_put(c->last_submit.post_fence);
1167         c->last_submit.pre_fence = NULL;
1168         c->last_submit.post_fence = NULL;
1169         mutex_unlock(&c->submit_lock);
1170
1171         c->ramfc.offset = 0;
1172         c->ramfc.size = ram_in_ramfc_s() / 8;
1173
1174         if (c->gpfifo.cpu_va) {
1175                 gk20a_err(d, "channel %d : "
1176                            "gpfifo already allocated", c->hw_chid);
1177                 return -EEXIST;
1178         }
1179
1180         c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
1181         c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
1182                                                 c->gpfifo.size,
1183                                                 &iova,
1184                                                 GFP_KERNEL);
1185         if (!c->gpfifo.cpu_va) {
1186                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1187                 err = -ENOMEM;
1188                 goto clean_up;
1189         }
1190
1191         c->gpfifo.iova = iova;
1192         c->gpfifo.entry_num = gpfifo_size;
1193
1194         c->gpfifo.get = c->gpfifo.put = 0;
1195
1196         err = gk20a_get_sgtable(d, &sgt,
1197                         c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
1198         if (err) {
1199                 gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
1200                 goto clean_up;
1201         }
1202
1203         c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
1204                                         &sgt,
1205                                         c->gpfifo.size,
1206                                         0, /* flags */
1207                                         gk20a_mem_flag_none);
1208         if (!c->gpfifo.gpu_va) {
1209                 gk20a_err(d, "channel %d : failed to map"
1210                            " gpu_va for gpfifo", c->hw_chid);
1211                 err = -ENOMEM;
1212                 goto clean_up_sgt;
1213         }
1214
1215         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1216                 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1217
1218         channel_gk20a_setup_userd(c);
1219
1220         err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1221         if (err)
1222                 goto clean_up_unmap;
1223
1224         /* TBD: setup engine contexts */
1225
1226         err = channel_gk20a_alloc_priv_cmdbuf(c);
1227         if (err)
1228                 goto clean_up_unmap;
1229
1230         err = channel_gk20a_update_runlist(c, true);
1231         if (err)
1232                 goto clean_up_unmap;
1233
1234         gk20a_free_sgtable(&sgt);
1235
1236         gk20a_dbg_fn("done");
1237         return 0;
1238
1239 clean_up_unmap:
1240         gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
1241                 c->gpfifo.size, gk20a_mem_flag_none);
1242 clean_up_sgt:
1243         gk20a_free_sgtable(&sgt);
1244 clean_up:
1245         dma_free_coherent(d, c->gpfifo.size,
1246                 c->gpfifo.cpu_va, c->gpfifo.iova);
1247         c->gpfifo.cpu_va = NULL;
1248         c->gpfifo.iova = 0;
1249         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1250         gk20a_err(d, "fail");
1251         return err;
1252 }
1253
1254 static inline int wfi_cmd_size(void)
1255 {
1256         return 2;
1257 }
1258 void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
1259 {
1260         /* wfi */
1261         cmd->ptr[(*i)++] = 0x2001001E;
1262         /* handle, ignored */
1263         cmd->ptr[(*i)++] = 0x00000000;
1264 }
1265
1266 static inline bool check_gp_put(struct gk20a *g,
1267                                 struct channel_gk20a *c)
1268 {
1269         u32 put;
1270         /* gp_put changed unexpectedly since last update? */
1271         put = gk20a_bar1_readl(g,
1272                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1273         if (c->gpfifo.put != put) {
1274                 /*TBD: BUG_ON/teardown on this*/
1275                 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1276                            "since last update");
1277                 c->gpfifo.put = put;
1278                 return false; /* surprise! */
1279         }
1280         return true; /* checked out ok */
1281 }
1282
1283 /* Call this periodically to determine how the gpfifo is draining. */
1284 static inline u32 update_gp_get(struct gk20a *g,
1285                                 struct channel_gk20a *c)
1286 {
1287         u32 new_get = gk20a_bar1_readl(g,
1288                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
1289         if (new_get < c->gpfifo.get)
1290                 c->gpfifo.wrap = !c->gpfifo.wrap;
1291         c->gpfifo.get = new_get;
1292         return new_get;
1293 }
1294
1295 static inline u32 gp_free_count(struct channel_gk20a *c)
1296 {
1297         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1298                 c->gpfifo.entry_num;
1299 }
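
/*
 * Worked example (added note, not part of the original source) of the ring
 * accounting in gp_free_count(), with entry_num = 512:
 *
 *   get = 10,  put = 500:  (512 - (500 - 10) - 1) % 512 = 21 free entries
 *   get = 500, put = 10:   put - get wraps as a u32, and the final
 *                          "% entry_num" folds the result back to 489
 *
 * One slot is always kept unused so that put == get unambiguously means
 * "empty" rather than "full".
 */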
1300
1301 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1302                 u32 timeout_delta_ms)
1303 {
1304         u32 gpfifo_get = update_gp_get(ch->g, ch);
1305         /* Count consecutive timeout isrs */
1306         if (gpfifo_get == ch->timeout_gpfifo_get) {
1307                 /* we didn't advance since previous channel timeout check */
1308                 ch->timeout_accumulated_ms += timeout_delta_ms;
1309         } else {
1310                 /* first timeout isr encountered */
1311                 ch->timeout_accumulated_ms = timeout_delta_ms;
1312         }
1313
1314         ch->timeout_gpfifo_get = gpfifo_get;
1315
1316         return ch->g->timeouts_enabled &&
1317                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1318 }
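
/*
 * Illustrative note (added, not part of the original source): assuming
 * timeout_ms_max = 3000 and a timeout interrupt every 100 ms
 * (timeout_delta_ms = 100), a channel whose gp_get stops advancing
 * accumulates 100, 200, 300, ... and is reported as hung once the running
 * total exceeds 3000 ms, provided g->timeouts_enabled is set.  Any forward
 * progress of gp_get restarts the accumulation at 100 ms.
 */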
1319
1320
1321 /* Issue a syncpoint increment *preceded* by a wait-for-idle
1322  * command.  All commands on the channel will have been
1323  * consumed at the time the fence syncpoint increment occurs.
1324  */
1325 static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
1326 {
1327         struct priv_cmd_entry *cmd = NULL;
1328         struct gk20a *g = c->g;
1329         u32 free_count;
1330         int err;
1331
1332         if (c->has_timedout)
1333                 return -ETIMEDOUT;
1334
1335         update_gp_get(g, c);
1336         free_count = gp_free_count(c);
1337         if (unlikely(!free_count)) {
1338                 gk20a_err(dev_from_gk20a(g),
1339                            "not enough gpfifo space");
1340                 return -EAGAIN;
1341         }
1342
1343         mutex_lock(&c->submit_lock);
1344
1345         if (!c->sync) {
1346                 c->sync = gk20a_channel_sync_create(c);
1347                 if (!c->sync) {
1348                         mutex_unlock(&c->submit_lock);
1349                         return -ENOMEM;
1350                 }
1351         }
1352
1353         gk20a_fence_put(c->last_submit.pre_fence);
1354         gk20a_fence_put(c->last_submit.post_fence);
1355         c->last_submit.pre_fence = NULL;
1356         c->last_submit.post_fence = NULL;
1357
1358         err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence);
1359         if (unlikely(err)) {
1360                 mutex_unlock(&c->submit_lock);
1361                 return err;
1362         }
1363
1364         WARN_ON(!c->last_submit.post_fence->wfi);
1365
1366         c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1367         c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
1368                 pbdma_gp_entry1_length_f(cmd->size);
1369
1370         c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
1371
1372         /* save gp_put */
1373         cmd->gp_put = c->gpfifo.put;
1374
1375         gk20a_bar1_writel(g,
1376                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1377                 c->gpfifo.put);
1378
1379         mutex_unlock(&c->submit_lock);
1380
1381         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1382                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1383
1384         return 0;
1385 }
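
/*
 * Illustrative sketch (added note, not part of the original source) of the
 * gpfifo entry packing used above.  Assuming the WFI command buffer was
 * mapped at gva = 0x0000000140001000 and is cmd->size = 6 words long:
 *
 *   entry0 = u64_lo32(gva) = 0x40001000;
 *   entry1 = u64_hi32(gva) | pbdma_gp_entry1_length_f(6)
 *          = 0x00000001   | pbdma_gp_entry1_length_f(6);
 *
 * The new c->gpfifo.put is then published to the hardware through USERD
 * with gk20a_bar1_writel().
 */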
1386
1387 static u32 get_gp_free_count(struct channel_gk20a *c)
1388 {
1389         update_gp_get(c->g, c);
1390         return gp_free_count(c);
1391 }
1392
1393 static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
1394 {
1395         void *mem = NULL;
1396         unsigned int words;
1397         u64 offset;
1398         struct dma_buf *dmabuf = NULL;
1399
1400         if (gk20a_debug_trace_cmdbuf) {
1401                 u64 gpu_va = (u64)g->entry0 |
1402                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1403                 int err;
1404
1405                 words = pbdma_gp_entry1_length_v(g->entry1);
1406                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1407                 if (!err)
1408                         mem = dma_buf_vmap(dmabuf);
1409         }
1410
1411         if (mem) {
1412                 u32 i;
1413                 /*
1414                  * Write in batches of 128 as there seems to be a limit
1415                  * of how much you can output to ftrace at once.
1416                  */
1417                 for (i = 0; i < words; i += 128U) {
1418                         trace_gk20a_push_cmdbuf(
1419                                 c->g->dev->name,
1420                                 0,
1421                                 min(words - i, 128U),
1422                                 offset + i * sizeof(u32),
1423                                 mem);
1424                 }
1425                 dma_buf_vunmap(dmabuf, mem);
1426         }
1427 }
1428
1429 static int gk20a_channel_add_job(struct channel_gk20a *c,
1430                                  struct gk20a_fence *pre_fence,
1431                                  struct gk20a_fence *post_fence)
1432 {
1433         struct vm_gk20a *vm = c->vm;
1434         struct channel_gk20a_job *job = NULL;
1435         struct mapped_buffer_node **mapped_buffers = NULL;
1436         int err = 0, num_mapped_buffers;
1437
1438         /* job needs reference to this vm */
1439         gk20a_vm_get(vm);
1440
1441         err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
1442         if (err) {
1443                 gk20a_vm_put(vm);
1444                 return err;
1445         }
1446
1447         job = kzalloc(sizeof(*job), GFP_KERNEL);
1448         if (!job) {
1449                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1450                 gk20a_vm_put(vm);
1451                 return -ENOMEM;
1452         }
1453
1454         job->num_mapped_buffers = num_mapped_buffers;
1455         job->mapped_buffers = mapped_buffers;
1456         job->pre_fence = gk20a_fence_get(pre_fence);
1457         job->post_fence = gk20a_fence_get(post_fence);
1458
1459         mutex_lock(&c->jobs_lock);
1460         list_add_tail(&job->list, &c->jobs);
1461         mutex_unlock(&c->jobs_lock);
1462
1463         return 0;
1464 }
1465
1466 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1467 {
1468         struct vm_gk20a *vm = c->vm;
1469         struct channel_gk20a_job *job, *n;
1470
1471         wake_up(&c->submit_wq);
1472
1473         mutex_lock(&c->submit_lock);
1474         mutex_lock(&c->jobs_lock);
1475         list_for_each_entry_safe(job, n, &c->jobs, list) {
1476                 bool completed = gk20a_fence_is_expired(job->post_fence);
1477                 if (!completed)
1478                         break;
1479
1480                 c->sync->signal_timeline(c->sync);
1481
1482                 gk20a_vm_put_buffers(vm, job->mapped_buffers,
1483                                 job->num_mapped_buffers);
1484
1485                 /* Close the fences (this will unref the semaphores and release
1486                  * them to the pool). */
1487                 gk20a_fence_put(job->pre_fence);
1488                 gk20a_fence_put(job->post_fence);
1489
1490                 /* job is done. release its reference to vm */
1491                 gk20a_vm_put(vm);
1492
1493                 list_del_init(&job->list);
1494                 kfree(job);
1495                 gk20a_idle(c->g->dev);
1496         }
1497
1498         /*
1499          * If job list is empty then channel is idle and we can free
1500          * the syncpt here (given aggressive_destroy flag is set)
1501          * Note: check if last submit is complete before destroying
1502          * the sync resource
1503          */
1504         if (list_empty(&c->jobs)) {
1505                 if (c->sync && c->sync->aggressive_destroy &&
1506                           gk20a_fence_is_expired(c->last_submit.post_fence)) {
1507                         c->sync->destroy(c->sync);
1508                         c->sync = NULL;
1509                 }
1510         }
1511         mutex_unlock(&c->jobs_lock);
1512         mutex_unlock(&c->submit_lock);
1513
1514         if (c->update_fn)
1515                 schedule_work(&c->update_fn_work);
1516 }
1517
1518 void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
1519 {
1520         /* syncpoint_a */
1521         ptr[0] = 0x2001001C;
1522         /* payload */
1523         ptr[1] = thresh;
1524         /* syncpoint_b */
1525         ptr[2] = 0x2001001D;
1526         /* syncpt_id, switch_en, wait */
1527         ptr[3] = (id << 8) | 0x10;
1528 }
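
/*
 * Usage sketch (added note, not part of the original source): filling a
 * hypothetical scratch buffer with a wait for syncpoint id 5 to reach
 * threshold 1000:
 *
 *   u32 wait[4];
 *   add_wait_cmd(wait, 5, 1000);
 *   // wait[1] == 1000, wait[3] == (5 << 8) | 0x10 == 0x510
 *
 * In the driver the destination is typically the ptr of a priv_cmd_entry
 * obtained from gk20a_channel_alloc_priv_cmdbuf().
 */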
1529
1530 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1531                                 struct nvgpu_gpfifo *gpfifo,
1532                                 u32 num_entries,
1533                                 u32 flags,
1534                                 struct nvgpu_fence *fence,
1535                                 struct gk20a_fence **fence_out)
1536 {
1537         struct gk20a *g = c->g;
1538         struct device *d = dev_from_gk20a(g);
1539         int err = 0;
1540         int i;
1541         int wait_fence_fd = -1;
1542         struct priv_cmd_entry *wait_cmd = NULL;
1543         struct priv_cmd_entry *incr_cmd = NULL;
1544         struct gk20a_fence *pre_fence = NULL;
1545         struct gk20a_fence *post_fence = NULL;
1546         /* we might need two extra gpfifo entries - one for pre fence
1547          * and one for post fence. */
1548         const int extra_entries = 2;
1549         bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1550
1551         if (c->has_timedout)
1552                 return -ETIMEDOUT;
1553
1554         if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1555                       NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1556             !fence)
1557                 return -EINVAL;
1558
1559         /* an address space needs to have been bound at this point. */
1560         if (!gk20a_channel_as_bound(c)) {
1561                 gk20a_err(d,
1562                             "not bound to an address space at time of gpfifo"
1563                             " submission.");
1564                 return -EINVAL;
1565         }
1566
1567 #ifdef CONFIG_DEBUG_FS
1568         /* update debug settings */
1569         if (g->ops.ltc.sync_debugfs)
1570                 g->ops.ltc.sync_debugfs(g);
1571 #endif
1572
1573         gk20a_dbg_info("channel %d", c->hw_chid);
1574
1575         /* gk20a_channel_update releases this ref. */
1576         err = gk20a_busy(g->dev);
1577         if (err) {
1578                 gk20a_err(d, "failed to host gk20a to submit gpfifo");
1579                 return err;
1580         }
1581
1582         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1583                                           c->hw_chid,
1584                                           num_entries,
1585                                           flags,
1586                                           fence ? fence->id : 0,
1587                                           fence ? fence->value : 0);
1588         check_gp_put(g, c);
1589         update_gp_get(g, c);
1590
1591         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1592                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1593
1594         /* Invalidate the TLB if it is dirty.
1595          * TBD: this should be done in the command stream, not with PRIs;
1596          * we don't know what context is currently running, and there can
1597          * be more than one context associated with the address space (vm).
1598          */
1599         g->ops.mm.tlb_invalidate(c->vm);
1600
1601         /* Make sure we have enough space for gpfifo entries. If not,
1602          * wait for signals from completed submits */
1603         if (gp_free_count(c) < num_entries + extra_entries) {
1604                 err = wait_event_interruptible(c->submit_wq,
1605                         get_gp_free_count(c) >= num_entries + extra_entries ||
1606                         c->has_timedout);
1607         }
1608
1609         if (c->has_timedout) {
1610                 err = -ETIMEDOUT;
1611                 goto clean_up;
1612         }
1613
1614         if (err) {
1615                 gk20a_err(d, "not enough gpfifo space");
1616                 err = -EAGAIN;
1617                 goto clean_up;
1618         }
1619
1620         mutex_lock(&c->submit_lock);
1621
1622         if (!c->sync) {
1623                 c->sync = gk20a_channel_sync_create(c);
1624                 if (!c->sync) {
1625                         err = -ENOMEM;
1626                         mutex_unlock(&c->submit_lock);
1627                         goto clean_up;
1628                 }
1629         }
1630
1631         /*
1632          * Optionally insert a syncpoint wait at the beginning of the gpfifo
1633          * submission when the user requested one and the wait has not yet
1634          * expired. Validate that the id makes sense and elide the wait if it
1635          * does not; the only reason this case is not rejected outright is to
1636          * keep some tests that trigger it running.
1637          */
1638         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1639                 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1640                         wait_fence_fd = fence->id;
1641                         err = c->sync->wait_fd(c->sync, wait_fence_fd,
1642                                         &wait_cmd, &pre_fence);
1643                 } else {
1644                         err = c->sync->wait_syncpt(c->sync, fence->id,
1645                                         fence->value, &wait_cmd, &pre_fence);
1646                 }
1647         }
1648         if (err) {
1649                 mutex_unlock(&c->submit_lock);
1650                 goto clean_up;
1651         }
1652
1653
1654         /* Always insert a syncpoint increment at the end of the gpfifo
1655          * submission to keep track of method completion for idle railgating. */
1656         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1657                 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1658                                          &post_fence, need_wfi);
1659         else
1660                 err = c->sync->incr(c->sync, &incr_cmd,
1661                                     &post_fence);
1662         if (err) {
1663                 mutex_unlock(&c->submit_lock);
1664                 goto clean_up;
1665         }
1666
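        /*
         * Entries are written at 'put', and the pointer wraps with a mask of
         * (entry_num - 1), which assumes entry_num is a power of two. The
         * wait and increment commands below record the put value that follows
         * them, presumably so their private command buffers can be recycled
         * once the hardware has consumed them.
         */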
1667         if (wait_cmd) {
1668                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1669                         u64_lo32(wait_cmd->gva);
1670                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1671                         u64_hi32(wait_cmd->gva) |
1672                         pbdma_gp_entry1_length_f(wait_cmd->size);
1673                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1674                         0, wait_cmd->size, 0, wait_cmd->ptr);
1675
1676                 c->gpfifo.put = (c->gpfifo.put + 1) &
1677                         (c->gpfifo.entry_num - 1);
1678
1679                 /* save gp_put */
1680                 wait_cmd->gp_put = c->gpfifo.put;
1681         }
1682
1683         for (i = 0; i < num_entries; i++) {
1684                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1685                         gpfifo[i].entry0; /* cmd buf va low 32 */
1686                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1687                         gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
1688                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1689                 c->gpfifo.put = (c->gpfifo.put + 1) &
1690                         (c->gpfifo.entry_num - 1);
1691         }
1692
1693         if (incr_cmd) {
1694                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1695                         u64_lo32(incr_cmd->gva);
1696                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1697                         u64_hi32(incr_cmd->gva) |
1698                         pbdma_gp_entry1_length_f(incr_cmd->size);
1699                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1700                         0, incr_cmd->size, 0, incr_cmd->ptr);
1701
1702                 c->gpfifo.put = (c->gpfifo.put + 1) &
1703                         (c->gpfifo.entry_num - 1);
1704
1705                 /* save gp_put */
1706                 incr_cmd->gp_put = c->gpfifo.put;
1707         }
1708
1709         gk20a_fence_put(c->last_submit.pre_fence);
1710         gk20a_fence_put(c->last_submit.post_fence);
1711         c->last_submit.pre_fence = pre_fence;
1712         c->last_submit.post_fence = post_fence;
1713         if (fence_out)
1714                 *fence_out = gk20a_fence_get(post_fence);
1715
1716         /* TODO! Check for errors... */
1717         gk20a_channel_add_job(c, pre_fence, post_fence);
1718
1719         c->cmds_pending = true;
1720         gk20a_bar1_writel(g,
1721                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1722                 c->gpfifo.put);
1723
1724         mutex_unlock(&c->submit_lock);
1725
1726         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1727                                              c->hw_chid,
1728                                              num_entries,
1729                                              flags,
1730                                              post_fence->syncpt_id,
1731                                              post_fence->syncpt_value);
1732
1733         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1734                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1735
1736         gk20a_dbg_fn("done");
1737         return err;
1738
1739 clean_up:
1740         gk20a_err(d, "fail");
1741         free_priv_cmdbuf(c, wait_cmd);
1742         free_priv_cmdbuf(c, incr_cmd);
1743         gk20a_fence_put(pre_fence);
1744         gk20a_fence_put(post_fence);
1745         gk20a_idle(g->dev);
1746         return err;
1747 }
1748
1749 void gk20a_remove_channel_support(struct channel_gk20a *c)
1750 {
1751
1752 }
1753
1754 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1755 {
1756         struct channel_gk20a *c = g->fifo.channel+chid;
1757         c->g = g;
1758         c->in_use = false;
1759         c->hw_chid = chid;
1760         c->bound = false;
1761         c->remove_support = gk20a_remove_channel_support;
1762         mutex_init(&c->jobs_lock);
1763         mutex_init(&c->submit_lock);
1764         INIT_LIST_HEAD(&c->jobs);
1765 #if defined(CONFIG_GK20A_CYCLE_STATS)
1766         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1767 #endif
1768         INIT_LIST_HEAD(&c->dbg_s_list);
1769         mutex_init(&c->dbg_s_lock);
1770
1771         return 0;
1772 }
1773
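/*
 * Wait for the channel's last submitted work to complete. If the last
 * post fence was not produced with a wait-for-idle and the channel is not
 * using the KEPLER_C class, a trailing WFI/increment is submitted first so
 * that completion of the fence really implies engine idleness.
 */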
1774 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
1775 {
1776         int err = 0;
1777         struct gk20a_fence *fence = ch->last_submit.post_fence;
1778
1779         if (!ch->cmds_pending)
1780                 return 0;
1781
1782         /* Do not wait for a timed-out channel */
1783         if (ch->has_timedout)
1784                 return -ETIMEDOUT;
1785
1786         if (!(fence && fence->wfi) && ch->obj_class != KEPLER_C) {
1787                 gk20a_dbg_fn("issuing wfi, incr to finish the channel");
1788                 err = gk20a_channel_submit_wfi(ch);
1789                 fence = ch->last_submit.post_fence;
1790         }
1791         if (err)
1792                 return err;
1793
1794         BUG_ON(!(fence && fence->wfi) && ch->obj_class != KEPLER_C);
1795
1796         gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
1797                      fence->syncpt_value, fence->semaphore);
1798
1799         err = gk20a_fence_wait(fence, timeout);
1800         if (WARN_ON(err))
1801                 dev_warn(dev_from_gk20a(ch->g),
1802                        "timed out waiting for gk20a channel to finish");
1803         else
1804                 ch->cmds_pending = false;
1805
1806         return err;
1807 }
1808
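/*
 * Wait for a semaphore living in a dma-buf: map the page containing
 * 'offset', then sleep (interruptibly, with a timeout) until the 32-bit
 * word there equals 'payload' or the channel times out.
 */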
1809 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
1810                                         ulong id, u32 offset,
1811                                         u32 payload, long timeout)
1812 {
1813         struct platform_device *pdev = ch->g->dev;
1814         struct dma_buf *dmabuf;
1815         void *data;
1816         u32 *semaphore;
1817         int ret = 0;
1818         long remain;
1819
1820         /* do not wait if channel has timed out */
1821         if (ch->has_timedout)
1822                 return -ETIMEDOUT;
1823
1824         dmabuf = dma_buf_get(id);
1825         if (IS_ERR(dmabuf)) {
1826                 gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
1827                            id);
1828                 return -EINVAL;
1829         }
1830
1831         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
1832         if (!data) {
1833                 gk20a_err(&pdev->dev, "failed to map notifier memory");
1834                 ret = -EINVAL;
1835                 goto cleanup_put;
1836         }
1837
1838         semaphore = data + (offset & ~PAGE_MASK);
1839
1840         remain = wait_event_interruptible_timeout(
1841                         ch->semaphore_wq,
1842                         *semaphore == payload || ch->has_timedout,
1843                         timeout);
1844
1845         if (remain == 0 && *semaphore != payload)
1846                 ret = -ETIMEDOUT;
1847         else if (remain < 0)
1848                 ret = remain;
1849
1850         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
1851 cleanup_put:
1852         dma_buf_put(dmabuf);
1853         return ret;
1854 }
1855
1856 static int gk20a_channel_wait(struct channel_gk20a *ch,
1857                               struct nvgpu_wait_args *args)
1858 {
1859         struct device *d = dev_from_gk20a(ch->g);
1860         struct dma_buf *dmabuf;
1861         struct notification *notif;
1862         struct timespec tv;
1863         u64 jiffies;
1864         ulong id;
1865         u32 offset;
1866         unsigned long timeout;
1867         int remain, ret = 0;
1868
1869         gk20a_dbg_fn("");
1870
1871         if (ch->has_timedout)
1872                 return -ETIMEDOUT;
1873
1874         if (args->timeout == NVGPU_NO_TIMEOUT)
1875                 timeout = MAX_SCHEDULE_TIMEOUT;
1876         else
1877                 timeout = (u32)msecs_to_jiffies(args->timeout);
1878
1879         switch (args->type) {
1880         case NVGPU_WAIT_TYPE_NOTIFIER:
1881                 id = args->condition.notifier.dmabuf_fd;
1882                 offset = args->condition.notifier.offset;
1883
1884                 dmabuf = dma_buf_get(id);
1885                 if (IS_ERR(dmabuf)) {
1886                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
1887                                    id);
1888                         return -EINVAL;
1889                 }
1890
1891                 notif = dma_buf_vmap(dmabuf);
1892                 if (!notif) {
1893                         gk20a_err(d, "failed to map notifier memory");
1894                         return -ENOMEM;
1895                 }
1896
1897                 notif = (struct notification *)((uintptr_t)notif + offset);
1898
1899                 /* the user should set the notifier status to pending
1900                  * before calling this ioctl */
1901                 remain = wait_event_interruptible_timeout(
1902                                 ch->notifier_wq,
1903                                 notif->status == 0 || ch->has_timedout,
1904                                 timeout);
1905
1906                 if (remain == 0 && notif->status != 0) {
1907                         ret = -ETIMEDOUT;
1908                         goto notif_clean_up;
1909                 } else if (remain < 0) {
1910                         ret = -EINTR;
1911                         goto notif_clean_up;
1912                 }
1913
1914                 /* TBD: fill in correct information */
1915                 jiffies = get_jiffies_64();
1916                 jiffies_to_timespec(jiffies, &tv);
1917                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
1918                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
1919                 notif->info32 = 0xDEADBEEF; /* should be object name */
1920                 notif->info16 = ch->hw_chid; /* should be method offset */
1921
1922 notif_clean_up:
1923                 dma_buf_vunmap(dmabuf, notif);
1924                 return ret;
1925
1926         case NVGPU_WAIT_TYPE_SEMAPHORE:
1927                 ret = gk20a_channel_wait_semaphore(ch,
1928                                 args->condition.semaphore.dmabuf_fd,
1929                                 args->condition.semaphore.offset,
1930                                 args->condition.semaphore.payload,
1931                                 timeout);
1932
1933                 break;
1934
1935         default:
1936                 ret = -EINVAL;
1937                 break;
1938         }
1939
1940         return ret;
1941 }
1942
1943 /* poll events for semaphores */
1944
1945 static void gk20a_channel_events_enable(struct channel_gk20a_poll_events *ev)
1946 {
1947         gk20a_dbg_fn("");
1948
1949         mutex_lock(&ev->lock);
1950
1951         ev->events_enabled = true;
1952         ev->num_pending_events = 0;
1953
1954         mutex_unlock(&ev->lock);
1955 }
1956
1957 static void gk20a_channel_events_disable(struct channel_gk20a_poll_events *ev)
1958 {
1959         gk20a_dbg_fn("");
1960
1961         mutex_lock(&ev->lock);
1962
1963         ev->events_enabled = false;
1964         ev->num_pending_events = 0;
1965
1966         mutex_unlock(&ev->lock);
1967 }
1968
1969 static void gk20a_channel_events_clear(struct channel_gk20a_poll_events *ev)
1970 {
1971         gk20a_dbg_fn("");
1972
1973         mutex_lock(&ev->lock);
1974
1975         if (ev->events_enabled &&
1976                         ev->num_pending_events > 0)
1977                 ev->num_pending_events--;
1978
1979         mutex_unlock(&ev->lock);
1980 }
1981
1982 static int gk20a_channel_events_ctrl(struct channel_gk20a *ch,
1983                           struct nvgpu_channel_events_ctrl_args *args)
1984 {
1985         int ret = 0;
1986
1987         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info,
1988                         "channel events ctrl cmd %d", args->cmd);
1989
1990         switch (args->cmd) {
1991         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_ENABLE:
1992                 gk20a_channel_events_enable(&ch->poll_events);
1993                 break;
1994
1995         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_DISABLE:
1996                 gk20a_channel_events_disable(&ch->poll_events);
1997                 break;
1998
1999         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL_CMD_CLEAR:
2000                 gk20a_channel_events_clear(&ch->poll_events);
2001                 break;
2002
2003         default:
2004                 gk20a_err(dev_from_gk20a(ch->g),
2005                            "unrecognized channel events ctrl cmd: 0x%x",
2006                            args->cmd);
2007                 ret = -EINVAL;
2008                 break;
2009         }
2010
2011         return ret;
2012 }
2013
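/*
 * Record a pending event for poll()/POLLPRI consumers while events are
 * enabled. The wait queue is deliberately not woken here; the caller is
 * expected to do that (see gk20a_channel_poll and semaphore_wq).
 */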
2014 void gk20a_channel_event(struct channel_gk20a *ch)
2015 {
2016         mutex_lock(&ch->poll_events.lock);
2017
2018         if (ch->poll_events.events_enabled) {
2019                 gk20a_dbg_info("posting event on channel id %d",
2020                                 ch->hw_chid);
2021                 gk20a_dbg_info("%d channel events pending",
2022                                 ch->poll_events.num_pending_events);
2023
2024                 ch->poll_events.num_pending_events++;
2025                 /* not waking up here, caller does that */
2026         }
2027
2028         mutex_unlock(&ch->poll_events.lock);
2029 }
2030
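/*
 * poll() backend: waiters are parked on semaphore_wq, and POLLPRI|POLLIN
 * is reported whenever events are enabled and at least one is pending.
 */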
2031 unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
2032 {
2033         unsigned int mask = 0;
2034         struct channel_gk20a *ch = filep->private_data;
2035
2036         gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");
2037
2038         poll_wait(filep, &ch->semaphore_wq, wait);
2039
2040         mutex_lock(&ch->poll_events.lock);
2041
2042         if (ch->poll_events.events_enabled &&
2043                         ch->poll_events.num_pending_events > 0) {
2044                 gk20a_dbg_info("found pending event on channel id %d",
2045                                 ch->hw_chid);
2046                 gk20a_dbg_info("%d channel events pending",
2047                                 ch->poll_events.num_pending_events);
2048                 mask = (POLLPRI | POLLIN);
2049         }
2050
2051         mutex_unlock(&ch->poll_events.lock);
2052
2053         return mask;
2054 }
2055
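/*
 * Map a user-visible priority onto a timeslice value passed to
 * channel_gk20a_set_schedule_params(). Going by the inline comments, the
 * raw value is scaled by 8 (<< 3) to obtain the timeslice in microseconds.
 */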
2056 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
2057                 u32 priority)
2058 {
2059         u32 timeslice_timeout;
2060         /* set priority of graphics channel */
2061         switch (priority) {
2062         case NVGPU_PRIORITY_LOW:
2063                 /* 64 << 3 = 512us */
2064                 timeslice_timeout = 64;
2065                 break;
2066         case NVGPU_PRIORITY_MEDIUM:
2067                 /* 128 << 3 = 1024us */
2068                 timeslice_timeout = 128;
2069                 break;
2070         case NVGPU_PRIORITY_HIGH:
2071                 /* 255 << 3 = 2040us */
2072                 timeslice_timeout = 255;
2073                 break;
2074         default:
2075                 pr_err("Unsupported priority");
2076                 return -EINVAL;
2077         }
2078         channel_gk20a_set_schedule_params(ch,
2079                         timeslice_timeout);
2080         return 0;
2081 }
2082
2083 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
2084                             struct nvgpu_zcull_bind_args *args)
2085 {
2086         struct gk20a *g = ch->g;
2087         struct gr_gk20a *gr = &g->gr;
2088
2089         gk20a_dbg_fn("");
2090
2091         return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
2092                                 args->gpu_va, args->mode);
2093 }
2094
2095 /* in this context the "channel" is the host1x channel which
2096  * maps to *all* gk20a channels */
2097 int gk20a_channel_suspend(struct gk20a *g)
2098 {
2099         struct fifo_gk20a *f = &g->fifo;
2100         u32 chid;
2101         bool channels_in_use = false;
2102         int err;
2103
2104         gk20a_dbg_fn("");
2105
2106         /* wait for engine idle */
2107         err = g->ops.fifo.wait_engine_idle(g);
2108         if (err)
2109                 return err;
2110
2111         for (chid = 0; chid < f->num_channels; chid++) {
2112                 if (f->channel[chid].in_use) {
2113
2114                         gk20a_dbg_info("suspend channel %d", chid);
2115                         /* disable channel */
2116                         g->ops.fifo.disable_channel(&f->channel[chid]);
2117                         /* preempt the channel */
2118                         g->ops.fifo.preempt_channel(g, chid);
2119
2120                         channels_in_use = true;
2121                 }
2122         }
2123
2124         if (channels_in_use) {
2125                 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
2126
2127                 for (chid = 0; chid < f->num_channels; chid++) {
2128                         if (f->channel[chid].in_use)
2129                                 g->ops.fifo.unbind_channel(&f->channel[chid]);
2130                 }
2131         }
2132
2133         gk20a_dbg_fn("done");
2134         return 0;
2135 }
2136
2137 /* in this context the "channel" is the host1x channel which
2138  * maps to *all* gk20a channels */
2139 int gk20a_channel_resume(struct gk20a *g)
2140 {
2141         struct fifo_gk20a *f = &g->fifo;
2142         u32 chid;
2143         bool channels_in_use = false;
2144
2145         gk20a_dbg_fn("");
2146
2147         for (chid = 0; chid < f->num_channels; chid++) {
2148                 if (f->channel[chid].in_use) {
2149                         gk20a_dbg_info("resume channel %d", chid);
2150                         g->ops.fifo.bind_channel(&f->channel[chid]);
2151                         channels_in_use = true;
2152                 }
2153         }
2154
2155         if (channels_in_use)
2156                 g->ops.fifo.update_runlist(g, 0, ~0, true, true);
2157
2158         gk20a_dbg_fn("done");
2159         return 0;
2160 }
2161
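/*
 * Wake every in-use channel's semaphore waiters and run the job cleanup,
 * so that any work whose post fence has now expired is reclaimed.
 */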
2162 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
2163 {
2164         struct fifo_gk20a *f = &g->fifo;
2165         u32 chid;
2166
2167         gk20a_dbg_fn("");
2168
2169         for (chid = 0; chid < f->num_channels; chid++) {
2170                 struct channel_gk20a *c = g->fifo.channel+chid;
2171                 if (c->in_use) {
2172                         wake_up_interruptible_all(&c->semaphore_wq);
2173                         gk20a_channel_update(c, 0);
2174                 }
2175         }
2176 }
2177
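/*
 * SUBMIT_GPFIFO ioctl: copy the user's gpfifo entry array into a kernel
 * buffer, hand it to gk20a_submit_channel_gpfifo(), and translate the
 * resulting post fence back to userspace either as a sync-fence fd or as
 * a raw syncpoint id/value pair, depending on the submit flags.
 */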
2178 static int gk20a_ioctl_channel_submit_gpfifo(
2179         struct channel_gk20a *ch,
2180         struct nvgpu_submit_gpfifo_args *args)
2181 {
2182         struct gk20a_fence *fence_out;
2183         void *gpfifo;
2184         u32 size;
2185         int ret = 0;
2186
2187         gk20a_dbg_fn("");
2188
2189         if (ch->has_timedout)
2190                 return -ETIMEDOUT;
2191
2192         size = args->num_entries * sizeof(struct nvgpu_gpfifo);
2193
2194         gpfifo = kzalloc(size, GFP_KERNEL);
2195         if (!gpfifo)
2196                 return -ENOMEM;
2197
2198         if (copy_from_user(gpfifo,
2199                            (void __user *)(uintptr_t)args->gpfifo, size)) {
2200                 ret = -EINVAL;
2201                 goto clean_up;
2202         }
2203
2204         ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
2205                                           args->flags, &args->fence,
2206                                           &fence_out);
2207
2208         if (ret)
2209                 goto clean_up;
2210
2211         /* Convert fence_out to something we can pass back to user space. */
2212         if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
2213                 if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2214                         int fd = gk20a_fence_install_fd(fence_out);
2215                         if (fd < 0)
2216                                 ret = fd;
2217                         else
2218                                 args->fence.id = fd;
2219                 } else {
2220                         args->fence.id = fence_out->syncpt_id;
2221                         args->fence.value = fence_out->syncpt_value;
2222                 }
2223         }
2224         gk20a_fence_put(fence_out);
2225
2226 clean_up:
2227         kfree(gpfifo);
2228         return ret;
2229 }
2230
2231 void gk20a_init_channel(struct gpu_ops *gops)
2232 {
2233         gops->fifo.bind_channel = channel_gk20a_bind;
2234         gops->fifo.unbind_channel = channel_gk20a_unbind;
2235         gops->fifo.disable_channel = channel_gk20a_disable;
2236         gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
2237         gops->fifo.free_inst = channel_gk20a_free_inst;
2238         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
2239 }
2240
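/*
 * Channel ioctl dispatcher. Arguments are staged through the on-stack
 * 'buf': copied in from userspace for _IOC_WRITE ioctls, and copied back
 * out for _IOC_READ ioctls when the handler succeeds. Most commands also
 * take a runtime power reference around the underlying operation.
 */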
2241 long gk20a_channel_ioctl(struct file *filp,
2242         unsigned int cmd, unsigned long arg)
2243 {
2244         struct channel_gk20a *ch = filp->private_data;
2245         struct platform_device *dev = ch->g->dev;
2246         u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE];
2247         int err = 0;
2248
2249         gk20a_dbg_fn("start %d", _IOC_NR(cmd));
2250
2251         if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
2252                 (_IOC_NR(cmd) == 0) ||
2253                 (_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
2254                 (_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
2255                 return -EINVAL;
2256
2257         if (_IOC_DIR(cmd) & _IOC_WRITE) {
2258                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2259                         return -EFAULT;
2260         }
2261
2262         switch (cmd) {
2263         case NVGPU_IOCTL_CHANNEL_OPEN:
2264         {
2265                 int fd;
2266                 struct file *file;
2267                 char *name;
2268
2269                 err = get_unused_fd_flags(O_RDWR);
2270                 if (err < 0)
2271                         break;
2272                 fd = err;
2273
2274                 name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
2275                                 dev_name(&dev->dev), fd);
2276                 if (!name) {
2277                         err = -ENOMEM;
2278                         put_unused_fd(fd);
2279                         break;
2280                 }
2281
2282                 file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR);
2283                 kfree(name);
2284                 if (IS_ERR(file)) {
2285                         err = PTR_ERR(file);
2286                         put_unused_fd(fd);
2287                         break;
2288                 }
2289                 fd_install(fd, file);
2290
2291                 err = __gk20a_channel_open(ch->g, file);
2292                 if (err) {
2293                         put_unused_fd(fd);
2294                         fput(file);
2295                         break;
2296                 }
2297
2298                 ((struct nvgpu_channel_open_args *)buf)->channel_fd = fd;
2299                 break;
2300         }
2301         case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
2302                 break;
2303         case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2304                 err = gk20a_busy(dev);
2305                 if (err) {
2306                         dev_err(&dev->dev,
2307                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2308                                 __func__, cmd);
2309                         return err;
2310                 }
2311                 err = ch->g->ops.gr.alloc_obj_ctx(ch,
2312                                 (struct nvgpu_alloc_obj_ctx_args *)buf);
2313                 gk20a_idle(dev);
2314                 break;
2315         case NVGPU_IOCTL_CHANNEL_FREE_OBJ_CTX:
2316                 err = gk20a_busy(dev);
2317                 if (err) {
2318                         dev_err(&dev->dev,
2319                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2320                                 __func__, cmd);
2321                         return err;
2322                 }
2323                 err = ch->g->ops.gr.free_obj_ctx(ch,
2324                                 (struct nvgpu_free_obj_ctx_args *)buf);
2325                 gk20a_idle(dev);
2326                 break;
2327         case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
2328                 err = gk20a_busy(dev);
2329                 if (err) {
2330                         dev_err(&dev->dev,
2331                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2332                                 __func__, cmd);
2333                         return err;
2334                 }
2335                 err = gk20a_alloc_channel_gpfifo(ch,
2336                                 (struct nvgpu_alloc_gpfifo_args *)buf);
2337                 gk20a_idle(dev);
2338                 break;
2339         case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2340                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2341                                 (struct nvgpu_submit_gpfifo_args *)buf);
2342                 break;
2343         case NVGPU_IOCTL_CHANNEL_WAIT:
2344                 err = gk20a_busy(dev);
2345                 if (err) {
2346                         dev_err(&dev->dev,
2347                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2348                                 __func__, cmd);
2349                         return err;
2350                 }
2351                 err = gk20a_channel_wait(ch,
2352                                 (struct nvgpu_wait_args *)buf);
2353                 gk20a_idle(dev);
2354                 break;
2355         case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
2356                 err = gk20a_busy(dev);
2357                 if (err) {
2358                         dev_err(&dev->dev,
2359                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2360                                 __func__, cmd);
2361                         return err;
2362                 }
2363                 err = gk20a_channel_zcull_bind(ch,
2364                                 (struct nvgpu_zcull_bind_args *)buf);
2365                 gk20a_idle(dev);
2366                 break;
2367         case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2368                 err = gk20a_busy(dev);
2369                 if (err) {
2370                         dev_err(&dev->dev,
2371                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2372                                 __func__, cmd);
2373                         return err;
2374                 }
2375                 err = gk20a_init_error_notifier(ch,
2376                                 (struct nvgpu_set_error_notifier *)buf);
2377                 gk20a_idle(dev);
2378                 break;
2379 #ifdef CONFIG_GK20A_CYCLE_STATS
2380         case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
2381                 err = gk20a_busy(dev);
2382                 if (err) {
2383                         dev_err(&dev->dev,
2384                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2385                                 __func__, cmd);
2386                         return err;
2387                 }
2388                 err = gk20a_channel_cycle_stats(ch,
2389                                 (struct nvgpu_cycle_stats_args *)buf);
2390                 gk20a_idle(dev);
2391                 break;
2392 #endif
2393         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
2394         {
2395                 u32 timeout =
2396                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2397                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2398                            timeout, ch->hw_chid);
2399                 ch->timeout_ms_max = timeout;
2400                 break;
2401         }
2402         case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2403         {
2404                 u32 timeout =
2405                         (u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
2406                 bool timeout_debug_dump = !((u32)
2407                         ((struct nvgpu_set_timeout_ex_args *)buf)->flags &
2408                         (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
2409                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2410                            timeout, ch->hw_chid);
2411                 ch->timeout_ms_max = timeout;
2412                 ch->timeout_debug_dump = timeout_debug_dump;
2413                 break;
2414         }
2415         case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
2416                 ((struct nvgpu_get_param_args *)buf)->value =
2417                         ch->has_timedout;
2418                 break;
2419         case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
2420                 err = gk20a_busy(dev);
2421                 if (err) {
2422                         dev_err(&dev->dev,
2423                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2424                                 __func__, cmd);
2425                         return err;
2426                 }
2427                 gk20a_channel_set_priority(ch,
2428                         ((struct nvgpu_set_priority_args *)buf)->priority);
2429                 gk20a_idle(dev);
2430                 break;
2431         case NVGPU_IOCTL_CHANNEL_ENABLE:
2432                 err = gk20a_busy(dev);
2433                 if (err) {
2434                         dev_err(&dev->dev,
2435                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2436                                 __func__, cmd);
2437                         return err;
2438                 }
2439                 /* enable channel */
2440                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2441                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2442                         ccsr_channel_enable_set_true_f());
2443                 gk20a_idle(dev);
2444                 break;
2445         case NVGPU_IOCTL_CHANNEL_DISABLE:
2446                 err = gk20a_busy(dev);
2447                 if (err) {
2448                         dev_err(&dev->dev,
2449                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2450                                 __func__, cmd);
2451                         return err;
2452                 }
2453                 /* disable channel */
2454                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2455                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2456                         ccsr_channel_enable_clr_true_f());
2457                 gk20a_idle(dev);
2458                 break;
2459         case NVGPU_IOCTL_CHANNEL_PREEMPT:
2460                 err = gk20a_busy(dev);
2461                 if (err) {
2462                         dev_err(&dev->dev,
2463                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2464                                 __func__, cmd);
2465                         return err;
2466                 }
2467                 err = gk20a_fifo_preempt(ch->g, ch);
2468                 gk20a_idle(dev);
2469                 break;
2470         case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
2471                 err = gk20a_busy(dev);
2472                 if (err) {
2473                         dev_err(&dev->dev,
2474                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2475                                 __func__, cmd);
2476                         return err;
2477                 }
2478                 err = gk20a_fifo_force_reset_ch(ch, true);
2479                 gk20a_idle(dev);
2480                 break;
2481         case NVGPU_IOCTL_CHANNEL_EVENTS_CTRL:
2482                 err = gk20a_channel_events_ctrl(ch,
2483                            (struct nvgpu_channel_events_ctrl_args *)buf);
2484                 break;
2485         default:
2486                 dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2487                 err = -ENOTTY;
2488                 break;
2489         }
2490
2491         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
2492                 err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
2493
2494         gk20a_dbg_fn("end");
2495
2496         return err;
2497 }