gpu: nvgpu: support gk20a virtualization
linux-3.10.git: drivers/gpu/nvgpu/gk20a/channel_gk20a.c
1 /*
2  * drivers/video/tegra/host/gk20a/channel_gk20a.c
3  *
4  * GK20A Graphics channel
5  *
6  * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/nvhost.h>
23 #include <linux/list.h>
24 #include <linux/delay.h>
25 #include <linux/highmem.h> /* needed for nvmap.h */
26 #include <trace/events/gk20a.h>
27 #include <linux/scatterlist.h>
28 #include <linux/file.h>
29 #include <linux/anon_inodes.h>
30 #include <linux/dma-buf.h>
31
32 #include "debug_gk20a.h"
33
34 #include "gk20a.h"
35 #include "dbg_gpu_gk20a.h"
36 #include "fence_gk20a.h"
37 #include "semaphore_gk20a.h"
38
39 #include "hw_ram_gk20a.h"
40 #include "hw_fifo_gk20a.h"
41 #include "hw_pbdma_gk20a.h"
42 #include "hw_ccsr_gk20a.h"
43 #include "hw_ltc_gk20a.h"
44
45 #define NVMAP_HANDLE_PARAM_SIZE 1
46
47 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
48 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
49
50 static void free_priv_cmdbuf(struct channel_gk20a *c,
51                              struct priv_cmd_entry *e);
52 static void recycle_priv_cmdbuf(struct channel_gk20a *c);
53
54 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
55 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
56
57 static int channel_gk20a_commit_userd(struct channel_gk20a *c);
58 static int channel_gk20a_setup_userd(struct channel_gk20a *c);
59
60 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
61
62 static int channel_gk20a_update_runlist(struct channel_gk20a *c,
63                                         bool add);
64 static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
65
66 static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
67 {
68         struct channel_gk20a *ch = NULL;
69         int chid;
70
71         mutex_lock(&f->ch_inuse_mutex);
72         for (chid = 0; chid < f->num_channels; chid++) {
73                 if (!f->channel[chid].in_use) {
74                         f->channel[chid].in_use = true;
75                         ch = &f->channel[chid];
76                         break;
77                 }
78         }
79         mutex_unlock(&f->ch_inuse_mutex);
80
81         return ch;
82 }
83
84 static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
85 {
86         mutex_lock(&f->ch_inuse_mutex);
87         f->channel[c->hw_chid].in_use = false;
88         mutex_unlock(&f->ch_inuse_mutex);
89 }
90
91 int channel_gk20a_commit_va(struct channel_gk20a *c)
92 {
93         u64 addr;
94         u32 addr_lo;
95         u32 addr_hi;
96         void *inst_ptr;
97
98         gk20a_dbg_fn("");
99
100         inst_ptr = c->inst_block.cpuva;
101         if (!inst_ptr)
102                 return -ENOMEM;
103
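        /* The page directory base is split across two RAMIN words below: the
         * low word takes the 4 KiB-aligned base (addr >> 12), the high word
         * the upper 32 bits of the address; e.g. a PDE base of 0x123456000
         * gives addr_lo = 0x123456 and addr_hi = 0x1. */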
104         addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
105         addr_lo = u64_lo32(addr >> 12);
106         addr_hi = u64_hi32(addr);
107
108         gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
109                    (u64)addr, addr_lo, addr_hi);
110
111         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
112                 ram_in_page_dir_base_target_vid_mem_f() |
113                 ram_in_page_dir_base_vol_true_f() |
114                 ram_in_page_dir_base_lo_f(addr_lo));
115
116         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
117                 ram_in_page_dir_base_hi_f(addr_hi));
118
119         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
120                  u64_lo32(c->vm->va_limit) | 0xFFF);
121
122         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
123                 ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
124
125         return 0;
126 }
127
128 static int channel_gk20a_commit_userd(struct channel_gk20a *c)
129 {
130         u32 addr_lo;
131         u32 addr_hi;
132         void *inst_ptr;
133
134         gk20a_dbg_fn("");
135
136         inst_ptr = c->inst_block.cpuva;
137         if (!inst_ptr)
138                 return -ENOMEM;
139
140         addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
141         addr_hi = u64_hi32(c->userd_iova);
142
143         gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
144                 c->hw_chid, (u64)c->userd_iova);
145
146         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
147                  pbdma_userd_target_vid_mem_f() |
148                  pbdma_userd_addr_f(addr_lo));
149
150         gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
151                  pbdma_userd_target_vid_mem_f() |
152                  pbdma_userd_hi_addr_f(addr_hi));
153
154         return 0;
155 }
156
157 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
158                                 u32 timeslice_timeout)
159 {
160         void *inst_ptr;
161         int shift = 3;
162         int value = timeslice_timeout;
163
164         inst_ptr = c->inst_block.cpuva;
165         if (!inst_ptr)
166                 return -ENOMEM;
167
168         /* disable channel */
169         c->g->ops.fifo.disable_channel(c);
170
171         /* preempt the channel */
172         WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
173
174         /* value field is 8 bits long */
175         while (value >= 1 << 8) {
176                 value >>= 1;
177                 shift++;
178         }
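        /* e.g. a requested timeslice_timeout of 4096 is normalized by the
         * loop above to value = 128 (4096 >> 5) with shift = 3 + 5 = 8 */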
179
180         /* time slice register is only 18 bits long */
181         if ((value << shift) >= 1<<19) {
182                 pr_err("Requested timeslice value is clamped to 18 bits\n");
183                 value = 255;
184                 shift = 10;
185         }
186
187         /* set new timeslice */
188         gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
189                 value | (shift << 12) |
190                 fifo_eng_timeslice_enable_true_f());
191
192         /* enable channel */
193         gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
194                 gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
195                 ccsr_channel_enable_set_true_f());
196
197         return 0;
198 }
199
200 int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
201                         u64 gpfifo_base, u32 gpfifo_entries)
202 {
203         void *inst_ptr;
204
205         gk20a_dbg_fn("");
206
207         inst_ptr = c->inst_block.cpuva;
208         if (!inst_ptr)
209                 return -ENOMEM;
210
211         memset(inst_ptr, 0, ram_fc_size_val_v());
212
213         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
214                 pbdma_gp_base_offset_f(
215                 u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
216
217         gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
218                 pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
219                 pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
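        /* limit2 holds log2 of the ring size, so gpfifo_entries is expected
         * to be a power of two (the gpfifo allocator rounds the requested
         * count up accordingly) */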
220
221         gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
222                  pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
223
224         gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
225                 pbdma_formats_gp_fermi0_f() |
226                 pbdma_formats_pb_fermi1_f() |
227                 pbdma_formats_mp_fermi0_f());
228
229         gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
230                 pbdma_pb_header_priv_user_f() |
231                 pbdma_pb_header_method_zero_f() |
232                 pbdma_pb_header_subchannel_zero_f() |
233                 pbdma_pb_header_level_main_f() |
234                 pbdma_pb_header_first_true_f() |
235                 pbdma_pb_header_type_inc_f());
236
237         gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
238                 pbdma_subdevice_id_f(1) |
239                 pbdma_subdevice_status_active_f() |
240                 pbdma_subdevice_channel_dma_enable_f());
241
242         gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
243
244         gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
245                 pbdma_acquire_retry_man_2_f() |
246                 pbdma_acquire_retry_exp_2_f() |
247                 pbdma_acquire_timeout_exp_max_f() |
248                 pbdma_acquire_timeout_man_max_f() |
249                 pbdma_acquire_timeout_en_disable_f());
250
251         gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
252                 fifo_eng_timeslice_timeout_128_f() |
253                 fifo_eng_timeslice_timescale_3_f() |
254                 fifo_eng_timeslice_enable_true_f());
255
256         gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
257                 fifo_pb_timeslice_timeout_16_f() |
258                 fifo_pb_timeslice_timescale_0_f() |
259                 fifo_pb_timeslice_enable_true_f());
260
261         gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
262
263         return channel_gk20a_commit_userd(c);
264 }
265
266 static int channel_gk20a_setup_userd(struct channel_gk20a *c)
267 {
268         BUG_ON(!c->userd_cpu_va);
269
270         gk20a_dbg_fn("");
271
272         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
273         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
274         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
275         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
276         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
277         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
278         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
279         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
280         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
281         gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
282
283         return 0;
284 }
285
286 static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
287 {
288         struct gk20a *g = ch_gk20a->g;
289         struct fifo_gk20a *f = &g->fifo;
290         struct fifo_engine_info_gk20a *engine_info =
291                 f->engine_info + ENGINE_GR_GK20A;
292
293         u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
294                 >> ram_in_base_shift_v();
295
296         gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
297                 ch_gk20a->hw_chid, inst_ptr);
298
299         ch_gk20a->bound = true;
300
301         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
302                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
303                  ~ccsr_channel_runlist_f(~0)) |
304                  ccsr_channel_runlist_f(engine_info->runlist_id));
305
306         gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
307                 ccsr_channel_inst_ptr_f(inst_ptr) |
308                 ccsr_channel_inst_target_vid_mem_f() |
309                 ccsr_channel_inst_bind_true_f());
310
311         gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
312                 (gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
313                  ~ccsr_channel_enable_set_f(~0)) |
314                  ccsr_channel_enable_set_true_f());
315 }
316
317 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
318 {
319         struct gk20a *g = ch_gk20a->g;
320
321         gk20a_dbg_fn("");
322
323         if (ch_gk20a->bound)
324                 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
325                         ccsr_channel_inst_ptr_f(0) |
326                         ccsr_channel_inst_bind_false_f());
327
328         ch_gk20a->bound = false;
329
330         /*
331          * If we are aggressive, we can destroy the syncpt
332          * resource at this point;
333          * if not, it will be destroyed at channel_free().
334          */
335         if (ch_gk20a->sync && ch_gk20a->sync->aggressive_destroy) {
336                 ch_gk20a->sync->destroy(ch_gk20a->sync);
337                 ch_gk20a->sync = NULL;
338         }
339 }
340
341 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
342 {
343         struct device *d = dev_from_gk20a(g);
344         int err = 0;
345         dma_addr_t iova;
346
347         gk20a_dbg_fn("");
348
349         ch->inst_block.size = ram_in_alloc_size_v();
350         ch->inst_block.cpuva = dma_alloc_coherent(d,
351                                         ch->inst_block.size,
352                                         &iova,
353                                         GFP_KERNEL);
354         if (!ch->inst_block.cpuva) {
355                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
356                 err = -ENOMEM;
357                 goto clean_up;
358         }
359
360         ch->inst_block.iova = iova;
361         ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
362                                                         ch->inst_block.iova);
363         if (!ch->inst_block.cpu_pa) {
364                 gk20a_err(d, "%s: failed to get physical address\n", __func__);
365                 err = -ENOMEM;
366                 goto clean_up;
367         }
368
369         gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
370                 ch->hw_chid, (u64)ch->inst_block.cpu_pa);
371
372         gk20a_dbg_fn("done");
373         return 0;
374
375 clean_up:
376         gk20a_err(d, "fail");
377         g->ops.fifo.free_inst(g, ch);
378         return err;
379 }
380
381 void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
382 {
383         struct device *d = dev_from_gk20a(g);
384
385         if (ch->inst_block.cpuva)
386                 dma_free_coherent(d, ch->inst_block.size,
387                                 ch->inst_block.cpuva, ch->inst_block.iova);
388         ch->inst_block.cpuva = NULL;
389         ch->inst_block.iova = 0;
390         memset(&ch->inst_block, 0, sizeof(struct inst_desc));
391 }
392
393 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
394 {
395         return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
396 }
397
398 void channel_gk20a_disable(struct channel_gk20a *ch)
399 {
400         /* disable channel */
401         gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
402                 gk20a_readl(ch->g,
403                         ccsr_channel_r(ch->hw_chid)) |
404                         ccsr_channel_enable_clr_true_f());
405 }
406
407 void gk20a_channel_abort(struct channel_gk20a *ch)
408 {
409         struct channel_gk20a_job *job, *n;
410         bool released_job_semaphore = false;
411
412         /* ensure no fences are pending */
413         if (ch->sync)
414                 ch->sync->set_min_eq_max(ch->sync);
415
416         /* release all job semaphores (applies only to jobs that use
417            semaphore synchronization) */
418         mutex_lock(&ch->jobs_lock);
419         list_for_each_entry_safe(job, n, &ch->jobs, list) {
420                 if (job->post_fence->semaphore) {
421                         gk20a_semaphore_release(job->post_fence->semaphore);
422                         released_job_semaphore = true;
423                 }
424         }
425         mutex_unlock(&ch->jobs_lock);
426
427         ch->g->ops.fifo.disable_channel(ch);
428
429         if (released_job_semaphore) {
430                 wake_up_interruptible_all(&ch->semaphore_wq);
431                 gk20a_channel_update(ch, 0);
432         }
433 }
434
435 int gk20a_wait_channel_idle(struct channel_gk20a *ch)
436 {
437         bool channel_idle = false;
438         unsigned long end_jiffies = jiffies +
439                 msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
440
441         do {
442                 mutex_lock(&ch->jobs_lock);
443                 channel_idle = list_empty(&ch->jobs);
444                 mutex_unlock(&ch->jobs_lock);
445                 if (channel_idle)
446                         break;
447
448                 usleep_range(1000, 3000);
449         } while (time_before(jiffies, end_jiffies)
450                         || !tegra_platform_is_silicon());
451
452         if (!channel_idle) {
453                 gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
454                                 ch->hw_chid);
455                 return -EBUSY;
456         }
457
458         return 0;
459 }
460
461 void gk20a_disable_channel(struct channel_gk20a *ch,
462                            bool finish,
463                            unsigned long finish_timeout)
464 {
465         if (finish) {
466                 int err = gk20a_channel_finish(ch, finish_timeout);
467                 WARN_ON(err);
468         }
469
470         /* disable the channel from hw and increment syncpoints */
471         gk20a_channel_abort(ch);
472
473         gk20a_wait_channel_idle(ch);
474
475         /* preempt the channel */
476         ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
477
478         /* remove channel from runlist */
479         channel_gk20a_update_runlist(ch, false);
480 }
481
482 #if defined(CONFIG_GK20A_CYCLE_STATS)
483
484 static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
485 {
486         /* disable existing cyclestats buffer */
487         mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
488         if (ch->cyclestate.cyclestate_buffer_handler) {
489                 dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
490                                 ch->cyclestate.cyclestate_buffer);
491                 dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
492                 ch->cyclestate.cyclestate_buffer_handler = NULL;
493                 ch->cyclestate.cyclestate_buffer = NULL;
494                 ch->cyclestate.cyclestate_buffer_size = 0;
495         }
496         mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
497 }
498
499 static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
500                        struct nvhost_cycle_stats_args *args)
501 {
502         struct dma_buf *dmabuf;
503         void *virtual_address;
504
505         if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
506
507                 /* set up new cyclestats buffer */
508                 dmabuf = dma_buf_get(args->nvmap_handle);
509                 if (IS_ERR(dmabuf))
510                         return PTR_ERR(dmabuf);
511                 virtual_address = dma_buf_vmap(dmabuf);
512                 if (!virtual_address)
513                         return -ENOMEM;
514
515                 ch->cyclestate.cyclestate_buffer_handler = dmabuf;
516                 ch->cyclestate.cyclestate_buffer = virtual_address;
517                 ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
518                 return 0;
519
520         } else if (!args->nvmap_handle &&
521                         ch->cyclestate.cyclestate_buffer_handler) {
522                 gk20a_free_cycle_stats_buffer(ch);
523                 return 0;
524
525         } else if (!args->nvmap_handle &&
526                         !ch->cyclestate.cyclestate_buffer_handler) {
527                 /* no request from GL */
528                 return 0;
529
530         } else {
531                 pr_err("channel already has cyclestats buffer\n");
532                 return -EINVAL;
533         }
534 }
535 #endif
536
537 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
538                 struct nvhost_set_error_notifier *args) {
539         void *va;
540
541         struct dma_buf *dmabuf;
542
543         if (!args->mem) {
544                 pr_err("gk20a_init_error_notifier: invalid memory handle\n");
545                 return -EINVAL;
546         }
547
548         dmabuf = dma_buf_get(args->mem);
549
550         if (ch->error_notifier_ref)
551                 gk20a_free_error_notifiers(ch);
552
553         if (IS_ERR(dmabuf)) {
554                 pr_err("Invalid handle: %d\n", args->mem);
555                 return -EINVAL;
556         }
557         /* map handle */
558         va = dma_buf_vmap(dmabuf);
559         if (!va) {
560                 dma_buf_put(dmabuf);
561                 pr_err("Cannot map notifier handle\n");
562                 return -ENOMEM;
563         }
564
565         /* set channel notifiers pointer */
566         ch->error_notifier_ref = dmabuf;
567         ch->error_notifier = va + args->offset;
568         ch->error_notifier_va = va;
569         memset(ch->error_notifier, 0, sizeof(struct nvhost_notification));
570         return 0;
571 }
572
573 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
574 {
575         if (ch->error_notifier_ref) {
576                 struct timespec time_data;
577                 u64 nsec;
578                 getnstimeofday(&time_data);
579                 nsec = ((u64)time_data.tv_sec) * 1000000000u +
580                                 (u64)time_data.tv_nsec;
581                 ch->error_notifier->time_stamp.nanoseconds[0] =
582                                 (u32)nsec;
583                 ch->error_notifier->time_stamp.nanoseconds[1] =
584                                 (u32)(nsec >> 32);
585                 ch->error_notifier->info32 = error;
586                 ch->error_notifier->status = 0xffff;
587                 gk20a_err(dev_from_gk20a(ch->g),
588                                 "error notifier set to %d\n", error);
589         }
590 }
591
592 static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
593 {
594         if (ch->error_notifier_ref) {
595                 dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
596                 dma_buf_put(ch->error_notifier_ref);
597                 ch->error_notifier_ref = 0;
598                 ch->error_notifier = 0;
599                 ch->error_notifier_va = 0;
600         }
601 }
602
603 void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
604 {
605         struct gk20a *g = ch->g;
606         struct device *d = dev_from_gk20a(g);
607         struct fifo_gk20a *f = &g->fifo;
608         struct gr_gk20a *gr = &g->gr;
609         struct vm_gk20a *ch_vm = ch->vm;
610         unsigned long timeout = gk20a_get_gr_idle_timeout(g);
611         struct dbg_session_gk20a *dbg_s;
612
613         gk20a_dbg_fn("");
614
615         /* if engine reset was deferred, perform it now */
616         mutex_lock(&f->deferred_reset_mutex);
617         if (g->fifo.deferred_reset_pending) {
618                 gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
619                            " deferred, running now");
620                 fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
621                 g->fifo.mmu_fault_engines = 0;
622                 g->fifo.deferred_reset_pending = false;
623         }
624         mutex_unlock(&f->deferred_reset_mutex);
625
626         if (!ch->bound)
627                 return;
628
629         if (!gk20a_channel_as_bound(ch) && !ch->vm)
630                 goto unbind;
631
632         gk20a_dbg_info("freeing bound channel context, timeout=%ld",
633                         timeout);
634
635         gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);
636
637         gk20a_free_error_notifiers(ch);
638
639         /* release channel ctx */
640         g->ops.gr.free_channel_ctx(ch);
641
642         gk20a_gr_flush_channel_tlb(gr);
643
644         memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
645
646         /* free gpfifo */
647         if (ch->gpfifo.gpu_va)
648                 gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
649                         ch->gpfifo.size, gk20a_mem_flag_none);
650         if (ch->gpfifo.cpu_va)
651                 dma_free_coherent(d, ch->gpfifo.size,
652                         ch->gpfifo.cpu_va, ch->gpfifo.iova);
653         ch->gpfifo.cpu_va = NULL;
654         ch->gpfifo.iova = 0;
655
656         memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
657
658 #if defined(CONFIG_GK20A_CYCLE_STATS)
659         gk20a_free_cycle_stats_buffer(ch);
660 #endif
661
662         channel_gk20a_free_priv_cmdbuf(ch);
663
664         /* sync must be destroyed before releasing channel vm */
665         if (ch->sync) {
666                 ch->sync->destroy(ch->sync);
667                 ch->sync = NULL;
668         }
669
670         /* release channel binding to the as_share */
671         if (ch_vm->as_share)
672                 gk20a_as_release_share(ch_vm->as_share);
673         else
674                 gk20a_vm_put(ch_vm);
675
676 unbind:
677         if (gk20a_is_channel_marked_as_tsg(ch))
678                 gk20a_tsg_unbind_channel(ch);
679
680         g->ops.fifo.unbind_channel(ch);
681         g->ops.fifo.free_inst(g, ch);
682
683         ch->vpr = false;
684         ch->vm = NULL;
685
686         mutex_lock(&ch->submit_lock);
687         gk20a_fence_put(ch->last_submit.pre_fence);
688         gk20a_fence_put(ch->last_submit.post_fence);
689         ch->last_submit.pre_fence = NULL;
690         ch->last_submit.post_fence = NULL;
691         mutex_unlock(&ch->submit_lock);
692         WARN_ON(ch->sync);
693
694         /* unlink all debug sessions */
695         mutex_lock(&ch->dbg_s_lock);
696
697         list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
698                 dbg_s->ch = NULL;
699                 list_del_init(&dbg_s->dbg_s_list_node);
700         }
701
702         mutex_unlock(&ch->dbg_s_lock);
703
704         /* ALWAYS last */
705         release_used_channel(f, ch);
706 }
707
708 int gk20a_channel_release(struct inode *inode, struct file *filp)
709 {
710         struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
711         struct gk20a *g = ch->g;
712         int err;
713
714         trace_gk20a_channel_release(dev_name(&g->dev->dev));
715
716         err = gk20a_busy(ch->g->dev);
717         if (err) {
718                 gk20a_err(dev_from_gk20a(g), "failed to release channel %d",
719                         ch->hw_chid);
720                 return err;
721         }
722         gk20a_free_channel(ch, true);
723         gk20a_idle(ch->g->dev);
724
725         gk20a_put_client(g);
726         filp->private_data = NULL;
727         return 0;
728 }
729
730 struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
731 {
732         struct fifo_gk20a *f = &g->fifo;
733         struct channel_gk20a *ch;
734
735         ch = acquire_unused_channel(f);
736         if (ch == NULL) {
737                 /* TBD: we want to make this virtualizable */
738                 gk20a_err(dev_from_gk20a(g), "out of hw chids");
739                 return NULL;
740         }
741
742         ch->g = g;
743
744         if (g->ops.fifo.alloc_inst(g, ch)) {
745                 ch->in_use = false;
746                 gk20a_err(dev_from_gk20a(g),
747                            "failed to open gk20a channel, out of inst mem");
748
749                 return NULL;
750         }
751         g->ops.fifo.bind_channel(ch);
752         ch->pid = current->pid;
753
754         /* By default, channel is regular (non-TSG) channel */
755         ch->tsgid = NVGPU_INVALID_TSG_ID;
756
757         /* reset timeout counter and update timestamp */
758         ch->timeout_accumulated_ms = 0;
759         ch->timeout_gpfifo_get = 0;
760         /* set gr host default timeout */
761         ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
762         ch->timeout_debug_dump = true;
763         ch->has_timedout = false;
764         ch->obj_class = 0;
765
766         /* The channel is *not* runnable at this point. It still needs to have
767          * an address space bound and allocate a gpfifo and grctx. */
768
769         init_waitqueue_head(&ch->notifier_wq);
770         init_waitqueue_head(&ch->semaphore_wq);
771         init_waitqueue_head(&ch->submit_wq);
772
773         return ch;
774 }
775
776 static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
777 {
778         int err;
779         struct channel_gk20a *ch;
780
781         trace_gk20a_channel_open(dev_name(&g->dev->dev));
782
783         err = gk20a_get_client(g);
784         if (err) {
785                 gk20a_err(dev_from_gk20a(g),
786                         "failed to get client ref");
787                 return err;
788         }
789
790         err = gk20a_busy(g->dev);
791         if (err) {
792                 gk20a_put_client(g);
793                 gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
794                 return err;
795         }
796         ch = gk20a_open_new_channel(g);
797         gk20a_idle(g->dev);
798         if (!ch) {
799                 gk20a_put_client(g);
800                 gk20a_err(dev_from_gk20a(g),
801                         "failed to get f");
802                 return -ENOMEM;
803         }
804
805         filp->private_data = ch;
806         return 0;
807 }
808
809 int gk20a_channel_open(struct inode *inode, struct file *filp)
810 {
811         struct gk20a *g = container_of(inode->i_cdev,
812                         struct gk20a, channel.cdev);
813         return __gk20a_channel_open(g, filp);
814 }
815
816 /* allocate private cmd buffer,
817    used for inserting commands before/after user-submitted buffers. */
818 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
819 {
820         struct device *d = dev_from_gk20a(c->g);
821         struct vm_gk20a *ch_vm = c->vm;
822         struct priv_cmd_queue *q = &c->priv_cmd_q;
823         struct priv_cmd_entry *e;
824         u32 i = 0, size;
825         int err = 0;
826         struct sg_table *sgt;
827         dma_addr_t iova;
828
829         /* Kernel can insert gpfifos before and after user gpfifos.
830            Before user gpfifos, kernel inserts fence_wait, which takes
831            syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
832            After user gpfifos, kernel inserts fence_get, which takes
833            wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
834            = 6 dwords.
835            Worst case, if the kernel adds both of them for every user gpfifo,
836            the max size of priv_cmdbuf is:
837            gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes */
838         size = roundup_pow_of_two(
839                 c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
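        /* e.g. for a 1024-entry gpfifo this works out to
         * 1024 * 2 * 10 * 4 / 3 = 27306 bytes, rounded up to a 32 KiB queue */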
840
841         q->mem.base_cpuva = dma_alloc_coherent(d, size,
842                                         &iova,
843                                         GFP_KERNEL);
844         if (!q->mem.base_cpuva) {
845                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
846                 err = -ENOMEM;
847                 goto clean_up;
848         }
849
850         q->mem.base_iova = iova;
851         q->mem.size = size;
852
853         err = gk20a_get_sgtable(d, &sgt,
854                         q->mem.base_cpuva, q->mem.base_iova, size);
855         if (err) {
856                 gk20a_err(d, "%s: failed to create sg table\n", __func__);
857                 goto clean_up;
858         }
859
860         memset(q->mem.base_cpuva, 0, size);
861
862         q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
863                                         size,
864                                         0, /* flags */
865                                         gk20a_mem_flag_none);
866         if (!q->base_gpuva) {
867                 gk20a_err(d, "ch %d : failed to map gpu va "
868                            "for priv cmd buffer", c->hw_chid);
869                 err = -ENOMEM;
870                 goto clean_up_sgt;
871         }
872
873         q->size = q->mem.size / sizeof(u32);
874
875         INIT_LIST_HEAD(&q->head);
876         INIT_LIST_HEAD(&q->free);
877
878         /* pre-alloc 25% of priv cmdbuf entries and put them on free list */
879         for (i = 0; i < q->size / 4; i++) {
880                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
881                 if (!e) {
882                         gk20a_err(d, "ch %d: fail to pre-alloc cmd entry",
883                                 c->hw_chid);
884                         err = -ENOMEM;
885                         goto clean_up_sgt;
886                 }
887                 e->pre_alloc = true;
888                 list_add(&e->list, &q->free);
889         }
890
891         gk20a_free_sgtable(&sgt);
892
893         return 0;
894
895 clean_up_sgt:
896         gk20a_free_sgtable(&sgt);
897 clean_up:
898         channel_gk20a_free_priv_cmdbuf(c);
899         return err;
900 }
901
902 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
903 {
904         struct device *d = dev_from_gk20a(c->g);
905         struct vm_gk20a *ch_vm = c->vm;
906         struct priv_cmd_queue *q = &c->priv_cmd_q;
907         struct priv_cmd_entry *e;
908         struct list_head *pos, *tmp, *head;
909
910         if (q->size == 0)
911                 return;
912
913         if (q->base_gpuva)
914                 gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
915                                 q->mem.size, gk20a_mem_flag_none);
916         if (q->mem.base_cpuva)
917                 dma_free_coherent(d, q->mem.size,
918                         q->mem.base_cpuva, q->mem.base_iova);
919         q->mem.base_cpuva = NULL;
920         q->mem.base_iova = 0;
921
922         /* free used list */
923         head = &q->head;
924         list_for_each_safe(pos, tmp, head) {
925                 e = container_of(pos, struct priv_cmd_entry, list);
926                 free_priv_cmdbuf(c, e);
927         }
928
929         /* free free list */
930         head = &q->free;
931         list_for_each_safe(pos, tmp, head) {
932                 e = container_of(pos, struct priv_cmd_entry, list);
933                 e->pre_alloc = false;
934                 free_priv_cmdbuf(c, e);
935         }
936
937         memset(q, 0, sizeof(struct priv_cmd_queue));
938 }
939
940 /* allocate a cmd buffer with given size. size is number of u32 entries */
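/* A minimal usage sketch (illustrative only; the in-tree channel sync code
 * may differ): reserve space for the 4-word syncpoint wait sequence and fill
 * it with add_wait_cmd():
 *
 *	struct priv_cmd_entry *wait_cmd;
 *	int err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd);
 *	if (!err)
 *		add_wait_cmd(wait_cmd->ptr, id, thresh);
 */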
941 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
942                              struct priv_cmd_entry **entry)
943 {
944         struct priv_cmd_queue *q = &c->priv_cmd_q;
945         struct priv_cmd_entry *e;
946         struct list_head *node;
947         u32 free_count;
948         u32 size = orig_size;
949         bool no_retry = false;
950
951         gk20a_dbg_fn("size %d", orig_size);
952
953         *entry = NULL;
954
955         /* if free space at the end is less than requested, increase the size
956          * so that the real allocation starts from the beginning. */
957         if (q->put + size > q->size)
958                 size = orig_size + (q->size - q->put);
959
960         gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
961                         c->hw_chid, q->get, q->put);
962
963 TRY_AGAIN:
964         free_count = (q->size - (q->put - q->get) - 1) % q->size;
965
966         if (size > free_count) {
967                 if (!no_retry) {
968                         recycle_priv_cmdbuf(c);
969                         no_retry = true;
970                         goto TRY_AGAIN;
971                 } else
972                         return -EAGAIN;
973         }
974
975         if (unlikely(list_empty(&q->free))) {
976
977                 gk20a_dbg_info("ch %d: run out of pre-alloc entries",
978                         c->hw_chid);
979
980                 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
981                 if (!e) {
982                         gk20a_err(dev_from_gk20a(c->g),
983                                 "ch %d: fail to allocate priv cmd entry",
984                                 c->hw_chid);
985                         return -ENOMEM;
986                 }
987         } else  {
988                 node = q->free.next;
989                 list_del(node);
990                 e = container_of(node, struct priv_cmd_entry, list);
991         }
992
993         e->size = orig_size;
994         e->gp_get = c->gpfifo.get;
995         e->gp_put = c->gpfifo.put;
996         e->gp_wrap = c->gpfifo.wrap;
997
998         /* if we have increased size to skip free space in the end, set put
999            to beginning of cmd buffer (0) + size */
1000         if (size != orig_size) {
1001                 e->ptr = q->mem.base_cpuva;
1002                 e->gva = q->base_gpuva;
1003                 q->put = orig_size;
1004         } else {
1005                 e->ptr = q->mem.base_cpuva + q->put;
1006                 e->gva = q->base_gpuva + q->put * sizeof(u32);
1007                 q->put = (q->put + orig_size) & (q->size - 1);
1008         }
1009
1010         /* we already handled q->put + size > q->size so BUG_ON this */
1011         BUG_ON(q->put > q->size);
1012
1013         /* add new entry to head since we free from head */
1014         list_add(&e->list, &q->head);
1015
1016         *entry = e;
1017
1018         gk20a_dbg_fn("done");
1019
1020         return 0;
1021 }
1022
1023 /* Don't call this to free an explicit cmd entry.
1024  * It doesn't update priv_cmd_queue get/put */
1025 static void free_priv_cmdbuf(struct channel_gk20a *c,
1026                              struct priv_cmd_entry *e)
1027 {
1028         struct priv_cmd_queue *q = &c->priv_cmd_q;
1029
1030         if (!e)
1031                 return;
1032
1033         list_del(&e->list);
1034
1035         if (unlikely(!e->pre_alloc))
1036                 kfree(e);
1037         else {
1038                 memset(e, 0, sizeof(struct priv_cmd_entry));
1039                 e->pre_alloc = true;
1040                 list_add(&e->list, &q->free);
1041         }
1042 }
1043
1044 /* free entries if they're no longer being used */
1045 static void recycle_priv_cmdbuf(struct channel_gk20a *c)
1046 {
1047         struct priv_cmd_queue *q = &c->priv_cmd_q;
1048         struct priv_cmd_entry *e, *tmp;
1049         struct list_head *head = &q->head;
1050         bool wrap_around, found = false;
1051
1052         gk20a_dbg_fn("");
1053
1054         /* Find the most recent free entry. Free it and everything before it */
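        /* An entry is considered consumed once the channel's gpfifo get
         * pointer has advanced past the entry's recorded gp_put, taking a
         * possible wrap of the gpfifo into account via the wrap flags. */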
1055         list_for_each_entry(e, head, list) {
1056
1057                 gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
1058                         "curr get:put:wrap %d:%d:%d",
1059                         c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
1060                         c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
1061
1062                 wrap_around = (c->gpfifo.wrap != e->gp_wrap);
1063                 if (e->gp_get < e->gp_put) {
1064                         if (c->gpfifo.get >= e->gp_put ||
1065                             wrap_around) {
1066                                 found = true;
1067                                 break;
1068                         } else
1069                                 e->gp_get = c->gpfifo.get;
1070                 } else if (e->gp_get > e->gp_put) {
1071                         if (wrap_around &&
1072                             c->gpfifo.get >= e->gp_put) {
1073                                 found = true;
1074                                 break;
1075                         } else
1076                                 e->gp_get = c->gpfifo.get;
1077                 }
1078         }
1079
1080         if (found)
1081                 q->get = (e->ptr - q->mem.base_cpuva) + e->size;
1082         else {
1083                 gk20a_dbg_info("no free entry recycled");
1084                 return;
1085         }
1086
1087         list_for_each_entry_safe_continue(e, tmp, head, list) {
1088                 free_priv_cmdbuf(c, e);
1089         }
1090
1091         gk20a_dbg_fn("done");
1092 }
1093
1094 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1095                                struct nvhost_alloc_gpfifo_args *args)
1096 {
1097         struct gk20a *g = c->g;
1098         struct device *d = dev_from_gk20a(g);
1099         struct vm_gk20a *ch_vm;
1100         u32 gpfifo_size;
1101         int err = 0;
1102         struct sg_table *sgt;
1103         dma_addr_t iova;
1104
1105         /* Kernel can insert one extra gpfifo entry before user submitted gpfifos
1106            and another one after, for internal usage. Triple the requested size. */
1107         gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
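        /* e.g. a request for 512 entries becomes a 2048-entry ring:
         * 512 * 3 = 1536, rounded up to the next power of two */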
1108
1109         if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
1110                 c->vpr = true;
1111
1112         /* an address space needs to have been bound at this point.   */
1113         if (!gk20a_channel_as_bound(c)) {
1114                 gk20a_err(d,
1115                             "not bound to an address space at time of gpfifo"
1116                             " allocation; bind an address space to the"
1117                             " channel first");
1118                 return -EINVAL;
1119         }
1120         ch_vm = c->vm;
1121
1122         c->cmds_pending = false;
1123         mutex_lock(&c->submit_lock);
1124         gk20a_fence_put(c->last_submit.pre_fence);
1125         gk20a_fence_put(c->last_submit.post_fence);
1126         c->last_submit.pre_fence = NULL;
1127         c->last_submit.post_fence = NULL;
1128         mutex_unlock(&c->submit_lock);
1129
1130         c->ramfc.offset = 0;
1131         c->ramfc.size = ram_in_ramfc_s() / 8;
1132
1133         if (c->gpfifo.cpu_va) {
1134                 gk20a_err(d, "channel %d : "
1135                            "gpfifo already allocated", c->hw_chid);
1136                 return -EEXIST;
1137         }
1138
1139         c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
1140         c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
1141                                                 c->gpfifo.size,
1142                                                 &iova,
1143                                                 GFP_KERNEL);
1144         if (!c->gpfifo.cpu_va) {
1145                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
1146                 err = -ENOMEM;
1147                 goto clean_up;
1148         }
1149
1150         c->gpfifo.iova = iova;
1151         c->gpfifo.entry_num = gpfifo_size;
1152
1153         c->gpfifo.get = c->gpfifo.put = 0;
1154
1155         err = gk20a_get_sgtable(d, &sgt,
1156                         c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
1157         if (err) {
1158                 gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
1159                 goto clean_up;
1160         }
1161
1162         c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
1163                                         &sgt,
1164                                         c->gpfifo.size,
1165                                         0, /* flags */
1166                                         gk20a_mem_flag_none);
1167         if (!c->gpfifo.gpu_va) {
1168                 gk20a_err(d, "channel %d : failed to map"
1169                            " gpu_va for gpfifo", c->hw_chid);
1170                 err = -ENOMEM;
1171                 goto clean_up_sgt;
1172         }
1173
1174         gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
1175                 c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1176
1177         channel_gk20a_setup_userd(c);
1178
1179         err = g->ops.fifo.setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
1180         if (err)
1181                 goto clean_up_unmap;
1182
1183         /* TBD: setup engine contexts */
1184
1185         err = channel_gk20a_alloc_priv_cmdbuf(c);
1186         if (err)
1187                 goto clean_up_unmap;
1188
1189         err = channel_gk20a_update_runlist(c, true);
1190         if (err)
1191                 goto clean_up_unmap;
1192
1193         gk20a_free_sgtable(&sgt);
1194
1195         gk20a_dbg_fn("done");
1196         return 0;
1197
1198 clean_up_unmap:
1199         gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
1200                 c->gpfifo.size, gk20a_mem_flag_none);
1201 clean_up_sgt:
1202         gk20a_free_sgtable(&sgt);
1203 clean_up:
1204         dma_free_coherent(d, c->gpfifo.size,
1205                 c->gpfifo.cpu_va, c->gpfifo.iova);
1206         c->gpfifo.cpu_va = NULL;
1207         c->gpfifo.iova = 0;
1208         memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
1209         gk20a_err(d, "fail");
1210         return err;
1211 }
1212
1213 static inline int wfi_cmd_size(void)
1214 {
1215         return 2;
1216 }
1217 void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
1218 {
1219         /* wfi */
1220         cmd->ptr[(*i)++] = 0x2001001E;
1221         /* handle, ignored */
1222         cmd->ptr[(*i)++] = 0x00000000;
1223 }
1224
1225 static inline bool check_gp_put(struct gk20a *g,
1226                                 struct channel_gk20a *c)
1227 {
1228         u32 put;
1229         /* gp_put changed unexpectedly since last update? */
1230         put = gk20a_bar1_readl(g,
1231                c->userd_gpu_va + 4 * ram_userd_gp_put_w());
1232         if (c->gpfifo.put != put) {
1233                 /*TBD: BUG_ON/teardown on this*/
1234                 gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
1235                            "since last update");
1236                 c->gpfifo.put = put;
1237                 return false; /* surprise! */
1238         }
1239         return true; /* checked out ok */
1240 }
1241
1242 /* Call this periodically to determine how the gpfifo is draining. */
1243 static inline u32 update_gp_get(struct gk20a *g,
1244                                 struct channel_gk20a *c)
1245 {
1246         u32 new_get = gk20a_bar1_readl(g,
1247                 c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
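        /* a get value lower than the previous one means the GPU wrapped past
         * the end of the ring; flip the wrap flag consumed by the priv
         * cmdbuf recycler */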
1248         if (new_get < c->gpfifo.get)
1249                 c->gpfifo.wrap = !c->gpfifo.wrap;
1250         c->gpfifo.get = new_get;
1251         return new_get;
1252 }
1253
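/* Number of free gpfifo slots. One slot is always kept empty so that
 * put == get unambiguously means "empty"; the unsigned arithmetic also
 * handles wrap, e.g. entry_num = 512, put = 10, get = 500 gives
 * (512 - (10 - 500) - 1) % 512 = 489 free slots. */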
1254 static inline u32 gp_free_count(struct channel_gk20a *c)
1255 {
1256         return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
1257                 c->gpfifo.entry_num;
1258 }
1259
1260 bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
1261                 u32 timeout_delta_ms)
1262 {
1263         u32 gpfifo_get = update_gp_get(ch->g, ch);
1264         /* Count consequent timeout isr */
1265         if (gpfifo_get == ch->timeout_gpfifo_get) {
1266                 /* we didn't advance since previous channel timeout check */
1267                 ch->timeout_accumulated_ms += timeout_delta_ms;
1268         } else {
1269                 /* first timeout isr encountered */
1270                 ch->timeout_accumulated_ms = timeout_delta_ms;
1271         }
1272
1273         ch->timeout_gpfifo_get = gpfifo_get;
1274
1275         return ch->g->timeouts_enabled &&
1276                 ch->timeout_accumulated_ms > ch->timeout_ms_max;
1277 }
1278
1279
1280 /* Issue a syncpoint increment *preceded* by a wait-for-idle
1281  * command.  All commands on the channel will have been
1282  * consumed at the time the fence syncpoint increment occurs.
1283  */
1284 static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
1285 {
1286         struct priv_cmd_entry *cmd = NULL;
1287         struct gk20a *g = c->g;
1288         u32 free_count;
1289         int err;
1290
1291         if (c->has_timedout)
1292                 return -ETIMEDOUT;
1293
1294         update_gp_get(g, c);
1295         free_count = gp_free_count(c);
1296         if (unlikely(!free_count)) {
1297                 gk20a_err(dev_from_gk20a(g),
1298                            "not enough gpfifo space");
1299                 return -EAGAIN;
1300         }
1301
1302         mutex_lock(&c->submit_lock);
1303
1304         if (!c->sync) {
1305                 c->sync = gk20a_channel_sync_create(c);
1306                 if (!c->sync) {
1307                         mutex_unlock(&c->submit_lock);
1308                         return -ENOMEM;
1309                 }
1310         }
1311
1312         gk20a_fence_put(c->last_submit.pre_fence);
1313         gk20a_fence_put(c->last_submit.post_fence);
1314         c->last_submit.pre_fence = NULL;
1315         c->last_submit.post_fence = NULL;
1316
1317         err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence);
1318         if (unlikely(err)) {
1319                 mutex_unlock(&c->submit_lock);
1320                 return err;
1321         }
1322
1323         WARN_ON(!c->last_submit.post_fence->wfi);
1324
1325         c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
1326         c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
1327                 pbdma_gp_entry1_length_f(cmd->size);
1328
1329         c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
1330
1331         /* save gp_put */
1332         cmd->gp_put = c->gpfifo.put;
1333
1334         gk20a_bar1_writel(g,
1335                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1336                 c->gpfifo.put);
1337
1338         mutex_unlock(&c->submit_lock);
1339
1340         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1341                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1342
1343         return 0;
1344 }
1345
1346 static u32 get_gp_free_count(struct channel_gk20a *c)
1347 {
1348         update_gp_get(c->g, c);
1349         return gp_free_count(c);
1350 }
1351
1352 static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
1353 {
1354         void *mem = NULL;
1355         unsigned int words;
1356         u64 offset;
1357         struct dma_buf *dmabuf = NULL;
1358
1359         if (gk20a_debug_trace_cmdbuf) {
1360                 u64 gpu_va = (u64)g->entry0 |
1361                         (u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
1362                 int err;
1363
1364                 words = pbdma_gp_entry1_length_v(g->entry1);
1365                 err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
1366                 if (!err)
1367                         mem = dma_buf_vmap(dmabuf);
1368         }
1369
1370         if (mem) {
1371                 u32 i;
1372                 /*
1373                  * Write in batches of 128 as there seems to be a limit
1374                  * of how much you can output to ftrace at once.
1375                  */
1376                 for (i = 0; i < words; i += 128U) {
1377                         trace_gk20a_push_cmdbuf(
1378                                 c->g->dev->name,
1379                                 0,
1380                                 min(words - i, 128U),
1381                                 offset + i * sizeof(u32),
1382                                 mem);
1383                 }
1384                 dma_buf_vunmap(dmabuf, mem);
1385         }
1386 }
1387
1388 static int gk20a_channel_add_job(struct channel_gk20a *c,
1389                                  struct gk20a_fence *pre_fence,
1390                                  struct gk20a_fence *post_fence)
1391 {
1392         struct vm_gk20a *vm = c->vm;
1393         struct channel_gk20a_job *job = NULL;
1394         struct mapped_buffer_node **mapped_buffers = NULL;
1395         int err = 0, num_mapped_buffers;
1396
1397         /* job needs reference to this vm */
1398         gk20a_vm_get(vm);
1399
1400         err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
1401         if (err) {
1402                 gk20a_vm_put(vm);
1403                 return err;
1404         }
1405
1406         job = kzalloc(sizeof(*job), GFP_KERNEL);
1407         if (!job) {
1408                 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1409                 gk20a_vm_put(vm);
1410                 return -ENOMEM;
1411         }
1412
1413         job->num_mapped_buffers = num_mapped_buffers;
1414         job->mapped_buffers = mapped_buffers;
1415         job->pre_fence = gk20a_fence_get(pre_fence);
1416         job->post_fence = gk20a_fence_get(post_fence);
1417
1418         mutex_lock(&c->jobs_lock);
1419         list_add_tail(&job->list, &c->jobs);
1420         mutex_unlock(&c->jobs_lock);
1421
1422         return 0;
1423 }
1424
1425 void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1426 {
1427         struct vm_gk20a *vm = c->vm;
1428         struct channel_gk20a_job *job, *n;
1429
1430         wake_up(&c->submit_wq);
1431
1432         mutex_lock(&c->submit_lock);
1433         mutex_lock(&c->jobs_lock);
1434         list_for_each_entry_safe(job, n, &c->jobs, list) {
1435                 bool completed = gk20a_fence_is_expired(job->post_fence);
1436                 if (!completed)
1437                         break;
1438
1439                 c->sync->signal_timeline(c->sync);
1440
1441                 gk20a_vm_put_buffers(vm, job->mapped_buffers,
1442                                 job->num_mapped_buffers);
1443
1444                 /* Close the fences (this will unref the semaphores and release
1445                  * them to the pool). */
1446                 gk20a_fence_put(job->pre_fence);
1447                 gk20a_fence_put(job->post_fence);
1448
1449                 /* job is done. release its reference to vm */
1450                 gk20a_vm_put(vm);
1451
1452                 list_del_init(&job->list);
1453                 kfree(job);
1454                 gk20a_idle(c->g->dev);
1455         }
1456
1457         /*
1458          * If job list is empty then channel is idle and we can free
1459          * the syncpt here (given aggressive_destroy flag is set)
1460          * Note: check if last submit is complete before destroying
1461          * the sync resource
1462          */
1463         if (list_empty(&c->jobs)) {
1464                 if (c->sync && c->sync->aggressive_destroy &&
1465                           gk20a_fence_is_expired(c->last_submit.post_fence)) {
1466                         c->sync->destroy(c->sync);
1467                         c->sync = NULL;
1468                 }
1469         }
1470         mutex_unlock(&c->jobs_lock);
1471         mutex_unlock(&c->submit_lock);
1472 }
1473
1474 void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
1475 {
1476         /* syncpoint_a */
1477         ptr[0] = 0x2001001C;
1478         /* payload */
1479         ptr[1] = thresh;
1480         /* syncpoint_b */
1481         ptr[2] = 0x2001001D;
1482         /* syncpt_id, switch_en, wait */
1483         ptr[3] = (id << 8) | 0x10;
1484 }
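/* The four words written by add_wait_cmd() form one complete syncpoint wait;
 * callers are expected to have reserved at least four u32s of priv cmdbuf
 * space for it (see gk20a_channel_alloc_priv_cmdbuf()). */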
1485
1486 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1487                                 struct nvhost_gpfifo *gpfifo,
1488                                 u32 num_entries,
1489                                 u32 flags,
1490                                 struct nvhost_fence *fence,
1491                                 struct gk20a_fence **fence_out)
1492 {
1493         struct gk20a *g = c->g;
1494         struct device *d = dev_from_gk20a(g);
1495         int err = 0;
1496         int i;
1497         int wait_fence_fd = -1;
1498         struct priv_cmd_entry *wait_cmd = NULL;
1499         struct priv_cmd_entry *incr_cmd = NULL;
1500         struct gk20a_fence *pre_fence = NULL;
1501         struct gk20a_fence *post_fence = NULL;
1502         /* we might need two extra gpfifo entries - one for pre fence
1503          * and one for post fence. */
1504         const int extra_entries = 2;
1505         bool need_wfi = !(flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1506
1507         if (c->has_timedout)
1508                 return -ETIMEDOUT;
1509
1510         if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
1511                       NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
1512             !fence)
1513                 return -EINVAL;
1514
1515 #ifdef CONFIG_DEBUG_FS
1516         /* update debug settings */
1517         if (g->ops.ltc.sync_debugfs)
1518                 g->ops.ltc.sync_debugfs(g);
1519 #endif
1520
1521         gk20a_dbg_info("channel %d", c->hw_chid);
1522
1523         /* gk20a_channel_update releases this ref. */
1524         err = gk20a_busy(g->dev);
1525         if (err) {
1526                 gk20a_err(d, "failed to power on gk20a to submit gpfifo");
1527                 return err;
1528         }
1529
1530         trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
1531                                           c->hw_chid,
1532                                           num_entries,
1533                                           flags,
1534                                           fence ? fence->syncpt_id : 0,
1535                                           fence ? fence->value : 0);
1536         check_gp_put(g, c);
1537         update_gp_get(g, c);
1538
1539         gk20a_dbg_info("pre-submit put %d, get %d, size %d",
1540                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1541
1542         /* Invalidate tlb if it's dirty...                                   */
1543         /* TBD: this should be done in the cmd stream, not with PRIs.        */
1544         /* We don't know what context is currently running...                */
1545         /* Note also: there can be more than one context associated with the */
1546         /* address space (vm).   */
1547         g->ops.mm.tlb_invalidate(c->vm);
1548
1549         /* Make sure we have enough space for gpfifo entries. If not,
1550          * wait for signals from completed submits */
1551         if (gp_free_count(c) < num_entries + extra_entries) {
1552                 err = wait_event_interruptible(c->submit_wq,
1553                         get_gp_free_count(c) >= num_entries + extra_entries ||
1554                         c->has_timedout);
1555         }
1556
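             /*
              * The sleep above returns 0 once enough entries are free
              * (submit_wq is presumably woken from the channel update path as
              * earlier work completes) or once the channel has timed out; if
              * it was instead interrupted by a signal, err is non-zero and the
              * submit is abandoned below with -EAGAIN.
              */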
1557         if (c->has_timedout) {
1558                 err = -ETIMEDOUT;
1559                 goto clean_up;
1560         }
1561
1562         if (err) {
1563                 gk20a_err(d, "not enough gpfifo space");
1564                 err = -EAGAIN;
1565                 goto clean_up;
1566         }
1567
1568         mutex_lock(&c->submit_lock);
1569
1570         if (!c->sync) {
1571                 c->sync = gk20a_channel_sync_create(c);
1572                 if (!c->sync) {
1573                         err = -ENOMEM;
1574                         mutex_unlock(&c->submit_lock);
1575                         goto clean_up;
1576                 }
1577         }
1578
1579         /*
1580          * optionally insert syncpt wait in the beginning of gpfifo submission
1581          * when user requested and the wait hasn't expired.
1582          * validate that the id makes sense, and elide the wait if not.
1583          * the only reason this isn't being unceremoniously killed is to
1584          * keep running some tests which trigger this condition.
1585          */
1586         if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
1587                 if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
1588                         wait_fence_fd = fence->syncpt_id;
1589                         err = c->sync->wait_fd(c->sync, wait_fence_fd,
1590                                         &wait_cmd, &pre_fence);
1591                 } else {
1592                         err = c->sync->wait_syncpt(c->sync, fence->syncpt_id,
1593                                         fence->value, &wait_cmd, &pre_fence);
1594                 }
1595         }
1596         if (err) {
1597                 mutex_unlock(&c->submit_lock);
1598                 goto clean_up;
1599         }
1600
1601
1602         /* always insert syncpt increment at end of gpfifo submission
1603            to keep track of method completion for idle railgating */
1604         if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
1605                 err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
1606                                          &post_fence, need_wfi);
1607         else
1608                 err = c->sync->incr(c->sync, &incr_cmd,
1609                                     &post_fence);
1610         if (err) {
1611                 mutex_unlock(&c->submit_lock);
1612                 goto clean_up;
1613         }
1614
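             /*
              * Each GPFIFO entry is two 32-bit words: entry0 carries the low 32
              * bits of the command buffer GPU VA, entry1 carries the high bits
              * combined with the length field (pbdma_gp_entry1_length_f(), i.e.
              * words << 10 as the inline comments note).  The put pointer wraps
              * with "(put + 1) & (entry_num - 1)", which relies on entry_num
              * being a power of two; e.g. entry_num == 1024 gives a mask of
              * 0x3ff.
              */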
1615         if (wait_cmd) {
1616                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1617                         u64_lo32(wait_cmd->gva);
1618                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1619                         u64_hi32(wait_cmd->gva) |
1620                         pbdma_gp_entry1_length_f(wait_cmd->size);
1621                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1622                         0, wait_cmd->size, 0, wait_cmd->ptr);
1623
1624                 c->gpfifo.put = (c->gpfifo.put + 1) &
1625                         (c->gpfifo.entry_num - 1);
1626
1627                 /* save gp_put */
1628                 wait_cmd->gp_put = c->gpfifo.put;
1629         }
1630
1631         for (i = 0; i < num_entries; i++) {
1632                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1633                         gpfifo[i].entry0; /* cmd buf va low 32 */
1634                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1635                         gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
1636                 trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
1637                 c->gpfifo.put = (c->gpfifo.put + 1) &
1638                         (c->gpfifo.entry_num - 1);
1639         }
1640
1641         if (incr_cmd) {
1642                 c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
1643                         u64_lo32(incr_cmd->gva);
1644                 c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
1645                         u64_hi32(incr_cmd->gva) |
1646                         pbdma_gp_entry1_length_f(incr_cmd->size);
1647                 trace_gk20a_push_cmdbuf(c->g->dev->name,
1648                         0, incr_cmd->size, 0, incr_cmd->ptr);
1649
1650                 c->gpfifo.put = (c->gpfifo.put + 1) &
1651                         (c->gpfifo.entry_num - 1);
1652
1653                 /* save gp_put */
1654                 incr_cmd->gp_put = c->gpfifo.put;
1655         }
1656
1657         gk20a_fence_put(c->last_submit.pre_fence);
1658         gk20a_fence_put(c->last_submit.post_fence);
1659         c->last_submit.pre_fence = pre_fence;
1660         c->last_submit.post_fence = post_fence;
1661         if (fence_out)
1662                 *fence_out = gk20a_fence_get(post_fence);
1663
1664         /* TODO! Check for errors... */
1665         gk20a_channel_add_job(c, pre_fence, post_fence);
1666
1667         c->cmds_pending = true;
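             /*
              * Publishing the new put pointer into the GP_PUT word of this
              * channel's USERD through its BAR1 mapping is the doorbell that
              * lets host/PBDMA start fetching the entries written above.
              */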
1668         gk20a_bar1_writel(g,
1669                 c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
1670                 c->gpfifo.put);
1671
1672         mutex_unlock(&c->submit_lock);
1673
1674         trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
1675                                              c->hw_chid,
1676                                              num_entries,
1677                                              flags,
1678                                              post_fence->syncpt_id,
1679                                              post_fence->syncpt_value);
1680
1681         gk20a_dbg_info("post-submit put %d, get %d, size %d",
1682                 c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
1683
1684         gk20a_dbg_fn("done");
1685         return err;
1686
1687 clean_up:
1688         gk20a_err(d, "failed to submit gpfifo");
1689         free_priv_cmdbuf(c, wait_cmd);
1690         free_priv_cmdbuf(c, incr_cmd);
1691         gk20a_fence_put(pre_fence);
1692         gk20a_fence_put(post_fence);
1693         gk20a_idle(g->dev);
1694         return err;
1695 }
1696
1697 void gk20a_remove_channel_support(struct channel_gk20a *c)
1698 {
1699
1700 }
1701
1702 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
1703 {
1704         struct channel_gk20a *c = g->fifo.channel+chid;
1705         c->g = g;
1706         c->in_use = false;
1707         c->hw_chid = chid;
1708         c->bound = false;
1709         c->remove_support = gk20a_remove_channel_support;
1710         mutex_init(&c->jobs_lock);
1711         mutex_init(&c->submit_lock);
1712         INIT_LIST_HEAD(&c->jobs);
1713 #if defined(CONFIG_GK20A_CYCLE_STATS)
1714         mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
1715 #endif
1716         INIT_LIST_HEAD(&c->dbg_s_list);
1717         mutex_init(&c->dbg_s_lock);
1718
1719         return 0;
1720 }
1721
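     /*
      * Wait for everything previously submitted on the channel to complete.
      * If the most recent post fence was created without a wait-for-idle (and
      * the channel is not using the KEPLER_C class, which is exempted here),
      * a small WFI+increment submit is issued first so that waiting on the
      * resulting fence covers completion of the prior work rather than just
      * the last fence increment.
      */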
1722 int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
1723 {
1724         int err = 0;
1725         struct gk20a_fence *fence = ch->last_submit.post_fence;
1726
1727         if (!ch->cmds_pending)
1728                 return 0;
1729
1730         /* Do not wait for a timedout channel */
1731         if (ch->has_timedout)
1732                 return -ETIMEDOUT;
1733
1734         if (!(fence && fence->wfi) && ch->obj_class != KEPLER_C) {
1735                 gk20a_dbg_fn("issuing wfi, incr to finish the channel");
1736                 err = gk20a_channel_submit_wfi(ch);
1737                 fence = ch->last_submit.post_fence;
1738         }
1739         if (err)
1740                 return err;
1741
1742         BUG_ON(!(fence && fence->wfi) && ch->obj_class != KEPLER_C);
1743
1744         gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p",
1745                      fence->syncpt_value, fence->semaphore);
1746
1747         err = gk20a_fence_wait(fence, timeout);
1748         if (WARN_ON(err))
1749                 dev_warn(dev_from_gk20a(ch->g),
1750                        "timed out waiting for gk20a channel to finish");
1751         else
1752                 ch->cmds_pending = false;
1753
1754         return err;
1755 }
1756
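     /*
      * CPU-side wait on a semaphore value living in a user-supplied dma-buf:
      * the backing page is kmapped and the caller sleeps on the channel's
      * semaphore_wq (see gk20a_channel_semaphore_wakeup(), which wakes this
      * queue) until the 32-bit word at the given offset equals the expected
      * payload, the channel times out, or the timeout expires.
      */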
1757 static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
1758                                         ulong id, u32 offset,
1759                                         u32 payload, long timeout)
1760 {
1761         struct platform_device *pdev = ch->g->dev;
1762         struct dma_buf *dmabuf;
1763         void *data;
1764         u32 *semaphore;
1765         int ret = 0;
1766         long remain;
1767
1768         /* do not wait if channel has timed out */
1769         if (ch->has_timedout)
1770                 return -ETIMEDOUT;
1771
1772         dmabuf = dma_buf_get(id);
1773         if (IS_ERR(dmabuf)) {
1774                 gk20a_err(&pdev->dev, "invalid semaphore nvmap handle 0x%lx",
1775                            id);
1776                 return -EINVAL;
1777         }
1778
1779         data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
1780         if (!data) {
1781                 gk20a_err(&pdev->dev, "failed to map semaphore memory");
1782                 ret = -EINVAL;
1783                 goto cleanup_put;
1784         }
1785
1786         semaphore = data + (offset & ~PAGE_MASK);
1787
1788         remain = wait_event_interruptible_timeout(
1789                         ch->semaphore_wq,
1790                         *semaphore == payload || ch->has_timedout,
1791                         timeout);
1792
1793         if (remain == 0 && *semaphore != payload)
1794                 ret = -ETIMEDOUT;
1795         else if (remain < 0)
1796                 ret = remain;
1797
1798         dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
1799 cleanup_put:
1800         dma_buf_put(dmabuf);
1801         return ret;
1802 }
1803
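     /*
      * Backend for the CHANNEL_WAIT ioctl.  For NOTIFIER waits the
      * notification object is vmapped and the caller sleeps on notifier_wq
      * until its status word is cleared (or the channel/timeout expires),
      * after which a timestamp and placeholder info fields are filled in.
      * SEMAPHORE waits are forwarded to gk20a_channel_wait_semaphore() above.
      */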
1804 static int gk20a_channel_wait(struct channel_gk20a *ch,
1805                               struct nvhost_wait_args *args)
1806 {
1807         struct device *d = dev_from_gk20a(ch->g);
1808         struct dma_buf *dmabuf;
1809         struct notification *notif;
1810         struct timespec tv;
1811         u64 now;
1812         ulong id;
1813         u32 offset;
1814         unsigned long timeout;
1815         int remain, ret = 0;
1816
1817         gk20a_dbg_fn("");
1818
1819         if (ch->has_timedout)
1820                 return -ETIMEDOUT;
1821
1822         if (args->timeout == NVHOST_NO_TIMEOUT)
1823                 timeout = MAX_SCHEDULE_TIMEOUT;
1824         else
1825                 timeout = (u32)msecs_to_jiffies(args->timeout);
1826
1827         switch (args->type) {
1828         case NVHOST_WAIT_TYPE_NOTIFIER:
1829                 id = args->condition.notifier.nvmap_handle;
1830                 offset = args->condition.notifier.offset;
1831
1832                 dmabuf = dma_buf_get(id);
1833                 if (IS_ERR(dmabuf)) {
1834                         gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
1835                                    id);
1836                         return -EINVAL;
1837                 }
1838
1839                 notif = dma_buf_vmap(dmabuf);
1840                 if (!notif) {
1841                         gk20a_err(d, "failed to map notifier memory");
                             dma_buf_put(dmabuf);
1842                         return -ENOMEM;
1843                 }
1844
1845                 notif = (struct notification *)((uintptr_t)notif + offset);
1846
1847                 /* the user is expected to set the notifier status to a
1848                  * non-zero (pending) value before calling this ioctl */
1849                 remain = wait_event_interruptible_timeout(
1850                                 ch->notifier_wq,
1851                                 notif->status == 0 || ch->has_timedout,
1852                                 timeout);
1853
1854                 if (remain == 0 && notif->status != 0) {
1855                         ret = -ETIMEDOUT;
1856                         goto notif_clean_up;
1857                 } else if (remain < 0) {
1858                         ret = -EINTR;
1859                         goto notif_clean_up;
1860                 }
1861
1862                 /* TBD: fill in correct information */
1863                 now = get_jiffies_64();
1864                 jiffies_to_timespec(now, &tv);
1865                 notif->timestamp.nanoseconds[0] = tv.tv_nsec;
1866                 notif->timestamp.nanoseconds[1] = tv.tv_sec;
1867                 notif->info32 = 0xDEADBEEF; /* should be object name */
1868                 notif->info16 = ch->hw_chid; /* should be method offset */
1869
1870 notif_clean_up:
1871                 dma_buf_vunmap(dmabuf, notif);
                     dma_buf_put(dmabuf);
1872                 return ret;
1873
1874         case NVHOST_WAIT_TYPE_SEMAPHORE:
1875                 ret = gk20a_channel_wait_semaphore(ch,
1876                                 args->condition.semaphore.nvmap_handle,
1877                                 args->condition.semaphore.offset,
1878                                 args->condition.semaphore.payload,
1879                                 timeout);
1880
1881                 break;
1882
1883         default:
1884                 ret = -EINVAL;
1885                 break;
1886         }
1887
1888         return ret;
1889 }
1890
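     /*
      * Map the NVHOST priority levels onto a raw timeslice value.  Judging
      * from the per-case comments, channel_gk20a_set_schedule_params() scales
      * this value by a fixed shift of 3, so e.g. 64 becomes 512us and 255
      * becomes 2040us of timeslice.
      */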
1891 static int gk20a_channel_set_priority(struct channel_gk20a *ch,
1892                 u32 priority)
1893 {
1894         u32 timeslice_timeout;
1895         /* set priority of graphics channel */
1896         switch (priority) {
1897         case NVHOST_PRIORITY_LOW:
1898                 /* 64 << 3 = 512us */
1899                 timeslice_timeout = 64;
1900                 break;
1901         case NVHOST_PRIORITY_MEDIUM:
1902                 /* 128 << 3 = 1024us */
1903                 timeslice_timeout = 128;
1904                 break;
1905         case NVHOST_PRIORITY_HIGH:
1906                 /* 255 << 3 = 2040us */
1907                 timeslice_timeout = 255;
1908                 break;
1909         default:
1910                 pr_err("Unsupported priority %u\n", priority);
1911                 return -EINVAL;
1912         }
1913         channel_gk20a_set_schedule_params(ch,
1914                         timeslice_timeout);
1915         return 0;
1916 }
1917
1918 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
1919                             struct nvhost_zcull_bind_args *args)
1920 {
1921         struct gk20a *g = ch->g;
1922         struct gr_gk20a *gr = &g->gr;
1923
1924         gk20a_dbg_fn("");
1925
1926         return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
1927                                 args->gpu_va, args->mode);
1928 }
1929
1930 /* in this context the "channel" is the host1x channel which
1931  * maps to *all* gk20a channels */
1932 int gk20a_channel_suspend(struct gk20a *g)
1933 {
1934         struct fifo_gk20a *f = &g->fifo;
1935         u32 chid;
1936         bool channels_in_use = false;
1937         int err;
1938
1939         gk20a_dbg_fn("");
1940
1941         /* wait for engine idle */
1942         err = g->ops.fifo.wait_engine_idle(g);
1943         if (err)
1944                 return err;
1945
1946         for (chid = 0; chid < f->num_channels; chid++) {
1947                 if (f->channel[chid].in_use) {
1948
1949                         gk20a_dbg_info("suspend channel %d", chid);
1950                         /* disable channel */
1951                         g->ops.fifo.disable_channel(&f->channel[chid]);
1952                         /* preempt the channel */
1953                         g->ops.fifo.preempt_channel(g, chid);
1954
1955                         channels_in_use = true;
1956                 }
1957         }
1958
1959         if (channels_in_use) {
1960                 g->ops.fifo.update_runlist(g, 0, ~0, false, true);
1961
1962                 for (chid = 0; chid < f->num_channels; chid++) {
1963                         if (f->channel[chid].in_use)
1964                                 g->ops.fifo.unbind_channel(&f->channel[chid]);
1965                 }
1966         }
1967
1968         gk20a_dbg_fn("done");
1969         return 0;
1970 }
1971
1972 /* in this context the "channel" is the host1x channel which
1973  * maps to *all* gk20a channels */
1974 int gk20a_channel_resume(struct gk20a *g)
1975 {
1976         struct fifo_gk20a *f = &g->fifo;
1977         u32 chid;
1978         bool channels_in_use = false;
1979
1980         gk20a_dbg_fn("");
1981
1982         for (chid = 0; chid < f->num_channels; chid++) {
1983                 if (f->channel[chid].in_use) {
1984                         gk20a_dbg_info("resume channel %d", chid);
1985                         g->ops.fifo.bind_channel(&f->channel[chid]);
1986                         channels_in_use = true;
1987                 }
1988         }
1989
1990         if (channels_in_use)
1991                 g->ops.fifo.update_runlist(g, 0, ~0, true, true);
1992
1993         gk20a_dbg_fn("done");
1994         return 0;
1995 }
1996
1997 void gk20a_channel_semaphore_wakeup(struct gk20a *g)
1998 {
1999         struct fifo_gk20a *f = &g->fifo;
2000         u32 chid;
2001
2002         gk20a_dbg_fn("");
2003
2004         for (chid = 0; chid < f->num_channels; chid++) {
2005                 struct channel_gk20a *c = g->fifo.channel+chid;
2006                 if (c->in_use) {
2007                         wake_up_interruptible_all(&c->semaphore_wq);
2008                         gk20a_channel_update(c, 0);
2009                 }
2010         }
2011 }
2012
2013 static int gk20a_ioctl_channel_submit_gpfifo(
2014         struct channel_gk20a *ch,
2015         struct nvhost_submit_gpfifo_args *args)
2016 {
2017         struct gk20a_fence *fence_out;
2018         void *gpfifo;
2019         u32 size;
2020         int ret = 0;
2021
2022         gk20a_dbg_fn("");
2023
2024         if (ch->has_timedout)
2025                 return -ETIMEDOUT;
2026
             /* args->num_entries is user-controlled; reject counts whose byte
              * size would overflow the u32 computation below and lead to a
              * short allocation while the submit path still trusts
              * num_entries */
             if (args->num_entries > UINT_MAX / sizeof(struct nvhost_gpfifo))
                     return -EINVAL;

2027         size = args->num_entries * sizeof(struct nvhost_gpfifo);
2028
2029         gpfifo = kzalloc(size, GFP_KERNEL);
2030         if (!gpfifo)
2031                 return -ENOMEM;
2032
2033         if (copy_from_user(gpfifo,
2034                            (void __user *)(uintptr_t)args->gpfifo, size)) {
2035                 ret = -EFAULT;
2036                 goto clean_up;
2037         }
2038
2039         ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
2040                                           args->flags, &args->fence,
2041                                           &fence_out);
2042
2043         if (ret)
2044                 goto clean_up;
2045
2046         /* Convert fence_out to something we can pass back to user space. */
2047         if (args->flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
2048                 if (args->flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2049                         int fd = gk20a_fence_install_fd(fence_out);
2050                         if (fd < 0)
2051                                 ret = fd;
2052                         else
2053                                 args->fence.syncpt_id = fd;
2054                 } else {
2055                         args->fence.syncpt_id = fence_out->syncpt_id;
2056                         args->fence.value = fence_out->syncpt_value;
2057                 }
2058         }
2059         gk20a_fence_put(fence_out);
2060
2061 clean_up:
2062         kfree(gpfifo);
2063         return ret;
2064 }
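     /*
      * Illustrative only: a rough sketch of how userspace might drive the
      * SUBMIT_GPFIFO path handled above.  The struct, field and ioctl names
      * match the ones used in this file; lo32()/hi32()/wait_on() and the
      * channel_fd value are placeholders, not part of any real API.
      *
      *      struct nvhost_gpfifo entries[1] = {
      *              { .entry0 = lo32(cmdbuf_gpu_va),
      *                .entry1 = hi32(cmdbuf_gpu_va) | (num_words << 10) },
      *      };
      *      struct nvhost_submit_gpfifo_args args = {
      *              .gpfifo      = (u64)(uintptr_t)entries,
      *              .num_entries = 1,
      *              .flags       = NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET,
      *      };
      *      if (ioctl(channel_fd, NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO, &args) == 0)
      *              wait_on(args.fence.syncpt_id, args.fence.value);
      */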
2065
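     /*
      * Populate the fifo-related hooks in gpu_ops with the gk20a
      * implementations, so common code (e.g. the suspend/resume paths above)
      * goes through g->ops.fifo.* rather than calling the channel functions
      * directly.
      */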
2066 void gk20a_init_channel(struct gpu_ops *gops)
2067 {
2068         gops->fifo.bind_channel = channel_gk20a_bind;
2069         gops->fifo.unbind_channel = channel_gk20a_unbind;
2070         gops->fifo.disable_channel = channel_gk20a_disable;
2071         gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
2072         gops->fifo.free_inst = channel_gk20a_free_inst;
2073         gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
2074 }
2075
2076 long gk20a_channel_ioctl(struct file *filp,
2077         unsigned int cmd, unsigned long arg)
2078 {
2079         struct channel_gk20a *ch = filp->private_data;
2080         struct platform_device *dev = ch->g->dev;
2081         u8 buf[NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE];
2082         int err = 0;
2083
2084         if ((_IOC_TYPE(cmd) != NVHOST_IOCTL_MAGIC) ||
2085                 (_IOC_NR(cmd) == 0) ||
2086                 (_IOC_NR(cmd) > NVHOST_IOCTL_CHANNEL_LAST) ||
2087                 (_IOC_SIZE(cmd) > NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE))
2088                 return -EFAULT;
2089
2090         if (_IOC_DIR(cmd) & _IOC_WRITE) {
2091                 if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
2092                         return -EFAULT;
2093         }
2094
2095         switch (cmd) {
2096         case NVHOST_IOCTL_CHANNEL_OPEN:
2097         {
2098                 int fd;
2099                 struct file *file;
2100                 char *name;
2101
2102                 err = get_unused_fd_flags(O_RDWR);
2103                 if (err < 0)
2104                         break;
2105                 fd = err;
2106
2107                 name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
2108                                 dev_name(&dev->dev), fd);
2109                 if (!name) {
2110                         err = -ENOMEM;
2111                         put_unused_fd(fd);
2112                         break;
2113                 }
2114
2115                 file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR);
2116                 kfree(name);
2117                 if (IS_ERR(file)) {
2118                         err = PTR_ERR(file);
2119                         put_unused_fd(fd);
2120                         break;
2121                 }
2123
2124                 err = __gk20a_channel_open(ch->g, file);
2125                 if (err) {
2126                         put_unused_fd(fd);
2127                         fput(file);
2128                         break;
2129                 }

                     /* publish the fd only after the channel is fully open:
                      * once fd_install() has run, the descriptor belongs to
                      * userspace and can no longer be unwound with
                      * put_unused_fd()/fput() */
                     fd_install(fd, file);
2130
2131                 ((struct nvhost_channel_open_args *)buf)->channel_fd = fd;
2132                 break;
2133         }
2134         case NVHOST_IOCTL_CHANNEL_SET_NVMAP_FD:
2135                 break;
2136         case NVHOST_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
2137                 err = gk20a_busy(dev);
2138                 if (err) {
2139                         dev_err(&dev->dev,
2140                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2141                                 __func__, cmd);
2142                         return err;
2143                 }
2144                 err = ch->g->ops.gr.alloc_obj_ctx(ch,
2145                                 (struct nvhost_alloc_obj_ctx_args *)buf);
2146                 gk20a_idle(dev);
2147                 break;
2148         case NVHOST_IOCTL_CHANNEL_FREE_OBJ_CTX:
2149                 err = gk20a_busy(dev);
2150                 if (err) {
2151                         dev_err(&dev->dev,
2152                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2153                                 __func__, cmd);
2154                         return err;
2155                 }
2156                 err = ch->g->ops.gr.free_obj_ctx(ch,
2157                                 (struct nvhost_free_obj_ctx_args *)buf);
2158                 gk20a_idle(dev);
2159                 break;
2160         case NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO:
2161                 err = gk20a_busy(dev);
2162                 if (err) {
2163                         dev_err(&dev->dev,
2164                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2165                                 __func__, cmd);
2166                         return err;
2167                 }
2168                 err = gk20a_alloc_channel_gpfifo(ch,
2169                                 (struct nvhost_alloc_gpfifo_args *)buf);
2170                 gk20a_idle(dev);
2171                 break;
2172         case NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO:
2173                 err = gk20a_ioctl_channel_submit_gpfifo(ch,
2174                                 (struct nvhost_submit_gpfifo_args *)buf);
2175                 break;
2176         case NVHOST_IOCTL_CHANNEL_WAIT:
2177                 err = gk20a_busy(dev);
2178                 if (err) {
2179                         dev_err(&dev->dev,
2180                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2181                                 __func__, cmd);
2182                         return err;
2183                 }
2184                 err = gk20a_channel_wait(ch,
2185                                 (struct nvhost_wait_args *)buf);
2186                 gk20a_idle(dev);
2187                 break;
2188         case NVHOST_IOCTL_CHANNEL_ZCULL_BIND:
2189                 err = gk20a_busy(dev);
2190                 if (err) {
2191                         dev_err(&dev->dev,
2192                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2193                                 __func__, cmd);
2194                         return err;
2195                 }
2196                 err = gk20a_channel_zcull_bind(ch,
2197                                 (struct nvhost_zcull_bind_args *)buf);
2198                 gk20a_idle(dev);
2199                 break;
2200         case NVHOST_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
2201                 err = gk20a_busy(dev);
2202                 if (err) {
2203                         dev_err(&dev->dev,
2204                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2205                                 __func__, cmd);
2206                         return err;
2207                 }
2208                 err = gk20a_init_error_notifier(ch,
2209                                 (struct nvhost_set_error_notifier *)buf);
2210                 gk20a_idle(dev);
2211                 break;
2212 #ifdef CONFIG_GK20A_CYCLE_STATS
2213         case NVHOST_IOCTL_CHANNEL_CYCLE_STATS:
2214                 err = gk20a_busy(dev);
2215                 if (err) {
2216                         dev_err(&dev->dev,
2217                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2218                                 __func__, cmd);
2219                         return err;
2220                 }
2221                 err = gk20a_channel_cycle_stats(ch,
2222                                 (struct nvhost_cycle_stats_args *)buf);
2223                 gk20a_idle(dev);
2224                 break;
2225 #endif
2226         case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT:
2227         {
2228                 u32 timeout =
2229                         (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
2230                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2231                            timeout, ch->hw_chid);
2232                 ch->timeout_ms_max = timeout;
2233                 break;
2234         }
2235         case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT_EX:
2236         {
2237                 u32 timeout =
2238                         (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
2239                 bool timeout_debug_dump = !((u32)
2240                         ((struct nvhost_set_timeout_ex_args *)buf)->flags &
2241                         (1 << NVHOST_TIMEOUT_FLAG_DISABLE_DUMP));
2242                 gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
2243                            timeout, ch->hw_chid);
2244                 ch->timeout_ms_max = timeout;
2245                 ch->timeout_debug_dump = timeout_debug_dump;
2246                 break;
2247         }
2248         case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT:
2249                 ((struct nvhost_get_param_args *)buf)->value =
2250                         ch->has_timedout;
2251                 break;
2252         case NVHOST_IOCTL_CHANNEL_SET_PRIORITY:
2253                 err = gk20a_busy(dev);
2254                 if (err) {
2255                         dev_err(&dev->dev,
2256                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2257                                 __func__, cmd);
2258                         return err;
2259                 }
2260                 err = gk20a_channel_set_priority(ch,
2261                         ((struct nvhost_set_priority_args *)buf)->priority);
2262                 gk20a_idle(dev);
2263                 break;
2264         case NVHOST_IOCTL_CHANNEL_ENABLE:
2265                 err = gk20a_busy(dev);
2266                 if (err) {
2267                         dev_err(&dev->dev,
2268                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2269                                 __func__, cmd);
2270                         return err;
2271                 }
2272                 /* enable channel */
2273                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2274                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2275                         ccsr_channel_enable_set_true_f());
2276                 gk20a_idle(dev);
2277                 break;
2278         case NVHOST_IOCTL_CHANNEL_DISABLE:
2279                 err = gk20a_busy(dev);
2280                 if (err) {
2281                         dev_err(&dev->dev,
2282                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2283                                 __func__, cmd);
2284                         return err;
2285                 }
2286                 /* disable channel */
2287                 gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
2288                         gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
2289                         ccsr_channel_enable_clr_true_f());
2290                 gk20a_idle(dev);
2291                 break;
2292         case NVHOST_IOCTL_CHANNEL_PREEMPT:
2293                 if (gk20a_is_channel_marked_as_tsg(ch))
2294                         return -EINVAL;
2295                 err = gk20a_busy(dev);
2296                 if (err) {
2297                         dev_err(&dev->dev,
2298                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2299                                 __func__, cmd);
2300                         return err;
2301                 }
2302                 /* preempt channel */
2303                 err = gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
2304                 gk20a_idle(dev);
2305                 break;
2306         case NVHOST_IOCTL_CHANNEL_FORCE_RESET:
2307                 err = gk20a_busy(dev);
2308                 if (err) {
2309                         dev_err(&dev->dev,
2310                                 "%s: failed to host gk20a for ioctl cmd: 0x%x",
2311                                 __func__, cmd);
2312                         return err;
2313                 }
2314                 gk20a_set_error_notifier(ch,
2315                         NVHOST_CHANNEL_RESETCHANNEL_VERIF_ERROR);
2316                 gk20a_fifo_recover_ch(ch->g, ch->hw_chid, true);
2317                 gk20a_idle(dev);
2318                 break;
2319         default:
2320                 dev_err(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
2321                 err = -ENOTTY;
2322                 break;
2323         }
2324
2325         if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ) &&
2326             copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
                     err = -EFAULT;
2327
2328         return err;
2329 }