gpu: nvgpu: support gk20a virtualization
drivers/gpu/nvgpu/vgpu/fifo_vgpu.c (linux-3.10.git)
/*
 * Virtualized GPU Fifo
 *
 * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/dma-mapping.h>
#include "vgpu/vgpu.h"
#include "gk20a/hw_fifo_gk20a.h"
#include "gk20a/hw_ram_gk20a.h"

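/*
 * Bind a channel on the host.  Like most entry points in this file, this is
 * a thin RPC wrapper: fill in a tegra_vgpu_cmd_msg with the command, the
 * platform's virt_handle and the channel's server-side context handle
 * (ch->virt_ctx), send it with vgpu_comm_sendrecv(), then check both the
 * transport error and the server's msg.ret.
 */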
static void vgpu_channel_bind(struct channel_gk20a *ch)
{
        struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_channel_config_params *p =
                        &msg.params.channel_config;
        int err;

        gk20a_dbg_info("bind channel %d", ch->hw_chid);

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND;
        msg.handle = platform->virt_handle;
        p->handle = ch->virt_ctx;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);

        ch->bound = true;
}

static void vgpu_channel_unbind(struct channel_gk20a *ch)
{
        struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);

        gk20a_dbg_fn("");

        if (ch->bound) {
                struct tegra_vgpu_cmd_msg msg;
                struct tegra_vgpu_channel_config_params *p =
                                &msg.params.channel_config;
                int err;

                msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNBIND;
                msg.handle = platform->virt_handle;
                p->handle = ch->virt_ctx;
                err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
                WARN_ON(err || msg.ret);
        }

        ch->bound = false;

        /*
         * If we are aggressive then we can destroy the syncpt resource at
         * this point; if not, it will be destroyed at channel_free().
         */
        if (ch->sync && ch->sync->aggressive_destroy) {
                ch->sync->destroy(ch->sync);
                ch->sync = NULL;
        }
}

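/*
 * Instance block ("hwctx") allocation is done by the server: we send the
 * channel's hw_chid and cache the returned opaque handle in ch->virt_ctx,
 * which every later per-channel command refers to.
 */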
static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
        int err;

        gk20a_dbg_fn("");

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_ALLOC_HWCTX;
        msg.handle = platform->virt_handle;
        p->id = ch->hw_chid;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret) {
                gk20a_err(dev_from_gk20a(g), "fail");
                return -ENOMEM;
        }

        ch->virt_ctx = p->handle;
        gk20a_dbg_fn("done");
        return 0;
}

static void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_channel_hwctx_params *p = &msg.params.channel_hwctx;
        int err;

        gk20a_dbg_fn("");

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWCTX;
        msg.handle = platform->virt_handle;
        p->handle = ch->virt_ctx;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);
}

static void vgpu_channel_disable(struct channel_gk20a *ch)
{
        struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_channel_config_params *p =
                        &msg.params.channel_config;
        int err;

        gk20a_dbg_fn("");

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_DISABLE;
        msg.handle = platform->virt_handle;
        p->handle = ch->virt_ctx;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);
}

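/*
 * RAMFC setup is also delegated: rather than writing the FC image locally,
 * the GPFIFO GPU VA, entry count and USERD address are passed to the server.
 * p->iova tells the server whether this device sits behind an SMMU mapping.
 */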
static int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base,
                                u32 gpfifo_entries)
{
        struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
        struct device __maybe_unused *d = dev_from_gk20a(ch->g);
        struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_ramfc_params *p = &msg.params.ramfc;
        int err;

        gk20a_dbg_fn("");

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SETUP_RAMFC;
        msg.handle = platform->virt_handle;
        p->handle = ch->virt_ctx;
        p->gpfifo_va = gpfifo_base;
        p->num_entries = gpfifo_entries;
        p->userd_addr = ch->userd_iova;
        p->iova = mapping ? 1 : 0;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        return (err || msg.ret) ? -ENOMEM : 0;
}

static int init_engine_info(struct fifo_gk20a *f)
{
        struct fifo_engine_info_gk20a *gr_info;
        const u32 gr_sw_id = ENGINE_GR_GK20A;

        gk20a_dbg_fn("");

        /*
         * All we really care about finding is the graphics entry;
         * especially early on in sim it probably thinks it has more.
         */
        f->num_engines = 1;

        gr_info = f->engine_info + gr_sw_id;

        gr_info->sw_id = gr_sw_id;
        gr_info->name = "gr";
        /* FIXME: retrieve this from server */
        gr_info->runlist_id = 0;
        return 0;
}

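/*
 * Allocate guest-side runlist state: the active-channel bitmap plus
 * MAX_RUNLIST_BUFFERS coherent buffers that hold the u16 channel IDs handed
 * to the server on each runlist update.
 */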
static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
{
        struct fifo_engine_info_gk20a *engine_info;
        struct fifo_runlist_info_gk20a *runlist;
        struct device *d = dev_from_gk20a(g);
        u32 runlist_id;
        u32 i;
        u64 runlist_size;

        gk20a_dbg_fn("");

        f->max_runlists = fifo_eng_runlist_base__size_1_v();
        f->runlist_info = kzalloc(sizeof(struct fifo_runlist_info_gk20a) *
                                  f->max_runlists, GFP_KERNEL);
        if (!f->runlist_info)
                goto clean_up;

        engine_info = f->engine_info + ENGINE_GR_GK20A;
        runlist_id = engine_info->runlist_id;
        runlist = &f->runlist_info[runlist_id];

        runlist->active_channels =
                kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
                        GFP_KERNEL);
        if (!runlist->active_channels)
                goto clean_up_runlist_info;

        runlist_size = sizeof(u16) * f->num_channels;
        for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
                dma_addr_t iova;

                runlist->mem[i].cpuva =
                        dma_alloc_coherent(d,
                                        runlist_size,
                                        &iova,
                                        GFP_KERNEL);
                if (!runlist->mem[i].cpuva) {
                        dev_err(d, "memory allocation failed\n");
                        goto clean_up_runlist;
                }
                runlist->mem[i].iova = iova;
                runlist->mem[i].size = runlist_size;
        }
        mutex_init(&runlist->mutex);
        init_waitqueue_head(&runlist->runlist_wq);

        /*
         * None of the buffers is pinned if this value doesn't change.
         * Otherwise, one of them (cur_buffer) must have been pinned.
         */
        runlist->cur_buffer = MAX_RUNLIST_BUFFERS;

        gk20a_dbg_fn("done");
        return 0;

clean_up_runlist:
        for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
                if (runlist->mem[i].cpuva)
                        dma_free_coherent(d,
                                runlist->mem[i].size,
                                runlist->mem[i].cpuva,
                                runlist->mem[i].iova);
                runlist->mem[i].cpuva = NULL;
                runlist->mem[i].iova = 0;
        }

        kfree(runlist->active_channels);
        runlist->active_channels = NULL;

clean_up_runlist_info:
        kfree(f->runlist_info);
        f->runlist_info = NULL;

clean_up:
        gk20a_dbg_fn("fail");
        return -ENOMEM;
}

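/*
 * SW setup: the channel count is queried from the server as an attribute,
 * USERD is allocated by the guest with dma_alloc_coherent() and mapped into
 * BAR1 through the server, and each channel's USERD slice is carved out of
 * that block, much like the native gk20a path.
 */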
static int vgpu_init_fifo_setup_sw(struct gk20a *g)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct fifo_gk20a *f = &g->fifo;
        struct device *d = dev_from_gk20a(g);
        int chid, err = 0;
        dma_addr_t iova;

        gk20a_dbg_fn("");

        if (f->sw_ready) {
                gk20a_dbg_fn("skip init");
                return 0;
        }

        f->g = g;

        err = vgpu_get_attribute(platform->virt_handle,
                                TEGRA_VGPU_ATTRIB_NUM_CHANNELS,
                                &f->num_channels);
        if (err)
                return -ENXIO;

        f->max_engines = ENGINE_INVAL_GK20A;

        f->userd_entry_size = 1 << ram_userd_base_shift_v();
        f->userd_total_size = f->userd_entry_size * f->num_channels;

        f->userd.cpuva = dma_alloc_coherent(d,
                                        f->userd_total_size,
                                        &iova,
                                        GFP_KERNEL);
        if (!f->userd.cpuva) {
                dev_err(d, "memory allocation failed\n");
                err = -ENOMEM;
                goto clean_up;
        }

        f->userd.iova = iova;
        err = gk20a_get_sgtable(d, &f->userd.sgt,
                                f->userd.cpuva, f->userd.iova,
                                f->userd_total_size);
        if (err) {
                dev_err(d, "failed to create sg table\n");
                goto clean_up;
        }

        /* bar1 va */
        f->userd.gpu_va = vgpu_bar1_map(g, &f->userd.sgt, f->userd_total_size);
        if (!f->userd.gpu_va) {
                dev_err(d, "gmmu mapping failed\n");
                err = -ENOMEM;
                goto clean_up;
        }

        gk20a_dbg(gpu_dbg_map, "userd bar1 va = 0x%llx", f->userd.gpu_va);

        f->userd.size = f->userd_total_size;

        f->channel = kzalloc(f->num_channels * sizeof(*f->channel),
                                GFP_KERNEL);
        f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
                                GFP_KERNEL);

        if (!(f->channel && f->engine_info)) {
                err = -ENOMEM;
                goto clean_up;
        }

        init_engine_info(f);

        init_runlist(g, f);

        for (chid = 0; chid < f->num_channels; chid++) {
                f->channel[chid].userd_cpu_va =
                        f->userd.cpuva + chid * f->userd_entry_size;
                f->channel[chid].userd_iova =
                        NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova)
                                + chid * f->userd_entry_size;
                f->channel[chid].userd_gpu_va =
                        f->userd.gpu_va + chid * f->userd_entry_size;

                gk20a_init_channel_support(g, chid);
        }
        mutex_init(&f->ch_inuse_mutex);

        f->deferred_reset_pending = false;
        mutex_init(&f->deferred_reset_mutex);

        f->sw_ready = true;

        gk20a_dbg_fn("done");
        return 0;

clean_up:
        gk20a_dbg_fn("fail");
        /* FIXME: unmap from bar1 */
        if (f->userd.sgt)
                gk20a_free_sgtable(&f->userd.sgt);
        if (f->userd.cpuva)
                dma_free_coherent(d,
                                f->userd_total_size,
                                f->userd.cpuva,
                                f->userd.iova);
        f->userd.cpuva = NULL;
        f->userd.iova = 0;

        memset(&f->userd, 0, sizeof(struct userd_desc));

        kfree(f->channel);
        f->channel = NULL;
        kfree(f->engine_info);
        f->engine_info = NULL;

        return err;
}

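/*
 * HW setup reduces to a sanity check: write the first USERD word through the
 * CPU mapping and through BAR1 and make sure both views stay coherent.
 */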
static int vgpu_init_fifo_setup_hw(struct gk20a *g)
{
        gk20a_dbg_fn("");

        /* test write, read through bar1 @ userd region before
         * turning on the snooping */
        {
                struct fifo_gk20a *f = &g->fifo;
                u32 v, v1 = 0x33, v2 = 0x55;

                u32 bar1_vaddr = f->userd.gpu_va;
                volatile u32 *cpu_vaddr = f->userd.cpuva;

                gk20a_dbg_info("test bar1 @ vaddr 0x%x",
                           bar1_vaddr);

                v = gk20a_bar1_readl(g, bar1_vaddr);

                *cpu_vaddr = v1;
                smp_mb();

                if (v1 != gk20a_bar1_readl(g, bar1_vaddr)) {
                        gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
                        return -EINVAL;
                }

                gk20a_bar1_writel(g, bar1_vaddr, v2);

                if (v2 != gk20a_bar1_readl(g, bar1_vaddr)) {
                        gk20a_err(dev_from_gk20a(g), "bar1 broken @ gk20a!");
                        return -EINVAL;
                }

                /* is it visible to the cpu? */
                if (*cpu_vaddr != v2) {
                        gk20a_err(dev_from_gk20a(g),
                                "cpu didn't see bar1 write @ %p!",
                                cpu_vaddr);
                }

                /* put it back */
                gk20a_bar1_writel(g, bar1_vaddr, v);
        }

        gk20a_dbg_fn("done");

        return 0;
}

int vgpu_init_fifo_support(struct gk20a *g)
{
        int err;

        gk20a_dbg_fn("");

        err = vgpu_init_fifo_setup_sw(g);
        if (err)
                return err;

        err = vgpu_init_fifo_setup_hw(g);
        return err;
}

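/* Channel preemption is requested from the server by virt_ctx handle. */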
static int vgpu_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct fifo_gk20a *f = &g->fifo;
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_channel_config_params *p =
                        &msg.params.channel_config;
        int err;

        gk20a_dbg_fn("");

        msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT;
        msg.handle = platform->virt_handle;
        p->handle = f->channel[hw_chid].virt_ctx;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        if (err || msg.ret) {
                gk20a_err(dev_from_gk20a(g),
                        "preempt channel %d failed\n", hw_chid);
                err = -ENOMEM;
        }

        return err;
}

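/*
 * Send a runlist to the server.  The message is variable-length: the
 * tegra_vgpu_cmd_msg header is followed immediately by num_entries u16
 * channel IDs, so the whole thing is kmalloc'd and copied in one shot.
 */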
static int vgpu_submit_runlist(u64 handle, u8 runlist_id, u16 *runlist,
                        u32 num_entries)
{
        struct tegra_vgpu_cmd_msg *msg;
        struct tegra_vgpu_runlist_params *p;
        size_t size = sizeof(*msg) + sizeof(*runlist) * num_entries;
        char *ptr;
        int err;

        msg = kmalloc(size, GFP_KERNEL);
        if (!msg)
                return -1;

        msg->cmd = TEGRA_VGPU_CMD_SUBMIT_RUNLIST;
        msg->handle = handle;
        p = &msg->params.runlist;
        p->runlist_id = runlist_id;
        p->num_entries = num_entries;

        ptr = (char *)msg + sizeof(*msg);
        memcpy(ptr, runlist, sizeof(*runlist) * num_entries);
        err = vgpu_comm_sendrecv(msg, size, sizeof(*msg));

        err = (err || msg->ret) ? -1 : 0;
        kfree(msg);
        return err;
}

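/*
 * Rebuild and submit the runlist; the caller holds runlist->mutex.  The
 * active-channel bitmap is updated first, then the set bits are packed into
 * mem[0] as u16 entries and handed to the server.
 */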
static int vgpu_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
                                        u32 hw_chid, bool add,
                                        bool wait_for_finish)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct fifo_gk20a *f = &g->fifo;
        struct fifo_runlist_info_gk20a *runlist;
        u16 *runlist_entry = NULL;
        u32 count = 0;

        gk20a_dbg_fn("");

        runlist = &f->runlist_info[runlist_id];

        /*
         * Valid channel: add/remove it from the active list.
         * Otherwise, keep the active list untouched for suspend/resume.
         */
        if (hw_chid != ~0) {
                if (add) {
                        if (test_and_set_bit(hw_chid,
                                runlist->active_channels) == 1)
                                return 0;
                } else {
                        if (test_and_clear_bit(hw_chid,
                                runlist->active_channels) == 0)
                                return 0;
                }
        }

        if (hw_chid != ~0 || /* add/remove a valid channel */
            add /* resume to add all channels back */) {
                u32 chid;

                runlist_entry = runlist->mem[0].cpuva;
                for_each_set_bit(chid,
                        runlist->active_channels, f->num_channels) {
                        gk20a_dbg_info("add channel %d to runlist", chid);
                        runlist_entry[0] = chid;
                        runlist_entry++;
                        count++;
                }
        } else  /* suspend to remove all channels */
                count = 0;

        return vgpu_submit_runlist(platform->virt_handle, runlist_id,
                                runlist->mem[0].cpuva, count);
}

/*
 * Add/remove a channel from the runlist.  Special cases below, where
 * runlist->active_channels will NOT be changed:
 * (hw_chid == ~0 && !add) means remove all active channels from the runlist.
 * (hw_chid == ~0 &&  add) means restore all active channels on the runlist.
 */
static int vgpu_fifo_update_runlist(struct gk20a *g, u32 runlist_id,
                                u32 hw_chid, bool add, bool wait_for_finish)
{
        struct fifo_runlist_info_gk20a *runlist = NULL;
        struct fifo_gk20a *f = &g->fifo;
        int ret = 0;

        gk20a_dbg_fn("");

        runlist = &f->runlist_info[runlist_id];

        mutex_lock(&runlist->mutex);

        ret = vgpu_fifo_update_runlist_locked(g, runlist_id, hw_chid, add,
                                        wait_for_finish);

        mutex_unlock(&runlist->mutex);
        return ret;
}

static int vgpu_fifo_wait_engine_idle(struct gk20a *g)
{
        gk20a_dbg_fn("");

        return 0;
}

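/*
 * Install the virtualized fifo ops into the HAL table.  A minimal usage
 * sketch (the actual init path lives in the vgpu HAL setup code, not in
 * this file), assuming gops points at g->ops:
 *
 *      vgpu_init_fifo_ops(&g->ops);
 *      err = vgpu_init_fifo_support(g);
 */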
void vgpu_init_fifo_ops(struct gpu_ops *gops)
{
        gops->fifo.bind_channel = vgpu_channel_bind;
        gops->fifo.unbind_channel = vgpu_channel_unbind;
        gops->fifo.disable_channel = vgpu_channel_disable;
        gops->fifo.alloc_inst = vgpu_channel_alloc_inst;
        gops->fifo.free_inst = vgpu_channel_free_inst;
        gops->fifo.setup_ramfc = vgpu_channel_setup_ramfc;
        gops->fifo.preempt_channel = vgpu_fifo_preempt_channel;
        gops->fifo.update_runlist = vgpu_fifo_update_runlist;
        gops->fifo.wait_engine_idle = vgpu_fifo_wait_engine_idle;
}