gpu: nvgpu: support gk20a virtualization
linux-3.10.git: drivers/gpu/nvgpu/vgpu/mm_vgpu.c
/*
 * Virtualized GPU Memory Management
 *
 * Copyright (c) 2014 NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/dma-mapping.h>
#include "vgpu/vgpu.h"

/* note: keep the page sizes sorted lowest to highest here */
static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };

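/*
 * One-time software-side MM setup for the virtual GPU: record the page and
 * PDE geometry and the channel VA size in mm_gk20a. No hardware or server
 * state is touched here; the map/unmap paths below go through the vgpu
 * communication channel instead.
 */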
static int vgpu_init_mm_setup_sw(struct gk20a *g)
{
        struct mm_gk20a *mm = &g->mm;

        gk20a_dbg_fn("");

        if (mm->sw_ready) {
                gk20a_dbg_fn("skip init");
                return 0;
        }

        mm->g = g;
        mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
        mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
        mm->pde_stride    = mm->big_page_size << 10;
        mm->pde_stride_shift = ilog2(mm->pde_stride);
        BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */

        /* TBD: make channel vm size configurable */
        mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;

        gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));

        mm->sw_ready = true;

        return 0;
}

int vgpu_init_mm_support(struct gk20a *g)
{
        gk20a_dbg_fn("");

        return vgpu_init_mm_setup_sw(g);
}

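/*
 * Map a buffer into a channel VM. Unlike the native gk20a path, no GMMU
 * page tables are written here: the request is packed into a
 * TEGRA_VGPU_CMD_AS_MAP message and forwarded to the GPU server over the
 * vgpu communication channel. The GPU VA is allocated locally when the
 * caller does not supply a fixed map_offset. Returns the GPU VA, or 0 on
 * failure.
 */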
static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
                                u64 map_offset,
                                struct sg_table *sgt,
                                u64 buffer_offset,
                                u64 size,
                                int pgsz_idx,
                                u8 kind_v,
                                u32 ctag_offset,
                                u32 flags,
                                int rw_flag,
                                bool clear_ctags)
{
        int err = 0;
        struct device *d = dev_from_vm(vm);
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        u64 addr = gk20a_mm_iova_addr(sgt->sgl);
        u8 prot;

        gk20a_dbg_fn("");

        /* Allocate (or validate when map_offset != 0) the virtual address. */
        if (!map_offset) {
                map_offset = gk20a_vm_alloc_va(vm, size, pgsz_idx);
                if (!map_offset) {
                        gk20a_err(d, "failed to allocate va space");
                        err = -ENOMEM;
                        goto fail;
                }
        }

        if (rw_flag == gk20a_mem_flag_read_only)
                prot = TEGRA_VGPU_MAP_PROT_READ_ONLY;
        else if (rw_flag == gk20a_mem_flag_write_only)
                prot = TEGRA_VGPU_MAP_PROT_WRITE_ONLY;
        else
                prot = TEGRA_VGPU_MAP_PROT_NONE;

        msg.cmd = TEGRA_VGPU_CMD_AS_MAP;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        p->addr = addr;
        p->gpu_va = map_offset;
        p->size = size;
        p->pgsz_idx = pgsz_idx;
        p->iova = mapping ? 1 : 0;
        p->kind = kind_v;
        p->cacheable =
                (flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE) ? 1 : 0;
        p->prot = prot;
        p->ctag_offset = ctag_offset;
        p->clear_ctags = clear_ctags;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                goto fail;

        vm->tlb_dirty = true;
        return map_offset;
fail:
        gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
        return 0;
}

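/*
 * Unmap a GPU VA range: release the locally allocated VA (when this mapping
 * owned it) and tell the GPU server to drop its PTEs via
 * TEGRA_VGPU_CMD_AS_UNMAP. Failures are logged but not propagated, matching
 * the void gmmu_unmap hook.
 */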
static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
                                u64 vaddr,
                                u64 size,
                                int pgsz_idx,
                                bool va_allocated,
                                int rw_flag)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        int err;

        gk20a_dbg_fn("");

        if (va_allocated) {
                err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
                if (err) {
                        dev_err(dev_from_vm(vm), "failed to free va");
                        return;
                }
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_UNMAP;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        p->gpu_va = vaddr;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                dev_err(dev_from_vm(vm),
                        "failed to update gmmu ptes on unmap");

        vm->tlb_dirty = true;
}

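/*
 * Tear down a VM: unmap every remaining buffer, drop the reserved VA nodes,
 * release the server-side address-space share (TEGRA_VGPU_CMD_AS_FREE_SHARE),
 * destroy the small/big page allocators, free the zero page if one was
 * allocated, and finally free the vm itself.
 */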
static void vgpu_vm_remove_support(struct vm_gk20a *vm)
{
        struct gk20a *g = vm->mm->g;
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct mapped_buffer_node *mapped_buffer;
        struct vm_reserved_va_node *va_node, *va_node_tmp;
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
        struct rb_node *node;
        int err;

        gk20a_dbg_fn("");
        mutex_lock(&vm->update_gmmu_lock);

        /* TBD: add a flag here for the unmap code to recognize teardown
         * and short-circuit any otherwise expensive operations. */

        node = rb_first(&vm->mapped_buffers);
        while (node) {
                mapped_buffer =
                        container_of(node, struct mapped_buffer_node, node);
                gk20a_vm_unmap_locked(mapped_buffer);
                node = rb_first(&vm->mapped_buffers);
        }

        /* destroy remaining reserved memory areas */
        list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
                reserved_va_list) {
                list_del(&va_node->reserved_va_list);
                kfree(va_node);
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);

        gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
        gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);

        mutex_unlock(&vm->update_gmmu_lock);

        /* release zero page if used */
        if (vm->zero_page_cpuva)
                dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
                                  vm->zero_page_cpuva, vm->zero_page_iova);

        /* vm is not used anymore. release it. */
        kfree(vm);
}

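/*
 * Map an already-built sg_table into BAR1 by asking the GPU server
 * (TEGRA_VGPU_CMD_MAP_BAR1); the server picks the GPU VA and returns it in
 * the reply. Returns that GPU VA, or 0 on failure.
 */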
u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct dma_iommu_mapping *mapping =
                        to_dma_iommu_mapping(dev_from_gk20a(g));
        u64 addr = gk20a_mm_iova_addr((*sgt)->sgl);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
        int err;

        msg.cmd = TEGRA_VGPU_CMD_MAP_BAR1;
        msg.handle = platform->virt_handle;
        p->addr = addr;
        p->size = size;
        p->iova = mapping ? 1 : 0;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret)
                addr = 0;
        else
                addr = p->gpu_va;

        return addr;
}

/* address space interfaces for the gk20a module */
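/*
 * Create a new address-space share: allocate the vm_gk20a bookkeeping,
 * request a server-side share with TEGRA_VGPU_CMD_AS_ALLOC_SHARE, and split
 * the VA range in half between the small-page and big-page allocators, with
 * a one-PDE hole kept at the bottom of the small-page half.
 */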
static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share)
{
        struct gk20a_as *as = as_share->as;
        struct gk20a *g = gk20a_from_as(as);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
        struct mm_gk20a *mm = &g->mm;
        struct vm_gk20a *vm;
        u64 vma_size;
        u32 num_pages, low_hole_pages;
        char name[32];
        int err;

        gk20a_dbg_fn("");

        vm = kzalloc(sizeof(*vm), GFP_KERNEL);
        if (!vm)
                return -ENOMEM;

        as_share->vm = vm;

        vm->mm = mm;
        vm->as_share = as_share;

        vm->big_pages = true;

        vm->va_start  = mm->pde_stride;   /* create a one pde hole */
        vm->va_limit  = mm->channel.size; /* note this means channel.size is
                                             really just the max */

        msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE;
        msg.handle = platform->virt_handle;
        p->size = vm->va_limit;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        if (err || msg.ret) {
                /* don't leak the vm (or leave a dangling pointer) on failure */
                as_share->vm = NULL;
                kfree(vm);
                return -ENOMEM;
        }

        vm->handle = p->handle;

        /* low-half: alloc small pages */
        /* high-half: alloc big pages */
        vma_size = mm->channel.size >> 1;

        snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
                 gmmu_page_sizes[gmmu_page_size_small] >> 10);
        num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);

        /* num_pages above is without regard to the low-side hole. */
        low_hole_pages = (vm->va_start >>
                          gmmu_page_shifts[gmmu_page_size_small]);

        gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
              low_hole_pages,             /* start */
              num_pages - low_hole_pages, /* length */
              1);                         /* align */

        snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
                 gmmu_page_sizes[gmmu_page_size_big] >> 10);

        num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
        gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
                              num_pages, /* start */
                              num_pages, /* length */
                              1); /* align */

        vm->mapped_buffers = RB_ROOT;

        mutex_init(&vm->update_gmmu_lock);
        kref_init(&vm->ref);
        INIT_LIST_HEAD(&vm->reserved_va_list);

        vm->enable_ctag = true;

        return 0;
}

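/*
 * Bind a channel to an address-space share by sending
 * TEGRA_VGPU_CMD_AS_BIND_SHARE with the AS handle and the channel's virtual
 * context handle; on failure the channel's vm pointer is cleared again.
 */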
static int vgpu_vm_bind_channel(struct gk20a_as_share *as_share,
                                struct channel_gk20a *ch)
{
        struct vm_gk20a *vm = as_share->vm;
        struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_bind_share_params *p = &msg.params.as_bind_share;
        int err;

        gk20a_dbg_fn("");

        ch->vm = vm;
        msg.cmd = TEGRA_VGPU_CMD_AS_BIND_SHARE;
        msg.handle = platform->virt_handle;
        p->as_handle = vm->handle;
        p->chan_handle = ch->virt_ctx;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));

        if (err || msg.ret) {
                ch->vm = NULL;
                err = -ENOMEM;
        }

        return err;
}

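/*
 * Helper for the cache maintenance hooks below: forward a single
 * TEGRA_VGPU_CMD_CACHE_MAINT operation (FB flush, L2 flush/invalidate) to
 * the GPU server and warn if it fails.
 */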
static void vgpu_cache_maint(u64 handle, u8 op)
{
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_cache_maint_params *p = &msg.params.cache_maint;
        int err;

        msg.cmd = TEGRA_VGPU_CMD_CACHE_MAINT;
        msg.handle = handle;
        p->op = op;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);
}

static int vgpu_mm_fb_flush(struct gk20a *g)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);

        gk20a_dbg_fn("");

        vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_FB_FLUSH);
        return 0;
}

static void vgpu_mm_l2_invalidate(struct gk20a *g)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);

        gk20a_dbg_fn("");

        vgpu_cache_maint(platform->virt_handle, TEGRA_VGPU_L2_MAINT_INV);
}

static void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate)
{
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        u8 op;

        gk20a_dbg_fn("");

        if (invalidate)
                op = TEGRA_VGPU_L2_MAINT_FLUSH_INV;
        else
                op = TEGRA_VGPU_L2_MAINT_FLUSH;

        vgpu_cache_maint(platform->virt_handle, op);
}

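/*
 * TLB invalidation: skipped while the TLB is clean; otherwise send
 * TEGRA_VGPU_CMD_AS_INVALIDATE for this address space and clear tlb_dirty,
 * all under the VM's update_gmmu_lock.
 */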
static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
{
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
        struct tegra_vgpu_cmd_msg msg;
        struct tegra_vgpu_as_invalidate_params *p = &msg.params.as_invalidate;
        int err;

        gk20a_dbg_fn("");

        /* No need to invalidate if tlb is clean */
        mutex_lock(&vm->update_gmmu_lock);
        if (!vm->tlb_dirty) {
                mutex_unlock(&vm->update_gmmu_lock);
                return;
        }

        msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
        msg.handle = platform->virt_handle;
        p->handle = vm->handle;
        err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
        WARN_ON(err || msg.ret);
        vm->tlb_dirty = false;
        mutex_unlock(&vm->update_gmmu_lock);
}

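/*
 * Install the virtualized MM callbacks in place of the native gk20a ones.
 * A minimal usage sketch (illustrative only; the probe path, the
 * virt_enabled field, and the native init function's name are assumptions,
 * not taken from this file):
 *
 *	if (platform->virt_enabled)
 *		vgpu_init_mm_ops(&g->ops);
 *	else
 *		gk20a_init_mm(&g->ops);
 */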
void vgpu_init_mm_ops(struct gpu_ops *gops)
{
        gops->mm.gmmu_map = vgpu_locked_gmmu_map;
        gops->mm.gmmu_unmap = vgpu_locked_gmmu_unmap;
        gops->mm.vm_remove = vgpu_vm_remove_support;
        gops->mm.vm_alloc_share = vgpu_vm_alloc_share;
        gops->mm.vm_bind_channel = vgpu_vm_bind_channel;
        gops->mm.fb_flush = vgpu_mm_fb_flush;
        gops->mm.l2_invalidate = vgpu_mm_l2_invalidate;
        gops->mm.l2_flush = vgpu_mm_l2_flush;
        gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate;
}