1 /*
2  * drivers/video/tegra/host/gk20a/mm_gk20a.c
3  *
4  * GK20A memory management
5  *
6  * Copyright (c) 2011-2013, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>
23 #include <linux/highmem.h>
24 #include <linux/log2.h>
25 #include <linux/nvhost.h>
26 #include <linux/scatterlist.h>
27 #include <linux/nvmap.h>
28 #include <mach/hardware.h>
29 #include <asm/cacheflush.h>
30
31 #include "../../nvmap/nvmap.h"
32 #include "../../nvmap/nvmap_ioctl.h"
33
34 #include "../dev.h"
35 #include "../nvhost_as.h"
36 #include "gk20a.h"
37 #include "mm_gk20a.h"
38 #include "hw_gmmu_gk20a.h"
39 #include "hw_fb_gk20a.h"
40 #include "hw_bus_gk20a.h"
41 #include "hw_ram_gk20a.h"
42 #include "hw_mc_gk20a.h"
43 #include "hw_flush_gk20a.h"
44
45 #include "kind_gk20a.h"
46
47
48 #define FLUSH_CPU_DCACHE(va, pa, size)  \
49         do {    \
50                 __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
51                 outer_flush_range(pa, pa + (size_t)(size));             \
52         } while (0)
53
54 enum gmmu_page_smmu_type {
55         gmmu_page_smmu_type_physical,
56         gmmu_page_smmu_type_virtual,
57 };
58
59
60 static void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm);
61 static int update_gmmu_ptes(struct vm_gk20a *vm,
62                             enum gmmu_pgsz_gk20a pgsz_idx, struct sg_table *sgt,
63                             u64 first_vaddr, u64 last_vaddr,
64                             u8 kind_v, u32 ctag_offset, bool cacheable);
65 static void update_gmmu_pde(struct vm_gk20a *vm, u32 i);
66
67
68 /* note: keep the page sizes sorted lowest to highest here */
69 static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
70 static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
71 static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
72                                                                 0x1ffffLL };
73 static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
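/*
 * These three tables are just different views of the same two page sizes:
 * page size == 1 << shift, offset mask == (page size - 1), and address
 * mask == ~(page size - 1).  E.g. 128KB -> shift 17, offset mask 0x1ffff,
 * address mask ~0x1ffff.
 */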
74
75 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
76 {
77         u32 pmc_enable;
78
79         pmc_enable = gk20a_readl(g, mc_enable_r());
80         pmc_enable &= ~mc_enable_pfb_enabled_f();
81         pmc_enable &= ~mc_enable_l2_enabled_f();
82         pmc_enable &= ~mc_enable_ce2_enabled_f();
83         pmc_enable &= ~mc_enable_xbar_enabled_f();
84         pmc_enable &= ~mc_enable_hub_enabled_f();
85         gk20a_writel(g, mc_enable_r(), pmc_enable);
86
87         pmc_enable = gk20a_readl(g, mc_enable_r());
88         pmc_enable |= mc_enable_pfb_enabled_f();
89         pmc_enable |= mc_enable_l2_enabled_f();
90         pmc_enable |= mc_enable_ce2_enabled_f();
91         pmc_enable |= mc_enable_xbar_enabled_f();
92         pmc_enable |= mc_enable_hub_enabled_f();
93         gk20a_writel(g, mc_enable_r(), pmc_enable);
94         gk20a_readl(g, mc_enable_r());
95
96         nvhost_dbg_fn("done");
97         return 0;
98 }
99
100 void gk20a_remove_mm_support(struct mm_gk20a *mm)
101 {
102         struct gk20a *g = mm->g;
103         struct vm_gk20a *vm = &mm->bar1.vm;
104         struct inst_desc *inst_block = &mm->bar1.inst_block;
105         struct mem_mgr *memmgr = mem_mgr_from_g(g);
106
107         nvhost_dbg_fn("");
108
109         nvhost_memmgr_free_sg_table(memmgr, inst_block->mem.ref,
110                         inst_block->mem.sgt);
111         nvhost_memmgr_put(memmgr, inst_block->mem.ref);
112
113         vm->remove_support(vm);
114 }
115
116 int gk20a_init_mm_setup_sw(struct gk20a *g)
117 {
118         struct mm_gk20a *mm = &g->mm;
119         int i;
120
121         nvhost_dbg_fn("");
122
123         if (mm->sw_ready) {
124                 nvhost_dbg_fn("skip init");
125                 return 0;
126         }
127
128         mm->g = g;
129         mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
130         mm->pde_stride    = mm->big_page_size << 10;
131         mm->pde_stride_shift = ilog2(mm->pde_stride);
132         BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
133
134         for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {
135
136                 u32 num_ptes, pte_space, num_pages;
137
138                 /* assuming "full" page tables */
139                 num_ptes = mm->pde_stride / gmmu_page_sizes[i];
140
141                 pte_space = num_ptes * gmmu_pte__size_v();
142                 /* allocate whole pages */
143                 pte_space = roundup(pte_space, PAGE_SIZE);
144
145                 num_pages = pte_space / PAGE_SIZE;
146                 /* make sure "order" is viable */
147                 BUG_ON(!is_power_of_2(num_pages));
148
149                 mm->page_table_sizing[i].num_ptes = num_ptes;
150                 mm->page_table_sizing[i].order = ilog2(num_pages);
151         }
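        /*
         * Illustrative numbers with the defaults above: big_page_size is
         * 128KB, so pde_stride = 128KB << 10 = 128MB and pde_stride_shift
         * = 27.  A full small-page table is then 128MB / 4KB = 32768 ptes
         * of 8B each = 256KB (order 6); a full big-page table is
         * 128MB / 128KB = 1024 ptes = 8KB (order 1).
         */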
152
153         /*TBD: make channel vm size configurable */
154         /* For now keep the size relatively small-ish compared
155          * to the full 40b va.  8GB for now (as it allows for two separate,
156          * 32b regions.) */
157         mm->channel.size = 1ULL << 33ULL;
158
159         nvhost_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
160
161         nvhost_dbg_info("small page-size (%dKB) pte array: %dKB",
162                         gmmu_page_sizes[gmmu_page_size_small] >> 10,
163                         (mm->page_table_sizing[gmmu_page_size_small].num_ptes *
164                          gmmu_pte__size_v()) >> 10);
165
166         nvhost_dbg_info("big page-size (%dKB) pte array: %dKB",
167                         gmmu_page_sizes[gmmu_page_size_big] >> 10,
168                         (mm->page_table_sizing[gmmu_page_size_big].num_ptes *
169                          gmmu_pte__size_v()) >> 10);
170
171
172         gk20a_init_bar1_vm(mm);
173
174         gk20a_init_uncompressed_kind_map();
175         gk20a_init_kind_attr();
176
177         mm->remove_support = gk20a_remove_mm_support;
178         mm->sw_ready = true;
179
180         nvhost_dbg_fn("done");
181         return 0;
182 }
183
184 /* make sure gk20a_init_mm_support is called before */
185 static int gk20a_init_mm_setup_hw(struct gk20a *g)
186 {
187         struct mm_gk20a *mm = &g->mm;
188         struct inst_desc *inst_block = &mm->bar1.inst_block;
189         phys_addr_t inst_pa = sg_phys(inst_block->mem.sgt->sgl);
190
191         nvhost_dbg_fn("");
192
193         /* set large page size in fb
194          * note this is very early on, can we defer it ? */
195         {
196                 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
197
198                 if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K)
199                         fb_mmu_ctrl = (fb_mmu_ctrl &
200                                        ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
201                                 fb_mmu_ctrl_vm_pg_size_128kb_f();
202                 else
203                         BUG_ON(1); /* no support/testing for larger ones yet */
204
205                 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
206         }
207
208         inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
209         nvhost_dbg_info("bar1 inst block ptr: 0x%08x",  (u32)inst_pa);
210
211         /* this is very early in init... can we defer this? */
212         {
213                 gk20a_writel(g, bus_bar1_block_r(),
214                              bus_bar1_block_target_vid_mem_f() |
215                              bus_bar1_block_mode_virtual_f() |
216                              bus_bar1_block_ptr_f(inst_pa));
217         }
218
219         nvhost_dbg_fn("done");
220         return 0;
221 }
222
223 int gk20a_init_mm_support(struct gk20a *g)
224 {
225         int err;
226
227         err = gk20a_init_mm_reset_enable_hw(g);
228         if (err)
229                 return err;
230
231         err = gk20a_init_mm_setup_sw(g);
232         if (err)
233                 return err;
234
235         err = gk20a_init_mm_setup_hw(g);
236         if (err)
237                 return err;
238
239         return err;
240 }
241
242 #ifdef CONFIG_TEGRA_IOMMU_SMMU
243 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
244                             void **handle,
245                             struct sg_table **sgt)
246 {
247         u32 num_pages = 1 << order;
248         u32 len = num_pages * PAGE_SIZE;
249         int err;
250         struct page *pages;
251
252         nvhost_dbg_fn("");
253
254         pages = alloc_pages(GFP_KERNEL, order);
255         if (!pages) {
256                 nvhost_dbg(dbg_pte, "alloc_pages failed\n");
257                 goto err_out;
258         }
259         *sgt = kzalloc(sizeof(**sgt), GFP_KERNEL);
260         if (!*sgt) {
261                 nvhost_dbg(dbg_pte, "cannot allocate sg table");
262                 goto err_alloced;
263         }
264         err =  sg_alloc_table_from_pages(*sgt, &pages, 1, 0, len, GFP_KERNEL);
265         if (err) {
266                 nvhost_dbg(dbg_pte, "sg_alloc_table failed\n");
267                 goto err_sg_table;
268         }
269         *handle = page_address(pages);
270         memset(*handle, 0, len);
271
272         return 0;
273
274 err_sg_table:
275         kfree(*sgt);
276 err_alloced:
277         __free_pages(pages, order);
278 err_out:
279         return -ENOMEM;
280 }
281
282 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
283                             struct sg_table *sgt, u32 order)
284 {
285         nvhost_dbg_fn("");
286         BUG_ON(sgt == NULL);
287         free_pages((unsigned long)handle, order);
288         sg_free_table(sgt);
289         kfree(sgt);
290 }
291
292 static int map_gmmu_pages(void *handle, struct sg_table *sgt, void **va)
293 {
294         FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
295         *va = handle;
296         return 0;
297 }
298
299 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, u32 *va)
300 {
301         FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
302 }
303 #else
304 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
305                             void **handle,
306                             struct sg_table **sgt)
307 {
308         struct mem_mgr *client = mem_mgr_from_vm(vm);
309         struct mem_handle *r;
310         u32 num_pages = 1 << order;
311         u32 len = num_pages * PAGE_SIZE;
312         void *va;
313
314         nvhost_dbg_fn("");
315
316         r = nvhost_memmgr_alloc(client, len,
317                                 DEFAULT_ALLOC_ALIGNMENT,
318                                 DEFAULT_ALLOC_FLAGS,
319                                 0);
320         if (IS_ERR_OR_NULL(r)) {
321                 nvhost_dbg(dbg_pte, "nvmap_alloc failed\n");
322                 goto err_out;
323         }
324         va = nvhost_memmgr_mmap(r);
325         if (IS_ERR_OR_NULL(va)) {
326                 nvhost_dbg(dbg_pte, "nvmap_mmap failed\n");
327                 goto err_alloced;
328         }
329         *sgt = nvhost_memmgr_sg_table(client, r);
330         if (!*sgt) {
331                 nvhost_dbg(dbg_pte, "cannot allocate sg table");
332                 goto err_mmaped;
333         }
334         memset(va, 0, len);
335         nvhost_memmgr_munmap(r, va);
336         *handle = (void *)r;
337
338         return 0;
339
340 err_mmaped:
341         nvhost_memmgr_munmap(r, va);
342 err_alloced:
343         nvhost_memmgr_put(client, r);
344 err_out:
345         return -ENOMEM;
346 }
347
348 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
349                             struct sg_table *sgt, u32 order)
350 {
351         struct mem_mgr *client = mem_mgr_from_vm(vm);
352         nvhost_dbg_fn("");
353         BUG_ON(sgt == NULL);
354         nvhost_memmgr_free_sg_table(client, handle, sgt);
355         nvhost_memmgr_put(client, handle);
356 }
357
358 static int map_gmmu_pages(void *handle, struct sg_table *sgt, void **va)
359 {
360         struct mem_handle *r = handle;
361         u32 *tmp_va;
362
363         nvhost_dbg_fn("");
364
365         tmp_va = nvhost_memmgr_mmap(r);
366         if (IS_ERR_OR_NULL(tmp_va))
367                 goto err_out;
368
369         *va = tmp_va;
370         return 0;
371
372 err_out:
373         return -ENOMEM;
374 }
375
376 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, u32 *va)
377 {
378         struct mem_handle *r = handle;
379         nvhost_dbg_fn("");
380         nvhost_memmgr_munmap(r, va);
381 }
382 #endif
383
384 /* allocate a phys contig region big enough for a full
385  * sized gmmu page table for the given gmmu_page_size.
386  * the whole range is zeroed so it's "invalid"/will fault
387  */
388
389 static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
390                                         enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
391                                         struct page_table_gk20a *pte)
392 {
393         int err;
394         u32 pte_order;
395         void *handle;
396         struct sg_table *sgt;
397
398         nvhost_dbg_fn("");
399
400         /* allocate enough pages for the table */
401         pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order;
402
403         err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt);
404         if (err)
405                 return err;
406
407         nvhost_dbg(dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
408                         pte, (u64)sg_phys(sgt->sgl), pte_order);
409
410         pte->ref = handle;
411         pte->sgt = sgt;
412
413         return 0;
414 }
415
416 /* given address range (inclusive) determine the pdes crossed */
417 static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm,
418                                               u64 addr_lo, u64 addr_hi,
419                                               u32 *pde_lo, u32 *pde_hi)
420 {
421         *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift);
422         *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift);
423         nvhost_dbg(dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
424                    addr_lo, addr_hi, vm->mm->pde_stride_shift);
425         nvhost_dbg(dbg_pte, "pde_lo=%d pde_hi=%d",
426                    *pde_lo, *pde_hi);
427 }
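/*
 * Illustrative example: with pde_stride_shift == 27, the range
 * [0x07fff000, 0x08000fff] straddles a 128MB pde boundary and yields
 * pde_lo == 0, pde_hi == 1; a range contained in a single 128MB stride
 * yields pde_lo == pde_hi.
 */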
428
429 static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
430 {
431         return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v());
432 }
433
434 static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm,
435                                        u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
436 {
437         u32 ret;
438         /* mask off pde part */
439         addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1));
440         /* shift over to get pte index. note assumption that pte index
441          * doesn't leak over into the high 32b */
442         ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]);
443
444         nvhost_dbg(dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
445         return ret;
446 }
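/*
 * Illustrative example: with pde_stride_shift == 27, addr 0x08005000
 * masks down to offset 0x5000 within its pde, giving pte index 5 for
 * small (4KB) pages and pte index 0 for big (128KB) pages.
 */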
447
448 static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page,
449                                                     u32 *pte_offset)
450 {
451         /* ptes are 8B regardless of pagesize */
452         /* pte space pages are 4KB. so 512 ptes per 4KB page*/
453         *pte_page = i >> 9;
454
455         /* this offset is a pte offset, not a byte offset */
456         *pte_offset = i & ((1<<9)-1);
457
458         nvhost_dbg(dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x",
459                    i, *pte_page, *pte_offset);
460 }
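/* e.g. pte index 1000 -> pte space page 1 (1000 >> 9), pte offset 488
 * (1000 & 511), since each 4KB pte space page holds 512 8B ptes. */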
461
462
463 /*
464  * given a pde index/page table number, make sure it has a
465  * backing store and, if not, allocate one and record it in
466  * the appropriate pde
467  */
468 static int validate_gmmu_page_table_gk20a(struct vm_gk20a *vm,
469                           u32 i,
470                           enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
471 {
472         int err;
473         struct page_table_gk20a *pte =
474                 vm->pdes.ptes[gmmu_pgsz_idx] + i;
475
476         nvhost_dbg_fn("");
477
478         /* if it's already in place it's valid */
479         if (pte->ref)
480                 return 0;
481
482         nvhost_dbg(dbg_pte, "alloc %dKB ptes for pde %d",
483                    gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
484
485         err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
486         if (err)
487                 return err;
488
489         /* rewrite pde */
490         update_gmmu_pde(vm, i);
491
492         return 0;
493 }
494
495 static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
496                              u64 size,
497                              enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
498
499 {
500         struct nvhost_allocator *vma = &vm->vma[gmmu_pgsz_idx];
501         int err;
502         u64 offset;
503         u32 start_page_nr = 0, num_pages;
504         u32 i, pde_lo, pde_hi;
505         u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx];
506
507         if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) {
508                 dev_warn(dev_from_vm(vm),
509                          "invalid page size requested in gk20a vm alloc");
510                 return 0; /* 0 signals allocation failure to callers */
511         }
512
513         if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
514                 dev_warn(dev_from_vm(vm),
515                          "unsupported page size requested");
516                 return 0;
517
518         }
519
520         /* be certain we round up to gmmu_page_size if needed */
521         /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
522         size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
523
524         nvhost_dbg_info("size=0x%llx @ pgsz=%dKB", size,
525                         gmmu_page_sizes[gmmu_pgsz_idx]>>10);
526
527         /* The vma allocator represents page accounting. */
528         num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx];
529
530         err = vma->alloc(vma, &start_page_nr, num_pages);
531
532         if (err) {
533                 nvhost_err(dev_from_vm(vm),
534                            "%s oom: sz=0x%llx", vma->name, size);
535                 return 0;
536         }
537
538         offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx];
539         nvhost_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
540
541         pde_range_from_vaddr_range(vm,
542                                    offset, offset + size - 1,
543                                    &pde_lo, &pde_hi);
544
545         /* mark the addr range valid (but with 0 phys addr, which will fault) */
546         for (i = pde_lo; i <= pde_hi; i++) {
547
548                 err = validate_gmmu_page_table_gk20a(vm, i, gmmu_pgsz_idx);
549
550                 if (err) {
551                         nvhost_err(dev_from_vm(vm),
552                                    "failed to validate page table %d: %d",
553                                    i, err);
554                         return 0;
555                 }
556         }
557
558         nvhost_dbg_fn("ret=0x%llx", offset);
559
560         return offset;
561 }
562
563 static void gk20a_vm_free_va(struct vm_gk20a *vm,
564                              u64 offset, u64 size,
565                              enum gmmu_pgsz_gk20a pgsz_idx)
566 {
567         struct nvhost_allocator *vma = &vm->vma[pgsz_idx];
568         u32 page_size = gmmu_page_sizes[pgsz_idx];
569         u32 page_shift = gmmu_page_shifts[pgsz_idx];
570         u32 start_page_nr, num_pages;
571         int err;
572
573         nvhost_dbg_info("%s free addr=0x%llx, size=0x%llx",
574                         vma->name, offset, size);
575
576         start_page_nr = (u32)(offset >> page_shift);
577         num_pages = (u32)((size + page_size - 1) >> page_shift);
578
579         err = vma->free(vma, start_page_nr, num_pages);
580         if (err) {
581                 nvhost_err(dev_from_vm(vm),
582                            "not found: offset=0x%llx, sz=0x%llx",
583                            offset, size);
584         }
585 }
586
587 static int insert_mapped_buffer(struct rb_root *root,
588                                 struct mapped_buffer_node *mapped_buffer)
589 {
590         struct rb_node **new_node = &(root->rb_node), *parent = NULL;
591
592         /* Figure out where to put new node */
593         while (*new_node) {
594                 struct mapped_buffer_node *cmp_with =
595                         container_of(*new_node, struct mapped_buffer_node,
596                                      node);
597
598                 parent = *new_node;
599
600                 if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */
601                         new_node = &((*new_node)->rb_left);
602                 else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */
603                         new_node = &((*new_node)->rb_right);
604                 else
605                         return -EINVAL; /* no fair dup'ing */
606         }
607
608         /* Add new node and rebalance tree. */
609         rb_link_node(&mapped_buffer->node, parent, new_node);
610         rb_insert_color(&mapped_buffer->node, root);
611
612         return 0;
613 }
614
615 static struct mapped_buffer_node *find_mapped_buffer(struct rb_root *root,
616                                                      u64 addr)
617 {
618
619         struct rb_node *node = root->rb_node;
620         while (node) {
621                 struct mapped_buffer_node *mapped_buffer =
622                         container_of(node, struct mapped_buffer_node, node);
623                 if (mapped_buffer->addr > addr) /* u64 cmp */
624                         node = node->rb_left;
625                 else if (mapped_buffer->addr != addr) /* u64 cmp */
626                         node = node->rb_right;
627                 else
628                         return mapped_buffer;
629         }
630         return NULL;
631 }
632
633 /* convenience setup for nvmap buffer attr queries */
634 struct bfr_attr_query {
635         int err;
636         u32 v;
637 };
638 static u32 nvmap_bfr_param[] = {
639 #define BFR_SIZE   0
640         NVMAP_HANDLE_PARAM_SIZE,
641 #define BFR_ALIGN  1
642         NVMAP_HANDLE_PARAM_ALIGNMENT,
643 #define BFR_HEAP   2
644         NVMAP_HANDLE_PARAM_HEAP,
645 #define BFR_KIND   3
646         NVMAP_HANDLE_PARAM_KIND,
647 };
648 #define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
649
650 struct buffer_attrs {
651         struct sg_table *sgt;
652         u64 size;
653         u64 align;
654         u32 ctag_offset;
655         u32 ctag_lines;
656         int contig;
657         int iovmm_mapped;
658         int pgsz_idx;
659         u8 kind_v;
660         u8 uc_kind_v;
661 };
662
663 static int setup_buffer_size_and_align(struct device *d,
664                                        struct buffer_attrs *bfr,
665                                        struct bfr_attr_query *query)
666 {
667         int i;
668         /* buffer allocation size and alignment must be a multiple
669            of one of the supported page sizes.*/
670         bfr->size = query[BFR_SIZE].v;
671         bfr->align = query[BFR_ALIGN].v;
672         bfr->pgsz_idx = -1;
673
674         /*  choose the biggest first (top->bottom) */
675         for (i = (gmmu_nr_page_sizes-1); i >= 0; i--)
676                 if (!(gmmu_page_offset_masks[i] & bfr->align)) {
677                         /* would like to add this too but nvmap returns the
678                          * original requested size not the allocated size.
679                          * (!(gmmu_page_offset_masks[i] & bfr->size)) */
680                         bfr->pgsz_idx = i;
681                         break;
682                 }
683
684         if (unlikely(bfr->pgsz_idx == -1)) {
685                 nvhost_warn(d, "unsupported buffer alignment: 0x%llx",
686                            bfr->align);
687                 return -EINVAL;
688         }
689         switch (query[BFR_HEAP].v) {
690         case NVMAP_HEAP_SYSMEM:
691                 /* sysmem, contig
692                  * Fall through to carveout...
693                  * TBD: Need nvmap support for scattered sysmem allocs
694                  * w/o mapping through smmu.
695                  */
696
697         case NVMAP_HEAP_CARVEOUT_GENERIC:
698                 /* carveout sysmem, contig */
699                 bfr->contig = 1;
700                 bfr->iovmm_mapped = 0;
701                 break;
702
703         case NVMAP_HEAP_CARVEOUT_VPR:
704                 /* carveout vpr, contig */
705                 bfr->contig = 1;
706                 bfr->iovmm_mapped = 0;
707                 break;
708
709         case NVMAP_HEAP_IOVMM:
710                 /* sysmem, iovmm/smmu mapped */
711                 bfr->contig = 1;
712                 bfr->iovmm_mapped = 1;
713                 break;
714         default:
715                 nvhost_err(d, "unsupported nvmap buffer heap: 0x%x\n",
716                            query[BFR_HEAP].v);
717                 return -EINVAL;
718         }
719
720         bfr->kind_v = query[BFR_KIND].v;
721
722         return 0;
723 }
724
725
726 static int setup_buffer_kind_and_compression(struct device *d,
727                                              u32 flags,
728                                              u32 kind,
729                                              struct buffer_attrs *bfr,
730                                              enum gmmu_pgsz_gk20a pgsz_idx)
731 {
732         bool kind_compressible;
733
734         /* This flag (which comes from map_buffer ioctl) is for override now.
735            It will be removed when all clients which use it have been
736            changed to specify kind in the nvmap buffer alloc. */
737         if (flags & NVHOST_MAP_BUFFER_FLAGS_KIND_SPECIFIED)
738                 bfr->kind_v = kind;
739
740         if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
741                 bfr->kind_v = gmmu_pte_kind_pitch_v();
742
743         if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
744                 nvhost_err(d, "kind 0x%x not supported", bfr->kind_v);
745                 return -EINVAL;
746         }
747
748         bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
749         /* find a suitable uncompressed kind if it becomes necessary later */
750         kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
751         if (kind_compressible) {
752                 bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
753                 if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
754                         /* shouldn't happen, but it is worth cross-checking */
755                         nvhost_err(d, "comptag kind 0x%x can't be"
756                                    " downgraded to uncompressed kind",
757                                    bfr->kind_v);
758                         return -EINVAL;
759                 }
760         }
761         /* comptags only supported for suitable kinds, 128KB pagesize */
762         if (unlikely(kind_compressible &&
763                      (gmmu_page_sizes[pgsz_idx] != 128*1024))) {
764                 nvhost_warn(d, "comptags specified"
765                             " but pagesize being used doesn't support it");
766                 /* it is safe to fall back to uncompressed as
767                    functionality is not harmed */
768                 bfr->kind_v = bfr->uc_kind_v;
769                 kind_compressible = false;
770         }
771         if (kind_compressible)
772                 bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
773                         COMP_TAG_LINE_SIZE_SHIFT;
774         else
775                 bfr->ctag_lines = 0;
776
777         return 0;
778 }
779
780 static u64 gk20a_vm_map(struct vm_gk20a *vm,
781                         struct mem_mgr *memmgr,
782                         struct mem_handle *r,
783                         u64 offset_align,
784                         u32 flags /*NVHOST_MAP_BUFFER_FLAGS_*/,
785                         u32 kind,
786                         struct sg_table **sgt)
787 {
788         struct gk20a *g = gk20a_from_vm(vm);
789         struct nvhost_allocator *ctag_allocator = &g->gr.comp_tags;
790         struct device *d = dev_from_vm(vm);
791         struct mapped_buffer_node *mapped_buffer = 0;
792         bool inserted = false, va_allocated = false;
793         u32 gmmu_page_size = 0;
794         int gmmu_page_smmu_type = 0;
795         u64 map_offset = 0;
796         int attr, err = 0;
797         struct buffer_attrs bfr = {0};
798         struct bfr_attr_query query[BFR_ATTRS];
799
800         /* query bfr attributes: size, align, heap, kind */
801         for (attr = 0; attr < BFR_ATTRS; attr++) {
802                 query[attr].err =
803                         nvhost_memmgr_get_param(memmgr, r,
804                                                 nvmap_bfr_param[attr],
805                                                 &query[attr].v);
806                 if (unlikely(query[attr].err != 0)) {
807                         nvhost_err(d,
808                                    "failed to get nvmap buffer param %d: %d\n",
809                                    nvmap_bfr_param[attr],
810                                    query[attr].err);
811                         return 0;
812                 }
813         }
814
815         /* validate/adjust bfr attributes */
816         err = setup_buffer_size_and_align(d, &bfr, query);
817         if (unlikely(err))
818                 goto clean_up;
819         if (unlikely(bfr.pgsz_idx < gmmu_page_size_small ||
820                      bfr.pgsz_idx > gmmu_page_size_big)) {
821                 BUG_ON(1);
822                 err = -EINVAL;
823                 goto clean_up;
824         }
825         gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
826
827         /* if specified the map offset must be bfr page size aligned */
828         if (flags & NVHOST_MAP_BUFFER_FLAGS_OFFSET) {
829                 map_offset = offset_align;
830                 if (map_offset & gmmu_page_offset_masks[bfr.pgsz_idx]) {
831                         nvhost_err(d,
832                            "map offset must be buffer page size aligned 0x%llx",
833                            map_offset);
834                         err = -EINVAL;
835                         goto clean_up;
836                 }
837         }
838
839         /* works ok with 4k phys.  works 4k,smmu if buffers in
840          * sim invocation is bumped up again */
841         gmmu_page_smmu_type = bfr.iovmm_mapped ?
842                 gmmu_page_smmu_type_virtual : gmmu_page_smmu_type_physical;
843
844         /* pin buffer to get phys/iovmm addr */
845         bfr.sgt = nvhost_memmgr_sg_table(memmgr, r);
846         if (IS_ERR_OR_NULL(bfr.sgt)) {
847                 nvhost_warn(d, "failed to get buffer sg table");
848                 goto clean_up;
849         }
850         if (sgt)
851                 *sgt = bfr.sgt;
852 #ifdef CONFIG_TEGRA_IOMMU_SMMU
853         if (gmmu_page_smmu_type == gmmu_page_smmu_type_virtual) {
854                 int err = nvhost_memmgr_smmu_map(bfr.sgt,
855                                 bfr.size, d);
856                 if (err) {
857                         /* if mapping fails, fall back to physical, small
858                          * pages */
859                         bfr.iovmm_mapped = 0;
860                         bfr.pgsz_idx = gmmu_page_size_small;
861                         gmmu_page_smmu_type = gmmu_page_smmu_type_physical;
862                         nvhost_warn(d, "Failed to map to SMMU\n");
863                 } else
864                         nvhost_dbg(dbg_pte, "Mapped to SMMU, address %08llx",
865                                         (u64)sg_dma_address(bfr.sgt->sgl));
866         }
867 #endif
868
869         err = setup_buffer_kind_and_compression(d, flags, kind,
870                                                 &bfr, bfr.pgsz_idx);
871         if (unlikely(err)) {
872                 nvhost_err(d, "failure setting up kind and compression");
873                 goto clean_up;
874         }
875
876         /* bar1 and pmu vm don't need ctag */
877         if (!vm->enable_ctag)
878                 bfr.ctag_lines = 0;
879
880         /* allocate compression resources if needed */
881         if (bfr.ctag_lines) {
882                 err = ctag_allocator->alloc(ctag_allocator, &bfr.ctag_offset,
883                                             bfr.ctag_lines);
884                 /* ok to fall back here if we ran out */
885                 /* TBD: we can partially alloc ctags as well... */
886                 if (err) {
887                         bfr.ctag_lines = bfr.ctag_offset = 0;
888                         bfr.kind_v = bfr.uc_kind_v;
889                 }
890         }
891
892         /* init/clear the ctag buffer */
893         if (bfr.ctag_lines)
894                 gk20a_gr_clear_comptags(g,
895                                         bfr.ctag_offset,
896                                         bfr.ctag_offset + bfr.ctag_lines - 1);
897
898
899         /* Allocate (or validate when map_offset != 0) the virtual address. */
900         if (!map_offset) {
901                 map_offset = vm->alloc_va(vm, bfr.size,
902                                           bfr.pgsz_idx);
903                 if (!map_offset) {
904                         nvhost_err(d, "failed to allocate va space");
905                         err = -ENOMEM;
906                         goto clean_up;
907                 }
908                 va_allocated = true;
909         } else {
910                 /* TODO: allocate the offset to keep track? */
911                 /* TODO: then we could warn on actual collisions... */
912                 nvhost_warn(d, "fixed offset mapping isn't safe yet!");
913                 nvhost_warn(d, "other mappings may collide!");
914         }
915
916         nvhost_dbg_fn("r=%p, map_offset=0x%llx, contig=%d page_size=%d "
917                       "iovmm_mapped=%d kind=0x%x kind_uc=0x%x flags=0x%x",
918                       r, map_offset, bfr.contig, gmmu_page_size,
919                       bfr.iovmm_mapped,
920                       bfr.kind_v, bfr.uc_kind_v, flags);
921         nvhost_dbg_info("comptag size=%d start=%d for 0x%llx",
922                         bfr.ctag_lines, bfr.ctag_offset,
923                         (u64)sg_phys(bfr.sgt->sgl));
924
925         /* keep track of the buffer for unmapping */
926         /* TBD: check for multiple mapping of same buffer */
927         mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL);
928         if (!mapped_buffer) {
929                 nvhost_warn(d, "oom allocating tracking buffer");
930                 goto clean_up;
931         }
932         mapped_buffer->memmgr      = memmgr;
933         mapped_buffer->handle_ref  = r;
934         mapped_buffer->sgt         = bfr.sgt;
935         mapped_buffer->addr        = map_offset;
936         mapped_buffer->size        = bfr.size;
937         mapped_buffer->pgsz_idx    = bfr.pgsz_idx;
938         mapped_buffer->ctag_offset = bfr.ctag_offset;
939         mapped_buffer->ctag_lines  = bfr.ctag_lines;
940
941         err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
942         if (err) {
943                 nvhost_err(d, "failed to insert into mapped buffer tree");
944                 goto clean_up;
945         }
946         inserted = true;
947
948         nvhost_dbg_info("allocated va @ 0x%llx", map_offset);
949
950         err = update_gmmu_ptes(vm, bfr.pgsz_idx,
951                                bfr.sgt,
952                                map_offset, map_offset + bfr.size - 1,
953                                bfr.kind_v,
954                                bfr.ctag_offset,
955                                flags & NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE);
956         if (err) {
957                 nvhost_err(d, "failed to update ptes on map");
958                 goto clean_up;
959         }
960
961         return map_offset;
962
963 clean_up:
964         if (inserted)
965                 rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
966         kfree(mapped_buffer);
967         if (va_allocated)
968                 vm->free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
969         if (bfr.ctag_lines)
970                 ctag_allocator->free(ctag_allocator,
971                                      bfr.ctag_offset,
972                                      bfr.ctag_lines);
973         if (!IS_ERR_OR_NULL(bfr.sgt)) {
974 #ifdef CONFIG_TEGRA_IOMMU_SMMU
975                 if (sg_dma_address(bfr.sgt->sgl))
976                         nvhost_memmgr_smmu_unmap(bfr.sgt, bfr.size, d);
977 #endif
978                 nvhost_memmgr_free_sg_table(memmgr, r, bfr.sgt);
979         }
980
981         nvhost_dbg_info("err=%d\n", err);
982         return 0;
983 }
984
985 static int update_gmmu_ptes(struct vm_gk20a *vm,
986                             enum gmmu_pgsz_gk20a pgsz_idx,
987                        struct sg_table *sgt, u64 first_vaddr, u64 last_vaddr,
988                        u8 kind_v, u32 ctag_offset, bool cacheable)
989 {
990         int err;
991         u32 pde_lo, pde_hi, pde_i;
992         struct scatterlist *cur_chunk;
993         unsigned int cur_offset;
994         u32 pte_w[2] = {0, 0}; /* invalid pte */
995         u32 ctag = ctag_offset;
996         u32 ctag_ptes, ctag_pte_cnt;
997         u32 page_shift = gmmu_page_shifts[pgsz_idx];
998
999         pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
1000                                    &pde_lo, &pde_hi);
1001
1002         nvhost_dbg(dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
1003                    pgsz_idx, pde_lo, pde_hi);
1004
1005         ctag_ptes = COMP_TAG_LINE_SIZE >> page_shift;
1006
1007         if (sgt)
1008                 cur_chunk = sgt->sgl;
1009         else
1010                 cur_chunk = NULL;
1011
1012         cur_offset = 0;
1013         ctag_pte_cnt = 0;
1014         for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
1015                 u32 pte_lo, pte_hi;
1016                 u32 pte_cur;
1017                 u32 pte_space_page_cur, pte_space_offset_cur;
1018                 u32 pte_space_page_offset;
1019                 void *pte_kv_cur;
1020
1021                 struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
1022
1023                 if (pde_i == pde_lo)
1024                         pte_lo = pte_index_from_vaddr(vm, first_vaddr,
1025                                                       pgsz_idx);
1026                 else
1027                         pte_lo = 0;
1028
1029                 if ((pde_i != pde_hi) && (pde_hi != pde_lo))
1030                         pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1;
1031                 else
1032                         pte_hi = pte_index_from_vaddr(vm, last_vaddr,
1033                                                       pgsz_idx);
1034
1035                 /* need to worry about crossing pages when accessing the ptes */
1036                 pte_space_page_offset_from_index(pte_lo, &pte_space_page_cur,
1037                                                  &pte_space_offset_cur);
1038                 err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur);
1039                 if (err) {
1040                         nvhost_err(dev_from_vm(vm),
1041                                    "couldn't map ptes for update");
1042                         goto clean_up;
1043                 }
1044
1045                 nvhost_dbg(dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
1046                 for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
1047                         pte_space_page_offset = pte_cur;
1048                         if (ctag) {
1049                                 if (ctag_pte_cnt >= ctag_ptes) {
1050                                         ctag++;
1051                                         ctag_pte_cnt = 0;
1052                                 }
1053                                 ctag_pte_cnt++;
1054                         }
1055
1056                         if (likely(sgt)) {
1057                                 u64 addr = gk20a_mm_iova_addr(cur_chunk);
1058                                 addr += cur_offset;
1059
1060                                 nvhost_dbg(dbg_pte,
1061                                    "pte_cur=%d addr=0x%08llx kind=%d ctag=%d",
1062                                    pte_cur, addr, kind_v, ctag);
1063
1064                                 addr >>= gmmu_pte_address_shift_v();
1065                                 pte_w[0] = gmmu_pte_valid_true_f() |
1066                                         gmmu_pte_address_sys_f(addr);
1067                                 pte_w[1] = gmmu_pte_aperture_video_memory_f() |
1068                                         gmmu_pte_kind_f(kind_v) |
1069                                         gmmu_pte_comptagline_f(ctag);
1070
1071                                 nvhost_dbg(dbg_pte, "\t0x%x,%x",
1072                                            pte_w[1], pte_w[0]);
1073
1074                                 if (!cacheable)
1075                                         pte_w[1] |= gmmu_pte_vol_true_f();
1076
1077                                 cur_offset += 1 << page_shift;
1078                                 while (cur_chunk &&
1079                                         cur_offset >= cur_chunk->length) {
1080                                         cur_offset -= cur_chunk->length;
1081                                         cur_chunk = sg_next(cur_chunk);
1082                                 }
1083                                 pte->ref_cnt++;
1084                         } else {
1085                                 pte->ref_cnt--;
1086                         }
1087
1088                         nvhost_dbg(dbg_pte,
1089                            "vm %p, pte[1]=0x%x, pte[0]=0x%x, ref_cnt=%d",
1090                            vm, pte_w[1], pte_w[0], pte->ref_cnt);
1091
1092                         mem_wr32(pte_kv_cur + pte_space_page_offset*8, 0,
1093                                  pte_w[0]);
1094                         mem_wr32(pte_kv_cur + pte_space_page_offset*8, 1,
1095                                  pte_w[1]);
1096                 }
1097
1098                 __cpuc_flush_dcache_area(pte_kv_cur, PAGE_SIZE);
1099
1100                 unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
1101
1102                 if (pte->ref_cnt == 0) {
1103                         /* It can make sense to keep around one page table for
1104                          * each flavor (empty)... in case a new map is coming
1105                          * right back to alloc (and fill it in) again.
1106                          * But: deferring unmapping should help with pathologic
1107                          * unmap/map/unmap/map cases where we'd trigger pte
1108                          * free/alloc/free/alloc.
1109                          */
1110                         free_gmmu_pages(vm, pte->ref, pte->sgt,
1111                                 vm->mm->page_table_sizing[pgsz_idx].order);
1112                         pte->ref = NULL;
1113
1114                         /* rewrite pde */
1115                         update_gmmu_pde(vm, pde_i);
1116                 }
1117
1118         }
1119
1120         smp_mb();
1121         vm->tlb_dirty = true;
1122         nvhost_dbg_fn("set tlb dirty");
1123         return 0;
1124
1125 clean_up:
1126         /*TBD: potentially rewrite above to pre-map everything it needs to
1127          * as that's the only way it can fail */
1128         return err;
1129
1130 }
1131
1132
1133 /* for gk20a the "video memory" apertures here are misnomers. */
1134 static inline u32 big_valid_pde0_bits(u64 pte_addr)
1135 {
1136         u32 pde0_bits =
1137                 gmmu_pde_aperture_big_video_memory_f() |
1138                 gmmu_pde_address_big_sys_f(
1139                            (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1140         return  pde0_bits;
1141 }
1142 static inline u32 small_valid_pde1_bits(u64 pte_addr)
1143 {
1144         u32 pde1_bits =
1145                 gmmu_pde_aperture_small_video_memory_f() |
1146                 gmmu_pde_vol_small_true_f() | /* tbd: why? */
1147                 gmmu_pde_address_small_sys_f(
1148                            (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1149         return pde1_bits;
1150 }
1151
1152 /* Given the current state of the ptes associated with a pde,
1153    determine value and write it out.  There's no checking
1154    here to determine whether or not a change was actually
1155    made.  So, superfluous updates will cause unnecessary
1156    pde invalidations.
1157 */
1158 static void update_gmmu_pde(struct vm_gk20a *vm, u32 i)
1159 {
1160         bool small_valid, big_valid;
1161         u64 pte_addr[2] = {0, 0};
1162         struct page_table_gk20a *small_pte =
1163                 vm->pdes.ptes[gmmu_page_size_small] + i;
1164         struct page_table_gk20a *big_pte =
1165                 vm->pdes.ptes[gmmu_page_size_big] + i;
1166         u32 pde_v[2] = {0, 0};
1167         u32 *pde;
1168
1169         small_valid = small_pte && small_pte->ref;
1170         big_valid   = big_pte && big_pte->ref;
1171
1172         if (small_valid)
1173                 pte_addr[gmmu_page_size_small] =
1174                         sg_phys(small_pte->sgt->sgl);
1175         if (big_valid)
1176                 pte_addr[gmmu_page_size_big] =
1177                         sg_phys(big_pte->sgt->sgl);
1178
1179         pde_v[0] = gmmu_pde_size_full_f();
1180         pde_v[0] |= big_valid ?
1181                 big_valid_pde0_bits(pte_addr[gmmu_page_size_big])
1182                 :
1183                 (gmmu_pde_aperture_big_invalid_f());
1184
1185         pde_v[1] |= (small_valid ?
1186                      small_valid_pde1_bits(pte_addr[gmmu_page_size_small])
1187                      :
1188                      (gmmu_pde_aperture_small_invalid_f() |
1189                       gmmu_pde_vol_small_false_f())
1190                      )
1191                 |
1192                 (big_valid ? (gmmu_pde_vol_big_true_f()) :
1193                  gmmu_pde_vol_big_false_f());
1194
1195         pde = pde_from_index(vm, i);
1196
1197         mem_wr32(pde, 0, pde_v[0]);
1198         mem_wr32(pde, 1, pde_v[1]);
1199
1200         smp_mb();
1201         __cpuc_flush_dcache_area(pde, sizeof(u32) * 2);
1202
1203
1204         nvhost_dbg(dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
1205         vm->pdes.dirty = true;
1206         vm->tlb_dirty  = true;
1207 }
1208
1209
1210 /* return mem_mgr and mem_handle to caller. If the mem_handle is a kernel dup
1211    from user space (as_ioctl), caller releases the kernel duplicated handle */
1212 static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
1213                         struct mem_mgr **memmgr, struct mem_handle **r)
1214 {
1215         struct mapped_buffer_node *mapped_buffer;
1216         struct gk20a *g = gk20a_from_vm(vm);
1217         struct nvhost_allocator *comp_tags = &g->gr.comp_tags;
1218         int err = 0;
1219
1220         BUG_ON(memmgr == NULL || r == NULL);
1221
1222         *memmgr = NULL;
1223         *r = NULL;
1224
1225         nvhost_dbg_fn("offset=0x%llx", offset);
1226
1227         mapped_buffer = find_mapped_buffer(&vm->mapped_buffers, offset);
1228         if (!mapped_buffer) {
1229                 nvhost_dbg(dbg_err, "invalid addr to unmap 0x%llx", offset);
1230                 return;
1231         }
1232
1233         vm->free_va(vm, mapped_buffer->addr, mapped_buffer->size,
1234                     mapped_buffer->pgsz_idx);
1235
1236         if (mapped_buffer->ctag_offset)
1237                 comp_tags->free(comp_tags,
1238                         mapped_buffer->ctag_offset, mapped_buffer->ctag_lines);
1239
1240         /* unmap here needs to know the page size we assigned at mapping */
1241         err = update_gmmu_ptes(vm,
1242                                mapped_buffer->pgsz_idx,
1243                                0, /* n/a for unmap */
1244                                mapped_buffer->addr,
1245                                mapped_buffer->addr + mapped_buffer->size - 1,
1246                                0, 0, false /* n/a for unmap */);
1247
1248         /* detect which if any pdes/ptes can now be released */
1249
1250         if (err)
1251                 dev_err(dev_from_vm(vm),
1252                         "failed to update gmmu ptes on unmap");
1253
1254 #ifdef CONFIG_TEGRA_IOMMU_SMMU
1255         if (sg_dma_address(mapped_buffer->sgt->sgl)) {
1256                 nvhost_dbg(dbg_pte, "unmap from SMMU addr %08llx",
1257                            (u64)sg_dma_address(mapped_buffer->sgt->sgl));
1258                 nvhost_memmgr_smmu_unmap(mapped_buffer->sgt,
1259                                          mapped_buffer->size,
1260                                          dev_from_vm(vm));
1261         }
1262 #endif
1263         nvhost_memmgr_free_sg_table(mapped_buffer->memmgr,
1264                         mapped_buffer->handle_ref, mapped_buffer->sgt);
1265
1266         /* remove from mapped buffer tree, free */
1267         rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1268
1269         *memmgr = mapped_buffer->memmgr;
1270         *r = mapped_buffer->handle_ref;
1271         kfree(mapped_buffer);
1272
1273         return;
1274 }
1275
1276 /* called by kernel. mem_mgr and mem_handle are ignored */
1277 static void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
1278 {
1279         struct mem_mgr *memmgr;
1280         struct mem_handle *r;
1281
1282         gk20a_vm_unmap_user(vm, offset, &memmgr, &r);
1283 }
1284
1285 void gk20a_vm_remove_support(struct vm_gk20a *vm)
1286 {
1287         struct mapped_buffer_node *mapped_buffer;
1288         struct rb_node *node;
1289         struct mem_mgr *memmgr;
1290         struct mem_handle *r;
1291
1292         nvhost_dbg_fn("");
1293
1294         /* TBD: add a flag here for the unmap code to recognize teardown
1295          * and short-circuit any otherwise expensive operations. */
1296
1297         node = rb_first(&vm->mapped_buffers);
1298         while (node) {
1299                 mapped_buffer =
1300                         container_of(node, struct mapped_buffer_node, node);
1301                 vm->unmap_user(vm, mapped_buffer->addr, &memmgr, &r);
1302                 if (memmgr != mem_mgr_from_vm(vm)) {
1303                         nvhost_memmgr_put(memmgr, r);
1304                         nvhost_memmgr_put_mgr(memmgr);
1305                 }
1306                 node = rb_first(&vm->mapped_buffers);
1307         }
1308
1309         /* TBD: unmapping all buffers above may not actually free
1310          * all vm ptes.  jettison them here for certain... */
1311
1312         unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
1313         free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
1314
1315         kfree(vm->pdes.ptes[gmmu_page_size_small]);
1316         kfree(vm->pdes.ptes[gmmu_page_size_big]);
1317         nvhost_allocator_destroy(&vm->vma[gmmu_page_size_small]);
1318         nvhost_allocator_destroy(&vm->vma[gmmu_page_size_big]);
1319 }
1320
1321 /* address space interfaces for the gk20a module */
1322 static int gk20a_as_alloc_share(struct nvhost_as_share *as_share)
1323 {
1324         struct nvhost_as *as = as_share->as;
1325         struct gk20a *gk20a = get_gk20a(as->ch->dev);
1326         struct mm_gk20a *mm = &gk20a->mm;
1327         struct vm_gk20a *vm;
1328         u64 vma_size;
1329         u32 num_pages;
1330         char name[32];
1331         int err;
1332
1333         nvhost_dbg_fn("");
1334
1335         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1336         if (!vm)
1337                 return -ENOMEM;
1338
1339         as_share->priv = (void *)vm;
1340
1341         vm->mm = mm;
1342         vm->as_share = as_share;
1343
1344         vm->big_pages = true;
1345
1346         vm->va_start  = 0; /* we have a one page hole though so zeros fault*/
1347         vm->va_limit  = mm->channel.size;
1348
1349         {
1350                 u32 pde_lo, pde_hi;
1351                 pde_range_from_vaddr_range(vm,
1352                                            0, vm->va_limit-1,
1353                                            &pde_lo, &pde_hi);
1354                 vm->pdes.num_pdes = pde_hi + 1;
1355         }
1356
1357         vm->pdes.ptes[gmmu_page_size_small] =
1358                 kzalloc(sizeof(struct page_table_gk20a) *
1359                         vm->pdes.num_pdes, GFP_KERNEL);
1360
1361         vm->pdes.ptes[gmmu_page_size_big] =
1362                 kzalloc(sizeof(struct page_table_gk20a) *
1363                         vm->pdes.num_pdes, GFP_KERNEL);
1364
1365         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
1366               vm->pdes.ptes[gmmu_page_size_big]))
1367                 return -ENOMEM;
1368
1369         nvhost_dbg_info("init space for va_limit=0x%llx num_pdes=%d",
1370                    vm->va_limit, vm->pdes.num_pdes);
1371
1372         /* allocate the page table directory */
1373         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
1374                                &vm->pdes.sgt);
1375         if (err)
1376                 return -ENOMEM;
1377
1378         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
1379         if (err) {
1380                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
1381                 return -ENOMEM;
1382         }
1383         nvhost_dbg(dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx",
1384                         vm->pdes.kv, (u64)sg_phys(vm->pdes.sgt->sgl));
1385         /* we could release vm->pdes.kv but it's only one page... */
1386
1387
1388         /* low-half: alloc small pages */
1389         /* high-half: alloc big pages */
1390         vma_size = mm->channel.size >> 1;
1391
1392         snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
1393                  gmmu_page_sizes[gmmu_page_size_small]>>10);
1394
1395         num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
1396         /* note the "1" below as the start for the allocator. *
1397          * it's what keeps us from using the zero-page        */
1398         nvhost_allocator_init(&vm->vma[gmmu_page_size_small], name,
1399               1, /* start */
1400               num_pages - 1, /* length */
1401               1); /* align */
1402
1403
1404         snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
1405                  gmmu_page_sizes[gmmu_page_size_big]>>10);
1406         num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
1407         nvhost_allocator_init(&vm->vma[gmmu_page_size_big], name,
1408                               num_pages, /* start */
1409                               num_pages, /* length */
1410                               1); /* align */
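        /*
         * Illustrative split for the 8GB channel VM above: vma_size is
         * 4GB, so the small-page allocator hands out 4KB page numbers
         * 1..(4GB/4KB - 1) covering the low 4GB (minus the zero page),
         * while the big-page allocator starts at 128KB page number
         * 4GB/128KB = 32768, covering the upper 4GB.
         */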
1411
1412         vm->mapped_buffers = RB_ROOT;
1413
1414         vm->alloc_va       = gk20a_vm_alloc_va;
1415         vm->free_va        = gk20a_vm_free_va;
1416         vm->map            = gk20a_vm_map;
1417         vm->unmap          = gk20a_vm_unmap;
1418         vm->unmap_user     = gk20a_vm_unmap_user;
1419         vm->tlb_inval      = gk20a_mm_tlb_invalidate;
1420         vm->remove_support = gk20a_vm_remove_support;
1421
1422         vm->enable_ctag = true;
1423
1424         return 0;
1425 }
1426
1427
1428 static int gk20a_as_release_share(struct nvhost_as_share *as_share)
1429 {
1430         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
1431
1432         nvhost_dbg_fn("");
1433
1434         gk20a_vm_remove_support(vm);
1435
1436         as_share->priv = NULL;
1437         kfree(vm);
1438
1439         return 0;
1440 }
1441
1442
1443 static int gk20a_as_alloc_space(struct nvhost_as_share *as_share,
1444                                 struct nvhost_as_alloc_space_args *args)
1445 {
1446         int err = -ENOMEM;
1447         int pgsz_idx;
1448         u32 start_page_nr;
1449         struct nvhost_allocator *vma;
1450         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
1451
1452         nvhost_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
1453                         args->flags, args->page_size, args->pages,
1454                         args->o_a.offset);
1455
1456         /* determine pagesz idx */
1457         for (pgsz_idx = gmmu_page_size_small;
1458              pgsz_idx < gmmu_nr_page_sizes;
1459              pgsz_idx++) {
1460                 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
1461                         break;
1462         }
1463
1464         if (pgsz_idx >= gmmu_nr_page_sizes) {
1465                 err = -EINVAL;
1466                 goto clean_up;
1467         }
1468
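        /* start_page_nr of ~0 lets the allocator pick the offset; a
         * FIXED_OFFSET request converts the byte offset to a page number */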
1469         start_page_nr = ~(u32)0;
1470         if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
1471                 start_page_nr = (u32)(args->o_a.offset >>
1472                                       gmmu_page_shifts[pgsz_idx]);
1473
1474         vma = &vm->vma[pgsz_idx];
1475         err = vma->alloc(vma, &start_page_nr, args->pages);
1476         args->o_a.offset = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx];
1477
1478  clean_up:
1479         return err;
1480 }
1481
1482 static int gk20a_as_free_space(struct nvhost_as_share *as_share,
1483                                struct nvhost_as_free_space_args *args)
1484 {
1485         int err = -ENOMEM;
1486         int pgsz_idx;
1487         u32 start_page_nr;
1488         struct nvhost_allocator *vma;
1489         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
1490
1491         nvhost_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
1492                         args->pages, args->offset);
1493
1494         /* determine pagesz idx */
1495         for (pgsz_idx = gmmu_page_size_small;
1496              pgsz_idx < gmmu_nr_page_sizes;
1497              pgsz_idx++) {
1498                 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
1499                         break;
1500         }
1501
1502         if (pgsz_idx >= gmmu_nr_page_sizes) {
1503                 err = -EINVAL;
1504                 goto clean_up;
1505         }
1506
1507         start_page_nr = (u32)(args->offset >>
1508                               gmmu_page_shifts[pgsz_idx]);
1509
1510         vma = &vm->vma[pgsz_idx];
1511         err = vma->free(vma, start_page_nr, args->pages);
1512
1513 clean_up:
1514         return err;
1515 }
1516
1517 static int gk20a_as_bind_hwctx(struct nvhost_as_share *as_share,
1518                                struct nvhost_hwctx *hwctx)
1519 {
1520         int err = 0;
1521         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
1522         struct channel_gk20a *c = hwctx->priv;
1523
1524         nvhost_dbg_fn("");
1525
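        /* attach the vm to the channel; undo the binding if committing
         * the va space to the channel fails */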
1526         c->vm = vm;
1527         err = channel_gk20a_commit_va(c);
1528         if (err)
1529                 c->vm = NULL;
1530
1531         return err;
1532 }
1533
1534 static int gk20a_as_map_buffer(struct nvhost_as_share *as_share,
1535                                struct mem_mgr *nvmap,
1536                                struct mem_handle *r,
1537                                u64 *offset_align,
1538                                u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/)
1539 {
1540         int err = 0;
1541         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
1542         u64 ret_va;
1543
1544         nvhost_dbg_fn("");
1545
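        /* vm->map returns the resulting gpu va (0 on failure), which is
         * passed back through offset_align */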
1546         ret_va = vm->map(vm, nvmap, r, *offset_align,
1547                         flags, 0/*no kind here, to be removed*/, NULL);
1548         *offset_align = ret_va;
1549         if (!ret_va)
1550                 err = -EINVAL;
1551
1552         return err;
1553
1554 }
1555
1556 static int gk20a_as_unmap_buffer(struct nvhost_as_share *as_share, u64 offset,
1557                                  struct mem_mgr **memmgr, struct mem_handle **r)
1558 {
1559         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
1560         int err = 0;
1561         struct nvhost_hwctx *hwctx;
1562         struct channel_gk20a *ch;
1563         struct list_head *pos;
1564         unsigned long timeout = CONFIG_TEGRA_GRHOST_DEFAULT_TIMEOUT;
1565
1566         nvhost_dbg_fn("");
1567
1568         if (!tegra_platform_is_silicon())
1569                 timeout = MAX_SCHEDULE_TIMEOUT;
1570
1571         /* User mode clients expect to be able to cleanly free buffers after
1572          * launching work against them.  To avoid causing mmu faults we wait
1573          * for all pending work with respect to the share to clear before
1574          * unmapping the pages...
1575          * Note: the finish call below takes care to wait only if necessary.
1576          * So only the first in a series of unmappings will cause a wait for
1577          * idle.
1578          */
1579         /* TODO: grab bound list lock, release during wait */
1580         /* TODO: even better: schedule deferred (finish,unmap) and return
1581          * immediately */
1582         list_for_each(pos, &as_share->bound_list) {
1583                 hwctx = container_of(pos, struct nvhost_hwctx,
1584                                      as_share_bound_list_node);
1585                 if (likely(!hwctx->has_timedout)) {
1586                         ch = (struct channel_gk20a *)hwctx->priv;
1587                         BUG_ON(!ch);
1588                         err = gk20a_channel_finish(ch, timeout);
1589                         if (err)
1590                                 break;
1591                 }
1592         }
1593
1594         if (!err)
1595                 vm->unmap_user(vm, offset, memmgr, r);
1596
1597         return err;
1598 }
1599
1600
1601 const struct nvhost_as_moduleops gk20a_as_moduleops = {
1602         .alloc_share   = gk20a_as_alloc_share,
1603         .release_share = gk20a_as_release_share,
1604         .alloc_space   = gk20a_as_alloc_space,
1605         .free_space    = gk20a_as_free_space,
1606         .bind_hwctx    = gk20a_as_bind_hwctx,
1607         .map_buffer    = gk20a_as_map_buffer,
1608         .unmap_buffer  = gk20a_as_unmap_buffer,
1609 };
1610
1611 int gk20a_init_bar1_vm(struct mm_gk20a *mm)
1612 {
1613         int err;
1614         struct mem_mgr *nvmap = mem_mgr_from_mm(mm);
1615         phys_addr_t inst_pa;
1616         void *inst_ptr;
1617         struct vm_gk20a *vm = &mm->bar1.vm;
1618         struct inst_desc *inst_block = &mm->bar1.inst_block;
1619         phys_addr_t pde_addr;
1620         u32 pde_addr_lo;
1621         u32 pde_addr_hi;
1622
1623         vm->mm = mm;
1624
1625         mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
1626
1627         nvhost_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
1628
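        /* start one pde into the space so gpu va 0 is never handed out */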
1629         vm->va_start = mm->pde_stride * 1;
1630         vm->va_limit = mm->bar1.aperture_size;
1631
1632         {
1633                 u32 pde_lo, pde_hi;
1634                 pde_range_from_vaddr_range(vm,
1635                                            0, vm->va_limit-1,
1636                                            &pde_lo, &pde_hi);
1637                 vm->pdes.num_pdes = pde_hi + 1;
1638         }
1639
1640         /* bar1 is likely only to ever use/need small page sizes. */
1641         /* But just in case, for now... arrange for both.*/
1642         vm->pdes.ptes[gmmu_page_size_small] =
1643                 kzalloc(sizeof(struct page_table_gk20a) *
1644                         vm->pdes.num_pdes, GFP_KERNEL);
1645
1646         vm->pdes.ptes[gmmu_page_size_big] =
1647                 kzalloc(sizeof(struct page_table_gk20a) *
1648                         vm->pdes.num_pdes, GFP_KERNEL);
1649
1650         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
1651               vm->pdes.ptes[gmmu_page_size_big]))
1652                 return -ENOMEM;
1653
1654         nvhost_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d",
1655                    vm->va_limit, vm->pdes.num_pdes);
1656
1657
1658         /* allocate the page table directory */
1659         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
1660                                &vm->pdes.sgt);
1661         if (err)
1662                 goto clean_up;
1663
1664         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
1665         if (err) {
1666                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
1667                 goto clean_up;
1668         }
1669         nvhost_dbg(dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
1670                         vm->pdes.kv, (u64)sg_phys(vm->pdes.sgt->sgl));
1671         /* we could release vm->pdes.kv but it's only one page... */
1672
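        /* split the page directory base into the 4KB-aligned low field and
         * the high word used when programming the inst block below */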
1673         pde_addr = sg_phys(vm->pdes.sgt->sgl);
1674         pde_addr_lo = u64_lo32(pde_addr) >> 12;
1675         pde_addr_hi = u64_hi32(pde_addr);
1676
1677         nvhost_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
1678                 (u64)sg_phys(vm->pdes.sgt->sgl), pde_addr_lo, pde_addr_hi);
1679
1680         /* allocate instance mem for bar1 */
1681         inst_block->mem.size = ram_in_alloc_size_v();
1682         inst_block->mem.ref =
1683                 nvhost_memmgr_alloc(nvmap, inst_block->mem.size,
1684                                     DEFAULT_ALLOC_ALIGNMENT,
1685                                     DEFAULT_ALLOC_FLAGS,
1686                                     0);
1687
1688         if (IS_ERR(inst_block->mem.ref)) {
1689                 inst_block->mem.ref = NULL;
1690                 err = -ENOMEM;
1691                 goto clean_up;
1692         }
1693
1694         inst_block->mem.sgt = nvhost_memmgr_sg_table(nvmap,
1695                         inst_block->mem.ref);
1696         /* IS_ERR throws a warning here (expecting void *) */
1697         if (IS_ERR_OR_NULL(inst_block->mem.sgt)) {
1698                 inst_pa = 0;
1699                 err = -ENOMEM;
1700                 goto clean_up;
1701         }
1702         inst_pa = sg_phys(inst_block->mem.sgt->sgl);
1703
1704         inst_ptr = nvhost_memmgr_mmap(inst_block->mem.ref);
1705         if (IS_ERR(inst_ptr)) {
1706                 err = -ENOMEM;
1707                 goto clean_up;
1708         }
1709
1710         nvhost_dbg_info("bar1 inst block physical addr = 0x%llx, kv = 0x%p",
1711                 (u64)inst_pa, inst_ptr);
1712
1713         memset(inst_ptr, 0, ram_fc_size_val_v());
1714
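        /* program the inst block: page directory base (video memory,
         * volatile) followed by the va limit for this address space */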
1715         mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
1716                 ram_in_page_dir_base_target_vid_mem_f() |
1717                 ram_in_page_dir_base_vol_true_f() |
1718                 ram_in_page_dir_base_lo_f(pde_addr_lo));
1719
1720         mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
1721                 ram_in_page_dir_base_hi_f(pde_addr_hi));
1722
1723         mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
1724                  u64_lo32(vm->va_limit) | 0xFFF);
1725
1726         mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
1727                 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
1728
1729         nvhost_memmgr_munmap(inst_block->mem.ref, inst_ptr);
1730
1731         nvhost_dbg_info("bar1 inst block ptr: %08llx",  (u64)inst_pa);
1732         nvhost_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1",
1733                 1, (vm->va_limit >> 12) - 1, 1);
1734         /* initialize just in case we try to use it anyway */
1735         nvhost_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused",
1736                               0x0badc0de, /* start */
1737                               1, /* length */
1738                               1); /* align */
1739
1740
1741         vm->mapped_buffers = RB_ROOT;
1742
1743         vm->alloc_va       = gk20a_vm_alloc_va;
1744         vm->free_va        = gk20a_vm_free_va;
1745         vm->map            = gk20a_vm_map;
1746         vm->unmap          = gk20a_vm_unmap;
1747         vm->unmap_user     = gk20a_vm_unmap_user;
1748         vm->tlb_inval      = gk20a_mm_tlb_invalidate;
1749         vm->remove_support = gk20a_vm_remove_support;
1750
1751         return 0;
1752
1753 clean_up:
1754         /* free, etc */
1755         return err;
1756 }
1757
1758 /* pmu vm, share channel_vm interfaces */
1759 int gk20a_init_pmu_vm(struct mm_gk20a *mm)
1760 {
1761         int err;
1762         struct mem_mgr *nvmap = mem_mgr_from_mm(mm);
1763         phys_addr_t inst_pa;
1764         void *inst_ptr;
1765         struct vm_gk20a *vm = &mm->pmu.vm;
1766         struct inst_desc *inst_block = &mm->pmu.inst_block;
1767         u64 pde_addr;
1768         u32 pde_addr_lo;
1769         u32 pde_addr_hi;
1770
1771         vm->mm = mm;
1772
1773         mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
1774
1775         nvhost_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
1776
1777         vm->va_start  = GK20A_PMU_VA_START;
1778         vm->va_limit  = vm->va_start + mm->pmu.aperture_size;
1779
1780         {
1781                 u32 pde_lo, pde_hi;
1782                 pde_range_from_vaddr_range(vm,
1783                                            0, vm->va_limit-1,
1784                                            &pde_lo, &pde_hi);
1785                 vm->pdes.num_pdes = pde_hi + 1;
1786         }
1787
1788         /* The pmu is likely only to ever use/need small page sizes. */
1789         /* But just in case, for now... arrange for both.*/
1790         vm->pdes.ptes[gmmu_page_size_small] =
1791                 kzalloc(sizeof(struct page_table_gk20a) *
1792                         vm->pdes.num_pdes, GFP_KERNEL);
1793
1794         vm->pdes.ptes[gmmu_page_size_big] =
1795                 kzalloc(sizeof(struct page_table_gk20a) *
1796                         vm->pdes.num_pdes, GFP_KERNEL);
1797
1798         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
1799               vm->pdes.ptes[gmmu_page_size_big]))
1800                 return -ENOMEM;
1801
1802         nvhost_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d",
1803                    vm->va_limit, vm->pdes.num_pdes);
1804
1805         /* allocate the page table directory */
1806         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
1807                                &vm->pdes.sgt);
1808         if (err)
1809                 goto clean_up;
1810
1811         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
1812         if (err) {
1813                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
1814                 goto clean_up;
1815         }
1816         nvhost_dbg_info("pmu pdes phys @ 0x%llx",
1817                         (u64)sg_phys(vm->pdes.sgt->sgl));
1818         /* we could release vm->pdes.kv but it's only one page... */
1819
1820         pde_addr = sg_phys(vm->pdes.sgt->sgl);
1821         pde_addr_lo = u64_lo32(pde_addr) >> 12;
1822         pde_addr_hi = u64_hi32(pde_addr);
1823
1824         nvhost_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
1825                         (u64)pde_addr, pde_addr_lo, pde_addr_hi);
1826
1827         /* allocate instance mem for pmu */
1828         inst_block->mem.size = GK20A_PMU_INST_SIZE;
1829         inst_block->mem.ref =
1830                 nvhost_memmgr_alloc(nvmap, inst_block->mem.size,
1831                                     DEFAULT_ALLOC_ALIGNMENT,
1832                                     DEFAULT_ALLOC_FLAGS,
1833                                     0);
1834
1835         if (IS_ERR(inst_block->mem.ref)) {
1836                 inst_block->mem.ref = NULL;
1837                 err = -ENOMEM;
1838                 goto clean_up;
1839         }
1840
1841         inst_block->mem.sgt = nvhost_memmgr_sg_table(nvmap,
1842                         inst_block->mem.ref);
1843         /* IS_ERR throws a warning here (expecting void *) */
1844         if (IS_ERR_OR_NULL(inst_block->mem.sgt)) {
1845                 inst_pa = 0;
1846                 err = -ENOMEM;
1847                 goto clean_up;
1848         }
1849         inst_pa = sg_phys(inst_block->mem.sgt->sgl);
1850
1851         nvhost_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);
1852
1853         inst_ptr = nvhost_memmgr_mmap(inst_block->mem.ref);
1854         if (IS_ERR(inst_ptr)) {
1855                 err = -ENOMEM;
1856                 goto clean_up;
1857         }
1858
1859         memset(inst_ptr, 0, GK20A_PMU_INST_SIZE);
1860
1861         mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
1862                 ram_in_page_dir_base_target_vid_mem_f() |
1863                 ram_in_page_dir_base_vol_true_f() |
1864                 ram_in_page_dir_base_lo_f(pde_addr_lo));
1865
1866         mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
1867                 ram_in_page_dir_base_hi_f(pde_addr_hi));
1868
1869         mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
1870                  u64_lo32(vm->va_limit) | 0xFFF);
1871
1872         mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
1873                 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
1874
1875         nvhost_memmgr_munmap(inst_block->mem.ref, inst_ptr);
1876
1877         nvhost_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu",
1878                 (vm->va_start >> 12), (vm->va_limit >> 12) - 1, 1);
1879         /* initialize just in case we try to use it anyway */
1880         nvhost_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused",
1881                               0x0badc0de, /* start */
1882                               1, /* length */
1883                               1); /* align */
1884
1885
1886         vm->mapped_buffers = RB_ROOT;
1887
1888         vm->alloc_va       = gk20a_vm_alloc_va;
1889         vm->free_va        = gk20a_vm_free_va;
1890         vm->map            = gk20a_vm_map;
1891         vm->unmap          = gk20a_vm_unmap;
1892         vm->unmap_user     = gk20a_vm_unmap_user;
1893         vm->tlb_inval      = gk20a_mm_tlb_invalidate;
1894         vm->remove_support = gk20a_vm_remove_support;
1895
1896         return 0;
1897
1898 clean_up:
1899         /* free, etc */
1900         return err;
1901 }
1902
1903 void gk20a_mm_fb_flush(struct gk20a *g)
1904 {
1905         u32 data;
1906         s32 retry = 100;
1907
1908         nvhost_dbg_fn("");
1909
1910         /* Make sure all previous writes are committed to the L2. There's no
1911            guarantee that writes are to DRAM. This will be a sysmembar internal
1912            to the L2. */
1913         gk20a_writel(g, flush_fb_flush_r(),
1914                 flush_fb_flush_pending_busy_f());
1915
1916         do {
1917                 data = gk20a_readl(g, flush_fb_flush_r());
1918
1919                 if (flush_fb_flush_outstanding_v(data) ==
1920                         flush_fb_flush_outstanding_true_v() ||
1921                     flush_fb_flush_pending_v(data) ==
1922                         flush_fb_flush_pending_busy_v()) {
1923                                 nvhost_dbg_info("fb_flush 0x%x", data);
1924                                 retry--;
1925                                 udelay(20);
1926                 } else
1927                         break;
1928         } while (retry >= 0);
1929
1930         if (retry < 0)
1931                 nvhost_warn(dev_from_gk20a(g),
1932                         "fb_flush too many retries");
1933 }
1934
1935 void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
1936 {
1937         u32 data;
1938         s32 retry = 200;
1939
1940         nvhost_dbg_fn("");
1941         /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
1942            as clean, so subsequent reads might hit in the L2. */
1943         gk20a_writel(g, flush_l2_flush_dirty_r(),
1944                 flush_l2_flush_dirty_pending_busy_f());
1945
1946         do {
1947                 data = gk20a_readl(g, flush_l2_flush_dirty_r());
1948
1949                 if (flush_l2_flush_dirty_outstanding_v(data) ==
1950                         flush_l2_flush_dirty_outstanding_true_v() ||
1951                     flush_l2_flush_dirty_pending_v(data) ==
1952                         flush_l2_flush_dirty_pending_busy_v()) {
1953                                 nvhost_dbg_info("l2_flush_dirty 0x%x", data);
1954                                 retry--;
1955                                 udelay(20);
1956                 } else
1957                         break;
1958         } while (retry >= 0);
1959
1960         if (retry < 0)
1961                 nvhost_warn(dev_from_gk20a(g),
1962                         "l2_flush_dirty too many retries");
1963
1964         if (!invalidate)
1965                 return;
1966
1967         /* Invalidate any clean lines from the L2 so subsequent reads go to
1968            DRAM. Dirty lines are not affected by this operation. */
1969         gk20a_writel(g, flush_l2_system_invalidate_r(),
1970                 flush_l2_system_invalidate_pending_busy_f());
1971
1972         do {
1973                 data = gk20a_readl(g, flush_l2_system_invalidate_r());
1974
1975                 if (flush_l2_system_invalidate_outstanding_v(data) ==
1976                         flush_l2_system_invalidate_outstanding_true_v() ||
1977                     flush_l2_system_invalidate_pending_v(data) ==
1978                         flush_l2_system_invalidate_pending_busy_v()) {
1979                                 nvhost_dbg_info("l2_system_invalidate 0x%x", data);
1980                                 retry--;
1981                                 udelay(20);
1982                 } else
1983                         break;
1984         } while (retry >= 0);
1985
1986         if (retry < 0)
1987                 nvhost_warn(dev_from_gk20a(g),
1988                         "l2_system_invalidate too many retries");
1989 }
1990
1991 static void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
1992 {
1993         struct gk20a *g = gk20a_from_vm(vm);
1994         u32 addr_lo = u64_lo32(sg_phys(vm->pdes.sgt->sgl) >> 12);
1995         u32 data;
1996         s32 retry = 200;
1997
1998         /* pagetables are considered sw states which are preserved after
1999            prepare_poweroff. When gk20a deinit releases those pagetables,
2000            common code in vm unmap path calls tlb invalidate that touches
2001            hw. Use the power_on flag to skip tlb invalidation when gpu
2002            power is turned off */
2003         if (!g->power_on)
2004                 return;
2005
2006         nvhost_dbg_fn("");
2007
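        /* wait for free space in the mmu pri fifo before posting the
         * invalidate */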
2008         do {
2009                 data = gk20a_readl(g, fb_mmu_ctrl_r());
2010                 if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
2011                         break;
2012                 udelay(20);
2013                 retry--;
2014         } while (retry >= 0);
2015
2016         if (retry < 0)
2017                 nvhost_warn(dev_from_gk20a(g),
2018                         "wait mmu fifo space too many retries");
2019
2020         gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
2021                 fb_mmu_invalidate_pdb_addr_f(addr_lo) |
2022                 fb_mmu_invalidate_pdb_aperture_vid_mem_f());
2023
2024         /* this is a sledgehammer, it would seem */
2025         gk20a_writel(g, fb_mmu_invalidate_r(),
2026                 fb_mmu_invalidate_all_pdb_true_f() |
2027                 fb_mmu_invalidate_all_va_true_f() |
2028                 fb_mmu_invalidate_trigger_true_f());
2029
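        /* wait for the invalidate to be consumed (pri fifo empty again) */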
2030         do {
2031                 data = gk20a_readl(g, fb_mmu_ctrl_r());
2032                 if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
2033                         fb_mmu_ctrl_pri_fifo_empty_false_f())
2034                         break;
2035                 retry--;
2036                 udelay(20);
2037         } while (retry >= 0);
2038
2039         if (retry < 0)
2040                 nvhost_warn(dev_from_gk20a(g),
2041                         "mmu invalidate too many retries");
2042 }
2043
2044 #if 0 /* VM DEBUG */
2045
2046 /* print pdes/ptes for a gpu virtual address range under a vm */
2047 void gk20a_mm_dump_vm(struct vm_gk20a *vm,
2048                 u64 va_begin, u64 va_end, char *label)
2049 {
2050         struct mem_mgr *client = mem_mgr_from_vm(vm);
2051         struct mm_gk20a *mm = vm->mm;
2052         struct page_table_gk20a *pte_s;
2053         u64 pde_va, pte_va;
2054         u32 pde_i, pde_lo, pde_hi;
2055         u32 pte_i, pte_lo, pte_hi;
2056         u32 pte_space_page_cur, pte_space_offset_cur;
2057         u32 pte_space_page_offset;
2058         u32 num_ptes, page_size;
2059         void *pde, *pte;
2060         phys_addr_t pte_addr;
2061         int err;
2062
2063         pde_range_from_vaddr_range(vm, va_begin, va_end,
2064                         &pde_lo, &pde_hi);
2065
2066         nvhost_err(dev_from_vm(vm),
2067                 "%s page table entries for gpu va 0x%016llx -> 0x%016llx\n",
2068                 label, va_begin, va_end);
2069
2070         for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
2071                 pde = pde_from_index(vm, pde_i);
2072                 pde_va = pde_i * mm->pde_stride;
2073                 nvhost_err(dev_from_vm(vm),
2074                         "\t[0x%016llx -> 0x%016llx] pde @ 0x%08x: 0x%08x, 0x%08x\n",
2075                         pde_va, pde_va + mm->pde_stride - 1,
2076                         sg_phys(vm->pdes.sgt->sgl) + pde_i * gmmu_pde__size_v(),
2077                         mem_rd32(pde, 0), mem_rd32(pde, 1));
2078                 /* note: assumes small pages back this pde */
2079                 pte_s = vm->pdes.ptes[gmmu_page_size_small] + pde_i;
2080
2081                 num_ptes = mm->page_table_sizing[pte_s->pgsz_idx].num_ptes;
2082                 page_size = mm->pde_stride / num_ptes;
2083                 pte_lo = 0;
2084                 pte_hi = num_ptes - 1;
2085
2086                 pte_space_page_offset_from_index(pte_lo,
2087                                                 &pte_space_page_cur,
2088                                                 &pte_space_offset_cur);
2089
2090                 err = map_gmmu_pages(pte_s->ref, pte_s->sgt, &pte);
2091                 pte_s->sgt = nvhost_memmgr_sg_table(client, pte_s->ref);
2092                 if (WARN_ON(IS_ERR(pte_s->sgt)))
2093                         return;
2094                 pte_addr = sg_phys(pte_s->sgt->sgl);
2095
2096                 for (pte_i = pte_lo; pte_i <= pte_hi; pte_i++) {
2097
2098                         pte_va = pde_va + pte_i * page_size;
2099
2100                         if (pte_va < va_begin)
2101                                 continue;
2102                         if (pte_va > va_end)
2103                                 break;
2104
2105                         pte_space_page_offset = pte_i;
2106
2107                         nvhost_err(dev_from_vm(vm),
2108                                 "\t\t[0x%016llx -> 0x%016llx] pte @ 0x%08x : 0x%08x, 0x%08x\n",
2109                                 pte_va, pte_va + page_size - 1,
2110                                 pte_addr + pte_i * gmmu_pte__size_v(),
2111                                 mem_rd32(pte + pte_space_page_offset * 8, 0),
2112                                 mem_rd32(pte + pte_space_page_offset * 8, 1));
2113                 }
2114
2115                 unmap_gmmu_pages(pte_s->ref, pte_s->sgt, pte);
2116         }
2117 }
2118 #endif /* VM DEBUG */
2119
2120 int gk20a_mm_suspend(struct gk20a *g)
2121 {
2122         nvhost_dbg_fn("");
2123
2124         gk20a_mm_fb_flush(g);
2125         gk20a_mm_l2_flush(g, true);
2126
2127         nvhost_dbg_fn("done");
2128         return 0;
2129 }