video: tegra: host: Add sparse texture support
1 /*
2  * drivers/video/tegra/host/gk20a/mm_gk20a.c
3  *
4  * GK20A memory management
5  *
6  * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>
23 #include <linux/highmem.h>
24 #include <linux/log2.h>
25 #include <linux/nvhost.h>
26 #include <linux/scatterlist.h>
27 #include <linux/nvmap.h>
28 #include <linux/tegra-soc.h>
29 #include <asm/cacheflush.h>
30
31 #include "dev.h"
32 #include "nvhost_as.h"
33 #include "gk20a.h"
34 #include "mm_gk20a.h"
35 #include "hw_gmmu_gk20a.h"
36 #include "hw_fb_gk20a.h"
37 #include "hw_bus_gk20a.h"
38 #include "hw_ram_gk20a.h"
39 #include "hw_mc_gk20a.h"
40 #include "hw_flush_gk20a.h"
41 #include "hw_ltc_gk20a.h"
42
43 #include "kind_gk20a.h"
44
45 /*
46  * GPU mapping life cycle
47  * ======================
48  *
49  * Kernel mappings
50  * ---------------
51  *
52  * Kernel mappings are created through vm.map(..., false):
53  *
54  *  - Mappings to the same allocations are reused and refcounted.
55  *  - This path does not support deferred unmapping (i.e. kernel must wait for
56  *    all hw operations on the buffer to complete before unmapping).
57  *  - References to memmgr and mem_handle are owned and managed by the (kernel)
58  *    clients of the gk20a_vm layer.
59  *
60  *
61  * User space mappings
62  * -------------------
63  *
64  * User space mappings are created through as.map_buffer -> vm.map(..., true):
65  *
66  *  - Mappings to the same allocations are reused and refcounted.
67  *  - This path supports deferred unmapping (i.e. we delay the actual unmapping
68  *    until all hw operations have completed).
69  *  - References to memmgr and mem_handle are owned and managed by the vm_gk20a
70  *    layer itself. vm.map acquires these refs, and sets
71  *    mapped_buffer->own_mem_ref to record that we must release the refs when we
72  *    actually unmap.
73  *
74  */
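
/*
 * Simplified call flow (sketch; callers outside this file omitted):
 *
 *   map:    vm.map(..., user_mapped) -> gk20a_vm_map()
 *             -> gk20a_vm_map_duplicate_locked() reuses and kref_get()s an
 *                existing mapping when possible; otherwise a new
 *                mapped_buffer_node is built around __locked_gmmu_map() and
 *                inserted into the per-vm rb-tree.
 *
 *   unmap:  user mappings funnel into gk20a_vm_unmap_user(), which drops the
 *           user refcount and kref_put()s the mapping; the final put runs
 *           gk20a_vm_unmap_locked(), so the actual unmap may be deferred.
 *
 *   kernel: gk20a_gmmu_map()/gk20a_gmmu_unmap() call __locked_gmmu_map() and
 *           __locked_gmmu_unmap() directly, with no deferral.
 */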
75
76 static inline int vm_aspace_id(struct vm_gk20a *vm)
77 {
78         /* -1 is bar1 or pmu, etc. */
79         return vm->as_share ? vm->as_share->id : -1;
80 }
81 static inline u32 hi32(u64 f)
82 {
83         return (u32)(f >> 32);
84 }
85 static inline u32 lo32(u64 f)
86 {
87         return (u32)(f & 0xffffffff);
88 }
89
90 #define FLUSH_CPU_DCACHE(va, pa, size)  \
91         do {    \
92                 __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
93                 outer_flush_range(pa, pa + (size_t)(size));             \
94         } while (0)
95
96 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
97 static struct mapped_buffer_node *find_mapped_buffer_locked(
98                                         struct rb_root *root, u64 addr);
99 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
100                                 struct rb_root *root, struct mem_handle *r);
101 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
102                                    enum gmmu_pgsz_gk20a pgsz_idx,
103                                    struct sg_table *sgt,
104                                    u64 first_vaddr, u64 last_vaddr,
105                                    u8 kind_v, u32 ctag_offset, bool cacheable,
106                                    int rw_flag);
107 static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
108 static void gk20a_vm_remove_support(struct vm_gk20a *vm);
109
110
111 /* note: keep the page sizes sorted lowest to highest here */
112 static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
113 static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
114 static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
115                                                                 0x1ffffLL };
116 static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
117
118 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
119 {
120         nvhost_dbg_fn("");
121         gk20a_reset(g, mc_enable_pfb_enabled_f()
122                         | mc_enable_l2_enabled_f()
123                         | mc_enable_ce2_enabled_f()
124                         | mc_enable_xbar_enabled_f()
125                         | mc_enable_hub_enabled_f());
126
127         return 0;
128 }
129
130 void gk20a_remove_mm_support(struct mm_gk20a *mm)
131 {
132         struct gk20a *g = mm->g;
133         struct device *d = dev_from_gk20a(g);
134         struct vm_gk20a *vm = &mm->bar1.vm;
135         struct inst_desc *inst_block = &mm->bar1.inst_block;
136
137         nvhost_dbg_fn("");
138
139         if (inst_block->cpuva)
140                 dma_free_coherent(d, inst_block->size,
141                         inst_block->cpuva, inst_block->iova);
142         inst_block->cpuva = NULL;
143         inst_block->iova = 0;
144
145         gk20a_vm_remove_support(vm);
146 }
147
148 int gk20a_init_mm_setup_sw(struct gk20a *g)
149 {
150         struct mm_gk20a *mm = &g->mm;
151         int i;
152
153         nvhost_dbg_fn("");
154
155         if (mm->sw_ready) {
156                 nvhost_dbg_fn("skip init");
157                 return 0;
158         }
159
160         mm->g = g;
161         mutex_init(&mm->tlb_lock);
162         mutex_init(&mm->l2_op_lock);
163         mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
164         mm->pde_stride    = mm->big_page_size << 10;
165         mm->pde_stride_shift = ilog2(mm->pde_stride);
166         BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
167
168         for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {
169
170                 u32 num_ptes, pte_space, num_pages;
171
172                 /* assuming "full" page tables */
173                 num_ptes = mm->pde_stride / gmmu_page_sizes[i];
174
175                 pte_space = num_ptes * gmmu_pte__size_v();
176                 /* allocate whole pages */
177                 pte_space = roundup(pte_space, PAGE_SIZE);
178
179                 num_pages = pte_space / PAGE_SIZE;
180                 /* make sure "order" is viable */
181                 BUG_ON(!is_power_of_2(num_pages));
182
183                 mm->page_table_sizing[i].num_ptes = num_ptes;
184                 mm->page_table_sizing[i].order = ilog2(num_pages);
185         }
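
        /*
         * Worked example, assuming 4KB kernel pages and 8-byte PTEs
         * (gmmu_pte__size_v(), see the "ptes are 8B" note further down):
         *   pde_stride = 128KB << 10 = 128MB, pde_stride_shift = 27
         *   4KB   page tables: 32768 ptes -> 256KB -> 64 pages -> order 6
         *   128KB page tables:  1024 ptes ->   8KB ->  2 pages -> order 1
         */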
186
187         /*TBD: make channel vm size configurable */
188         mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
189
190         nvhost_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
191
192         nvhost_dbg_info("small page-size (%dKB) pte array: %dKB",
193                         gmmu_page_sizes[gmmu_page_size_small] >> 10,
194                         (mm->page_table_sizing[gmmu_page_size_small].num_ptes *
195                          gmmu_pte__size_v()) >> 10);
196
197         nvhost_dbg_info("big page-size (%dKB) pte array: %dKB",
198                         gmmu_page_sizes[gmmu_page_size_big] >> 10,
199                         (mm->page_table_sizing[gmmu_page_size_big].num_ptes *
200                          gmmu_pte__size_v()) >> 10);
201
202
203         gk20a_init_bar1_vm(mm);
204
205         gk20a_init_uncompressed_kind_map();
206         gk20a_init_kind_attr();
207
208         mm->remove_support = gk20a_remove_mm_support;
209         mm->sw_ready = true;
210
211         nvhost_dbg_fn("done");
212         return 0;
213 }
214
215 /* make sure gk20a_init_mm_support is called before */
216 static int gk20a_init_mm_setup_hw(struct gk20a *g)
217 {
218         struct mm_gk20a *mm = &g->mm;
219         struct inst_desc *inst_block = &mm->bar1.inst_block;
220         phys_addr_t inst_pa = inst_block->cpu_pa;
221
222         nvhost_dbg_fn("");
223
224         /* set large page size in fb
225          * note this is very early on, can we defer it? */
226         {
227                 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
228
229                 if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K)
230                         fb_mmu_ctrl = (fb_mmu_ctrl &
231                                        ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
232                                 fb_mmu_ctrl_vm_pg_size_128kb_f();
233                 else
234                         BUG_ON(1); /* no support/testing for larger ones yet */
235
236                 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
237         }
238
239         inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
240         nvhost_dbg_info("bar1 inst block ptr: 0x%08x",  (u32)inst_pa);
241
242         /* this is very early in init... can we defer this? */
243         {
244                 gk20a_writel(g, bus_bar1_block_r(),
245                              bus_bar1_block_target_vid_mem_f() |
246                              bus_bar1_block_mode_virtual_f() |
247                              bus_bar1_block_ptr_f(inst_pa));
248         }
249
250         nvhost_dbg_fn("done");
251         return 0;
252 }
253
254 int gk20a_init_mm_support(struct gk20a *g)
255 {
256         int err;
257
258         err = gk20a_init_mm_reset_enable_hw(g);
259         if (err)
260                 return err;
261
262         err = gk20a_init_mm_setup_sw(g);
263         if (err)
264                 return err;
265
266         err = gk20a_init_mm_setup_hw(g);
267         if (err)
268                 return err;
269
270         return err;
271 }
272
273 #ifdef TEGRA_GRHOST_GK20A_PHYS_PAGE_TABLES
274 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
275                             void **handle,
276                             struct sg_table **sgt)
277 {
278         u32 num_pages = 1 << order;
279         u32 len = num_pages * PAGE_SIZE;
280         int err;
281         struct page *pages;
282
283         nvhost_dbg_fn("");
284
285         pages = alloc_pages(GFP_KERNEL, order);
286         if (!pages) {
287                 nvhost_dbg(dbg_pte, "alloc_pages failed\n");
288                 goto err_out;
289         }
290         *sgt = kzalloc(sizeof(**sgt), GFP_KERNEL);
291         if (!(*sgt)) {
292                 nvhost_dbg(dbg_pte, "cannot allocate sg table");
293                 goto err_alloced;
294         }
295         err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
296         if (err) {
297                 nvhost_dbg(dbg_pte, "sg_alloc_table failed\n");
298                 goto err_sg_table;
299         }
300         sg_set_page((*sgt)->sgl, pages, len, 0);
301         *handle = page_address(pages);
302         memset(*handle, 0, len);
303         FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len);
304
305         return 0;
306
307 err_sg_table:
308         kfree(*sgt);
309 err_alloced:
310         __free_pages(pages, order);
311 err_out:
312         return -ENOMEM;
313 }
314
315 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
316                             struct sg_table *sgt, u32 order)
317 {
318         nvhost_dbg_fn("");
319         BUG_ON(sgt == NULL);
320         free_pages((unsigned long)handle, order);
321         sg_free_table(sgt);
322         kfree(sgt);
323 }
324
325 static int map_gmmu_pages(void *handle, struct sg_table *sgt, void **va)
326 {
327         FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
328         *va = handle;
329         return 0;
330 }
331
332 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, u32 *va)
333 {
334         FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
335 }
336 #else
337 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
338                             void **handle,
339                             struct sg_table **sgt)
340 {
341         struct mem_mgr *client = mem_mgr_from_vm(vm);
342         struct mem_handle *r;
343         u32 num_pages = 1 << order;
344         u32 len = num_pages * PAGE_SIZE;
345         void *va;
346
347         nvhost_dbg_fn("");
348
349         r = nvhost_memmgr_alloc(client, len,
350                                 DEFAULT_ALLOC_ALIGNMENT,
351                                 DEFAULT_ALLOC_FLAGS,
352                                 0);
353         if (IS_ERR(r)) {
354                 nvhost_dbg(dbg_pte, "nvmap_alloc failed\n");
355                 goto err_out;
356         }
357         va = nvhost_memmgr_mmap(r);
358         if (!va) {
359                 nvhost_dbg(dbg_pte, "nvmap_mmap failed\n");
360                 goto err_alloced;
361         }
362         memset(va, 0, len);
363         nvhost_memmgr_munmap(r, va);
364
365         *sgt = nvhost_memmgr_pin(client, r, dev_from_vm(vm), mem_flag_none);
366         if (IS_ERR(*sgt)) {
367                 *sgt = NULL;
368                 goto err_alloced;
369         }
370
371         *handle = (void *)r;
372
373         return 0;
374
375 err_alloced:
376         nvhost_memmgr_put(client, r);
377 err_out:
378         return -ENOMEM;
379 }
380
381 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
382                             struct sg_table *sgt, u32 order)
383 {
384         struct mem_mgr *client = mem_mgr_from_vm(vm);
385         nvhost_dbg_fn("");
386         BUG_ON(sgt == NULL);
387         nvhost_memmgr_unpin(client, handle, dev_from_vm(vm), sgt);
388         nvhost_memmgr_put(client, handle);
389 }
390
391 static int map_gmmu_pages(void *handle, struct sg_table *sgt, void **va)
392 {
393         struct mem_handle *r = handle;
394         u32 *tmp_va;
395
396         nvhost_dbg_fn("");
397
398         tmp_va = nvhost_memmgr_mmap(r);
399         if (!tmp_va)
400                 goto err_out;
401
402         *va = tmp_va;
403         return 0;
404
405 err_out:
406         return -ENOMEM;
407 }
408
409 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, u32 *va)
410 {
411         struct mem_handle *r = handle;
412         nvhost_dbg_fn("");
413         nvhost_memmgr_munmap(r, va);
414 }
415 #endif
416
417 /* allocate a phys contig region big enough for a full
418  * sized gmmu page table for the given gmmu_page_size.
419  * the whole range is zeroed so it's "invalid"/will fault
420  */
421
422 static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
423                                         enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
424                                         struct page_table_gk20a *pte)
425 {
426         int err;
427         u32 pte_order;
428         void *handle;
429         struct sg_table *sgt;
430
431         nvhost_dbg_fn("");
432
433         /* allocate enough pages for the table */
434         pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order;
435
436         err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt);
437         if (err)
438                 return err;
439
440         nvhost_dbg(dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
441                         pte, gk20a_mm_iova_addr(sgt->sgl), pte_order);
442
443         pte->ref = handle;
444         pte->sgt = sgt;
445
446         return 0;
447 }
448
449 /* given address range (inclusive) determine the pdes crossed */
450 static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm,
451                                               u64 addr_lo, u64 addr_hi,
452                                               u32 *pde_lo, u32 *pde_hi)
453 {
454         *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift);
455         *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift);
456         nvhost_dbg(dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
457                    addr_lo, addr_hi, vm->mm->pde_stride_shift);
458         nvhost_dbg(dbg_pte, "pde_lo=%d pde_hi=%d",
459                    *pde_lo, *pde_hi);
460 }
461
462 static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
463 {
464         return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v());
465 }
466
467 static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm,
468                                        u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
469 {
470         u32 ret;
471         /* mask off pde part */
472         addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1));
473         /* shift over to get pte index. note assumption that pte index
474          * doesn't leak over into the high 32b */
475         ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]);
476
477         nvhost_dbg(dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
478         return ret;
479 }
480
481 static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page,
482                                                     u32 *pte_offset)
483 {
484         /* ptes are 8B regardless of pagesize */
485         /* pte space pages are 4KB. so 512 ptes per 4KB page*/
486         *pte_page = i >> 9;
487
488         /* this offset is a pte offset, not a byte offset */
489         *pte_offset = i & ((1<<9)-1);
490
491         nvhost_dbg(dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x",
492                    i, *pte_page, *pte_offset);
493 }
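
/*
 * Worked example for a small (4KB) page mapping, with the 128KB big page
 * size above (pde_stride_shift == 27):
 *
 *   gpu va 0x12345678:
 *     pde index  = va >> 27                  = 0x2
 *     pte index  = (va & (128MB - 1)) >> 12  = 0x2345
 *     pte_page   = pte index >> 9            = 0x11
 *     pte_offset = pte index & 511           = 0x145
 */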
494
495
496 /*
497  * given a pde index/page table number, make sure it has
498  * backing store; if not, allocate it and
499  * record it in the appropriate pde
500  */
501 static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
502                                 u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
503 {
504         int err;
505         struct page_table_gk20a *pte =
506                 vm->pdes.ptes[gmmu_pgsz_idx] + i;
507
508         nvhost_dbg_fn("");
509
510         /* if it's already in place it's valid */
511         if (pte->ref)
512                 return 0;
513
514         nvhost_dbg(dbg_pte, "alloc %dKB ptes for pde %d",
515                    gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
516
517         err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
518         if (err)
519                 return err;
520
521         /* rewrite pde */
522         update_gmmu_pde_locked(vm, i);
523
524         return 0;
525 }
526
527 static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
528                                                        u64 addr)
529 {
530         struct vm_reserved_va_node *va_node;
531         list_for_each_entry(va_node, &vm->reserved_va_list, reserved_va_list)
532                 if (addr >= va_node->vaddr_start &&
533                     addr < (u64)va_node->vaddr_start + (u64)va_node->size)
534                         return va_node;
535
536         return NULL;
537 }
538
539 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
540                          struct mapped_buffer_node ***mapped_buffers,
541                          int *num_buffers)
542 {
543         struct mapped_buffer_node *mapped_buffer;
544         struct mapped_buffer_node **buffer_list;
545         struct rb_node *node;
546         int i = 0;
547
548         mutex_lock(&vm->update_gmmu_lock);
549
550         buffer_list = kzalloc(sizeof(*buffer_list) *
551                               vm->num_user_mapped_buffers, GFP_KERNEL);
552         if (!buffer_list) {
553                 mutex_unlock(&vm->update_gmmu_lock);
554                 return -ENOMEM;
555         }
556
557         node = rb_first(&vm->mapped_buffers);
558         while (node) {
559                 mapped_buffer =
560                         container_of(node, struct mapped_buffer_node, node);
561                 if (mapped_buffer->user_mapped) {
562                         buffer_list[i] = mapped_buffer;
563                         kref_get(&mapped_buffer->ref);
564                         i++;
565                 }
566                 node = rb_next(&mapped_buffer->node);
567         }
568
569         BUG_ON(i != vm->num_user_mapped_buffers);
570
571         *num_buffers = vm->num_user_mapped_buffers;
572         *mapped_buffers = buffer_list;
573
574         mutex_unlock(&vm->update_gmmu_lock);
575
576         return 0;
577 }
578
579 static void gk20a_vm_unmap_locked_kref(struct kref *ref)
580 {
581         struct mapped_buffer_node *mapped_buffer =
582                 container_of(ref, struct mapped_buffer_node, ref);
583         gk20a_vm_unmap_locked(mapped_buffer);
584 }
585
586 void gk20a_vm_put_buffers(struct vm_gk20a *vm,
587                                  struct mapped_buffer_node **mapped_buffers,
588                                  int num_buffers)
589 {
590         int i;
591
592         mutex_lock(&vm->update_gmmu_lock);
593
594         for (i = 0; i < num_buffers; ++i)
595                 kref_put(&mapped_buffers[i]->ref,
596                          gk20a_vm_unmap_locked_kref);
597
598         mutex_unlock(&vm->update_gmmu_lock);
599
600         kfree(mapped_buffers);
601 }
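
/*
 * gk20a_vm_get_buffers() and gk20a_vm_put_buffers() are meant to be used as a
 * pair: get takes a reference on every user-mapped buffer so none of them can
 * be unmapped underneath the caller, and put drops those references again.
 * Any buffer whose last reference went away in between is actually unmapped
 * from the kref_put() callback, gk20a_vm_unmap_locked_kref().
 */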
602
603 static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
604 {
605         struct device *d = dev_from_vm(vm);
606         int retries;
607         struct mapped_buffer_node *mapped_buffer;
608
609         mutex_lock(&vm->update_gmmu_lock);
610
611         mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
612         if (!mapped_buffer) {
613                 mutex_unlock(&vm->update_gmmu_lock);
614                 nvhost_err(d, "invalid addr to unmap 0x%llx", offset);
615                 return;
616         }
617
618         if (mapped_buffer->flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
619                 mutex_unlock(&vm->update_gmmu_lock);
620
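                /* Bounded busy-wait for the last reference to go away:
                 * 1000 iterations x 50us, i.e. give up after roughly 50ms
                 * and log a sync-unmap failure. */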
621                 retries = 1000;
622                 while (retries) {
623                         if (atomic_read(&mapped_buffer->ref.refcount) == 1)
624                                 break;
625                         retries--;
626                         udelay(50);
627                 }
628                 if (!retries)
629                         nvhost_err(d, "sync-unmap failed on 0x%llx",
630                                                                 offset);
631                 mutex_lock(&vm->update_gmmu_lock);
632         }
633
634         mapped_buffer->user_mapped--;
635         if (mapped_buffer->user_mapped == 0)
636                 vm->num_user_mapped_buffers--;
637         kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
638
639         mutex_unlock(&vm->update_gmmu_lock);
640 }
641
642 static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
643                              u64 size,
644                              enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
645
646 {
647         struct nvhost_allocator *vma = &vm->vma[gmmu_pgsz_idx];
648         int err;
649         u64 offset;
650         u32 start_page_nr = 0, num_pages;
651         u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx];
652
653         if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) {
654                 dev_warn(dev_from_vm(vm),
655                          "invalid page size requested in gk20a vm alloc");
656                 return 0;
657         }
658
659         if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
660                 dev_warn(dev_from_vm(vm),
661                          "unsupported page size requested");
662                 return 0;
663
664         }
665
666         /* be certain we round up to gmmu_page_size if needed */
667         /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
668         size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
669
670         nvhost_dbg_info("size=0x%llx @ pgsz=%dKB", size,
671                         gmmu_page_sizes[gmmu_pgsz_idx]>>10);
672
673         /* The vma allocator represents page accounting. */
674         num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx];
675
676         err = vma->alloc(vma, &start_page_nr, num_pages);
677
678         if (err) {
679                 nvhost_err(dev_from_vm(vm),
680                            "%s oom: sz=0x%llx", vma->name, size);
681                 return 0;
682         }
683
684         offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx];
685         nvhost_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
686
687         return offset;
688 }
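
/*
 * Example: requesting 1MB of big-page (128KB) va space rounds the size up to
 * a 128KB multiple (already aligned here), asks the big-page allocator for
 * 1MB >> 17 = 8 pages and returns start_page_nr << 17 as the gpu va.
 * A return value of 0 means the allocation failed.
 */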
689
690 static void gk20a_vm_free_va(struct vm_gk20a *vm,
691                              u64 offset, u64 size,
692                              enum gmmu_pgsz_gk20a pgsz_idx)
693 {
694         struct nvhost_allocator *vma = &vm->vma[pgsz_idx];
695         u32 page_size = gmmu_page_sizes[pgsz_idx];
696         u32 page_shift = gmmu_page_shifts[pgsz_idx];
697         u32 start_page_nr, num_pages;
698         int err;
699
700         nvhost_dbg_info("%s free addr=0x%llx, size=0x%llx",
701                         vma->name, offset, size);
702
703         start_page_nr = (u32)(offset >> page_shift);
704         num_pages = (u32)((size + page_size - 1) >> page_shift);
705
706         err = vma->free(vma, start_page_nr, num_pages);
707         if (err) {
708                 nvhost_err(dev_from_vm(vm),
709                            "not found: offset=0x%llx, sz=0x%llx",
710                            offset, size);
711         }
712 }
713
714 static int insert_mapped_buffer(struct rb_root *root,
715                                 struct mapped_buffer_node *mapped_buffer)
716 {
717         struct rb_node **new_node = &(root->rb_node), *parent = NULL;
718
719         /* Figure out where to put new node */
720         while (*new_node) {
721                 struct mapped_buffer_node *cmp_with =
722                         container_of(*new_node, struct mapped_buffer_node,
723                                      node);
724
725                 parent = *new_node;
726
727                 if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */
728                         new_node = &((*new_node)->rb_left);
729                 else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */
730                         new_node = &((*new_node)->rb_right);
731                 else
732                         return -EINVAL; /* no fair dup'ing */
733         }
734
735         /* Add new node and rebalance tree. */
736         rb_link_node(&mapped_buffer->node, parent, new_node);
737         rb_insert_color(&mapped_buffer->node, root);
738
739         return 0;
740 }
741
742 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
743                                 struct rb_root *root, struct mem_handle *r)
744 {
745         struct rb_node *node = rb_first(root);
746         while (node) {
747                 struct mapped_buffer_node *mapped_buffer =
748                         container_of(node, struct mapped_buffer_node, node);
749                 if (mapped_buffer->handle_ref == r)
750                         return mapped_buffer;
751                 node = rb_next(&mapped_buffer->node);
752         }
753         return NULL;
754 }
755
756 static struct mapped_buffer_node *find_mapped_buffer_locked(
757                                         struct rb_root *root, u64 addr)
758 {
759
760         struct rb_node *node = root->rb_node;
761         while (node) {
762                 struct mapped_buffer_node *mapped_buffer =
763                         container_of(node, struct mapped_buffer_node, node);
764                 if (mapped_buffer->addr > addr) /* u64 cmp */
765                         node = node->rb_left;
766                 else if (mapped_buffer->addr != addr) /* u64 cmp */
767                         node = node->rb_right;
768                 else
769                         return mapped_buffer;
770         }
771         return NULL;
772 }
773
774 static struct mapped_buffer_node *find_mapped_buffer_range_locked(
775                                         struct rb_root *root, u64 addr)
776 {
777         struct rb_node *node = root->rb_node;
778         while (node) {
779                 struct mapped_buffer_node *m =
780                         container_of(node, struct mapped_buffer_node, node);
781                 if (m->addr <= addr && m->addr + m->size > addr)
782                         return m;
783                 else if (m->addr > addr) /* u64 cmp */
784                         node = node->rb_left;
785                 else
786                         node = node->rb_right;
787         }
788         return NULL;
789 }
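
/*
 * Three flavours of mapped-buffer lookup on the per-vm rb-tree:
 *  - find_mapped_buffer_locked():         exact match on the gpu va
 *  - find_mapped_buffer_range_locked():   the buffer containing a gpu va
 *  - find_mapped_buffer_reverse_locked(): linear walk matching a mem_handle,
 *    used to detect that a buffer is already mapped into this vm
 */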
790
791 /* convenience setup for nvmap buffer attr queries */
792 struct bfr_attr_query {
793         int err;
794         u64 v;
795 };
796 static u32 nvmap_bfr_param[] = {
797 #define BFR_SIZE   0
798         NVMAP_HANDLE_PARAM_SIZE,
799 #define BFR_ALIGN  1
800         NVMAP_HANDLE_PARAM_ALIGNMENT,
801 #define BFR_HEAP   2
802         NVMAP_HANDLE_PARAM_HEAP,
803 #define BFR_KIND   3
804         NVMAP_HANDLE_PARAM_KIND,
805 };
806 #define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
807
808 struct buffer_attrs {
809         struct sg_table *sgt;
810         u64 size;
811         u64 align;
812         u32 ctag_offset;
813         u32 ctag_lines;
814         int pgsz_idx;
815         u8 kind_v;
816         u8 uc_kind_v;
817 };
818
819 static void gmmu_select_page_size(struct buffer_attrs *bfr)
820 {
821         int i;
822         /*  choose the biggest first (top->bottom) */
823         for (i = (gmmu_nr_page_sizes-1); i >= 0; i--)
824                 if (!(gmmu_page_offset_masks[i] & bfr->align)) {
825                         /* would like to add this too but nvmap returns the
826                          * original requested size not the allocated size.
827                          * (!(gmmu_page_offset_masks[i] & bfr->size)) */
828                         bfr->pgsz_idx = i;
829                         break;
830                 }
831 }
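
/*
 * Example: a buffer whose alignment is a multiple of 128KB (offset mask
 * 0x1ffff clear) selects gmmu_page_size_big; a merely 4KB-aligned buffer
 * falls through to gmmu_page_size_small. If the alignment is smaller than
 * 4KB, pgsz_idx stays -1 and the caller rejects the buffer.
 */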
832
833 static int setup_buffer_size_and_align(struct device *d,
834                                        struct buffer_attrs *bfr,
835                                        struct bfr_attr_query *query,
836                                        u64 offset, u32 flags)
837 {
838         /* buffer allocation size and alignment must be a multiple
839            of one of the supported page sizes.*/
840         bfr->size = query[BFR_SIZE].v;
841         bfr->align = query[BFR_ALIGN].v;
842         bfr->pgsz_idx = -1;
843
844         /* If FIX_OFFSET is set, pgsz is determined. Otherwise, select
845          * page size according to memory alignment */
846         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
847                 bfr->pgsz_idx = NV_GMMU_VA_IS_UPPER(offset) ?
848                                 gmmu_page_size_big : gmmu_page_size_small;
849         } else {
850                 gmmu_select_page_size(bfr);
851         }
852
853         if (unlikely(bfr->pgsz_idx == -1)) {
854                 nvhost_warn(d, "unsupported buffer alignment: 0x%llx",
855                            bfr->align);
856                 return -EINVAL;
857         }
858
859         bfr->kind_v = query[BFR_KIND].v;
860
861         return 0;
862 }
863
864
865 static int setup_buffer_kind_and_compression(struct device *d,
866                                              u32 flags,
867                                              struct buffer_attrs *bfr,
868                                              enum gmmu_pgsz_gk20a pgsz_idx)
869 {
870         bool kind_compressible;
871
872         if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
873                 bfr->kind_v = gmmu_pte_kind_pitch_v();
874
875         if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
876                 nvhost_err(d, "kind 0x%x not supported", bfr->kind_v);
877                 return -EINVAL;
878         }
879
880         bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
881         /* find a suitable uncompressed kind if it becomes necessary later */
882         kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
883         if (kind_compressible) {
884                 bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
885                 if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
886                         /* shouldn't happen, but it is worth cross-checking */
887                         nvhost_err(d, "comptag kind 0x%x can't be"
888                                    " downgraded to uncompressed kind",
889                                    bfr->kind_v);
890                         return -EINVAL;
891                 }
892         }
893         /* comptags only supported for suitable kinds, 128KB pagesize */
894         if (unlikely(kind_compressible &&
895                      (gmmu_page_sizes[pgsz_idx] != 128*1024))) {
896                 /*
897                 nvhost_warn(d, "comptags specified"
898                 " but pagesize being used doesn't support it");*/
899                 /* it is safe to fall back to uncompressed as
900                    functionality is not harmed */
901                 bfr->kind_v = bfr->uc_kind_v;
902                 kind_compressible = false;
903         }
904         if (kind_compressible)
905                 bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
906                         COMP_TAG_LINE_SIZE_SHIFT;
907         else
908                 bfr->ctag_lines = 0;
909
910         return 0;
911 }
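
/*
 * Summary of the kind/compression decision above:
 *  - an invalid kind is treated as pitch (linear, uncompressed)
 *  - a compressible kind needs an uncompressed fallback kind and a 128KB
 *    page size; otherwise the mapping quietly falls back to the
 *    uncompressed kind
 *  - ctag_lines ends up non-zero only when compression will actually be used
 */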
912
913 static int validate_fixed_buffer(struct vm_gk20a *vm,
914                                  struct buffer_attrs *bfr,
915                                  u64 map_offset)
916 {
917         struct device *dev = dev_from_vm(vm);
918         struct vm_reserved_va_node *va_node;
919         struct mapped_buffer_node *buffer;
920
921         if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) {
922                 nvhost_err(dev, "map offset must be buffer page size aligned 0x%llx",
923                            map_offset);
924                 return -EINVAL;
925         }
926
927         /* find the space reservation */
928         va_node = addr_to_reservation(vm, map_offset);
929         if (!va_node) {
930                 nvhost_warn(dev, "fixed offset mapping without space allocation");
931                 return -EINVAL;
932         }
933
934         /* check that this mapping does not collide with existing
935          * mappings by checking the overlapping area between the current
936          * buffer and all other mapped buffers */
937
938         list_for_each_entry(buffer,
939                 &va_node->va_buffers_list, va_buffers_list) {
940                 s64 begin = max(buffer->addr, map_offset);
941                 s64 end = min(buffer->addr +
942                         buffer->size, map_offset + bfr->size);
943                 if (end - begin > 0) {
944                         nvhost_warn(dev, "overlapping buffer map requested");
945                         return -EINVAL;
946                 }
947         }
948
949         return 0;
950 }
951
952 static u64 __locked_gmmu_map(struct vm_gk20a *vm,
953                                 u64 map_offset,
954                                 struct sg_table *sgt,
955                                 u64 size,
956                                 int pgsz_idx,
957                                 u8 kind_v,
958                                 u32 ctag_offset,
959                                 u32 flags,
960                                 int rw_flag)
961 {
962         int err = 0, i = 0;
963         u32 pde_lo, pde_hi;
964         struct device *d = dev_from_vm(vm);
965
966         /* Allocate (or validate when map_offset != 0) the virtual address. */
967         if (!map_offset) {
968                 map_offset = gk20a_vm_alloc_va(vm, size,
969                                           pgsz_idx);
970                 if (!map_offset) {
971                         nvhost_err(d, "failed to allocate va space");
972                         err = -ENOMEM;
973                         goto fail;
974                 }
975         }
976
977         pde_range_from_vaddr_range(vm,
978                                    map_offset,
979                                    map_offset + size - 1,
980                                    &pde_lo, &pde_hi);
981
982         /* mark the addr range valid (but with 0 phys addr, which will fault) */
983         for (i = pde_lo; i <= pde_hi; i++) {
984                 err = validate_gmmu_page_table_gk20a_locked(vm, i,
985                                                             pgsz_idx);
986                 if (err) {
987                         nvhost_err(d, "failed to validate page table %d: %d",
988                                                            i, err);
989                         goto fail;
990                 }
991         }
992
993         err = update_gmmu_ptes_locked(vm, pgsz_idx,
994                                       sgt,
995                                       map_offset, map_offset + size - 1,
996                                       kind_v,
997                                       ctag_offset,
998                                       flags &
999                                       NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1000                                       rw_flag);
1001         if (err) {
1002                 nvhost_err(d, "failed to update ptes on map");
1003                 goto fail;
1004         }
1005
1006         return map_offset;
1007  fail:
1008         nvhost_err(d, "%s: failed with err=%d\n", __func__, err);
1009         return 0;
1010 }
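
/*
 * __locked_gmmu_map() in three steps (vm->update_gmmu_lock held by caller):
 *  1. allocate a gpu va range, unless a fixed map_offset was passed in
 *  2. make sure every pde covering the range has a backing page table
 *     (validate_gmmu_page_table_gk20a_locked())
 *  3. write the ptes themselves (update_gmmu_ptes_locked())
 * Returns the gpu va on success and 0 on failure.
 */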
1011
1012 static void __locked_gmmu_unmap(struct vm_gk20a *vm,
1013                                 u64 vaddr,
1014                                 u64 size,
1015                                 int pgsz_idx,
1016                                 bool va_allocated,
1017                                 int rw_flag)
1018 {
1019         int err = 0;
1020         struct gk20a *g = gk20a_from_vm(vm);
1021
1022         if (va_allocated)
1023                 gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
1024
1025         /* unmap here needs to know the page size we assigned at mapping */
1026         err = update_gmmu_ptes_locked(vm,
1027                                 pgsz_idx,
1028                                 0, /* n/a for unmap */
1029                                 vaddr,
1030                                 vaddr + size - 1,
1031                                 0, 0, false /* n/a for unmap */,
1032                                 rw_flag);
1033         if (err)
1034                 dev_err(dev_from_vm(vm),
1035                         "failed to update gmmu ptes on unmap");
1036
1037         /* detect which if any pdes/ptes can now be released */
1038
1039         /* flush l2 so any dirty lines are written out *now*.
1040          *  also as we could potentially be switching this buffer
1041          * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
1042          * some point in the future we need to invalidate l2.  e.g. switching
1043          * from a render buffer unmap (here) to later using the same memory
1044          * for gmmu ptes.  note the positioning of this relative to any smmu
1045          * unmapping (below). */
1046
1047         gk20a_mm_l2_flush(g, true);
1048 }
1049
1050 static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
1051                                          struct mem_mgr *memmgr,
1052                                          struct mem_handle *r,
1053                                          u64 offset_align,
1054                                          u32 flags,
1055                                          u32 kind,
1056                                          struct sg_table **sgt,
1057                                          bool user_mapped,
1058                                          int rw_flag)
1059 {
1060         struct mapped_buffer_node *mapped_buffer = 0;
1061
1062         mapped_buffer = find_mapped_buffer_reverse_locked(
1063                                                 &vm->mapped_buffers, r);
1064         if (!mapped_buffer)
1065                 return 0;
1066
1067         if (mapped_buffer->flags != flags)
1068                 return 0;
1069
1070         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET &&
1071             mapped_buffer->addr != offset_align)
1072                 return 0;
1073
1074         WARN_ON(mapped_buffer->memmgr != memmgr);
1075         BUG_ON(mapped_buffer->vm != vm);
1076
1077         /* mark the buffer as used */
1078         if (user_mapped) {
1079                 if (mapped_buffer->user_mapped == 0)
1080                         vm->num_user_mapped_buffers++;
1081                 mapped_buffer->user_mapped++;
1082
1083                 /* If the mapping comes from user space, we own
1084                  * the memmgr and handle refs. Since we reuse an
1085                  * existing mapping here, we need to give back those
1086                  * refs once in order not to leak.
1087                  */
1088                 if (mapped_buffer->own_mem_ref) {
1089                         nvhost_memmgr_put(mapped_buffer->memmgr,
1090                                           mapped_buffer->handle_ref);
1091                         nvhost_memmgr_put_mgr(mapped_buffer->memmgr);
1092                 } else
1093                         mapped_buffer->own_mem_ref = true;
1094
1095                 mapped_buffer->memmgr = memmgr;
1096         }
1097         kref_get(&mapped_buffer->ref);
1098
1099         nvhost_dbg(dbg_map,
1100                    "reusing as=%d pgsz=%d flags=0x%x ctags=%d "
1101                    "start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x "
1102                    "own_mem_ref=%d user_mapped=%d",
1103                    vm_aspace_id(vm), mapped_buffer->pgsz_idx,
1104                    mapped_buffer->flags,
1105                    mapped_buffer->ctag_lines,
1106                    mapped_buffer->ctag_offset,
1107                    hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1108                    hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1109                    lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1110                    hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1111                    lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1112                    mapped_buffer->own_mem_ref, user_mapped);
1113
1114         if (sgt)
1115                 *sgt = mapped_buffer->sgt;
1116         return mapped_buffer->addr;
1117 }
1118
1119 u64 gk20a_vm_map(struct vm_gk20a *vm,
1120                         struct mem_mgr *memmgr,
1121                         struct mem_handle *r,
1122                         u64 offset_align,
1123                         u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
1124                         u32 kind,
1125                         struct sg_table **sgt,
1126                         bool user_mapped,
1127                         int rw_flag)
1128 {
1129         struct gk20a *g = gk20a_from_vm(vm);
1130         struct nvhost_allocator *ctag_allocator = &g->gr.comp_tags;
1131         struct device *d = dev_from_vm(vm);
1132         struct mapped_buffer_node *mapped_buffer = 0;
1133         bool inserted = false, va_allocated = false;
1134         u32 gmmu_page_size = 0;
1135         u64 map_offset = 0;
1136         int attr, err = 0;
1137         struct buffer_attrs bfr = {0};
1138         struct bfr_attr_query query[BFR_ATTRS];
1139         struct nvhost_comptags comptags;
1140
1141         mutex_lock(&vm->update_gmmu_lock);
1142
1143         /* check if this buffer is already mapped */
1144         map_offset = gk20a_vm_map_duplicate_locked(vm, memmgr, r, offset_align,
1145                                                    flags, kind, sgt,
1146                                                    user_mapped, rw_flag);
1147         if (map_offset) {
1148                 mutex_unlock(&vm->update_gmmu_lock);
1149                 return map_offset;
1150         }
1151
1152         /* pin buffer to get phys/iovmm addr */
1153         bfr.sgt = nvhost_memmgr_pin(memmgr, r, d, rw_flag);
1154         if (IS_ERR(bfr.sgt)) {
1155                 /* Falling back to physical is actually possible
1156                  * here in many cases if we use 4K phys pages in the
1157                  * gmmu.  However we have some regions which require
1158                  * contig regions to work properly (either phys-contig
1159                  * or contig through smmu io_vaspace).  Until we can
1160                  * track the difference between those two cases we have
1161                  * to fail the mapping when we run out of SMMU space.
1162                  */
1163                 nvhost_warn(d, "failed to pin buffer");
1164                 goto clean_up;
1165         }
1166
1167         if (sgt)
1168                 *sgt = bfr.sgt;
1169
1170         /* query bfr attributes: size, align, heap, kind */
1171         for (attr = 0; attr < BFR_ATTRS; attr++) {
1172                 query[attr].err =
1173                         nvhost_memmgr_get_param(memmgr, r,
1174                                                 nvmap_bfr_param[attr],
1175                                                 &query[attr].v);
1176                 if (unlikely(query[attr].err != 0)) {
1177                         nvhost_err(d,
1178                                    "failed to get nvmap buffer param %d: %d\n",
1179                                    nvmap_bfr_param[attr],
1180                                    query[attr].err);
1181                         err = query[attr].err;
1182                         goto clean_up;
1183                 }
1184         }
1185
1186         /* validate/adjust bfr attributes */
1187         err = setup_buffer_size_and_align(d, &bfr, query, offset_align, flags);
1188         if (unlikely(err))
1189                 goto clean_up;
1190         if (unlikely(bfr.pgsz_idx < gmmu_page_size_small ||
1191                      bfr.pgsz_idx > gmmu_page_size_big)) {
1192                 BUG_ON(1);
1193                 err = -EINVAL;
1194                 goto clean_up;
1195         }
1196         gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
1197
1198         /* Check if we should use a fixed offset for mapping this buffer */
1199         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)  {
1200                 err = validate_fixed_buffer(vm, &bfr, offset_align);
1201                 if (err)
1202                         goto clean_up;
1203
1204                 map_offset = offset_align;
1205                 va_allocated = false;
1206         } else
1207                 va_allocated = true;
1208
1209         if (sgt)
1210                 *sgt = bfr.sgt;
1211
1212         err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx);
1213         if (unlikely(err)) {
1214                 nvhost_err(d, "failure setting up kind and compression");
1215                 goto clean_up;
1216         }
1217
1218         /* bar1 and pmu vm don't need ctag */
1219         if (!vm->enable_ctag)
1220                 bfr.ctag_lines = 0;
1221
1222         nvhost_memmgr_get_comptags(r, &comptags);
1223
1224         if (bfr.ctag_lines && !comptags.lines) {
1225                 /* allocate compression resources if needed */
1226                 err = nvhost_memmgr_alloc_comptags(r,
1227                                 ctag_allocator, bfr.ctag_lines);
1228                 if (err) {
1229                         /* ok to fall back here if we ran out */
1230                         /* TBD: we can partially alloc ctags as well... */
1231                         bfr.ctag_lines = bfr.ctag_offset = 0;
1232                         bfr.kind_v = bfr.uc_kind_v;
1233                 } else {
1234                         nvhost_memmgr_get_comptags(r, &comptags);
1235
1236                         /* init/clear the ctag buffer */
1237                         gk20a_gr_clear_comptags(g,
1238                                 comptags.offset,
1239                                 comptags.offset + comptags.lines - 1);
1240                 }
1241         }
1242
1243         /* store the comptag info */
1244         WARN_ON(bfr.ctag_lines != comptags.lines);
1245         bfr.ctag_offset = comptags.offset;
1246
1247         /* update gmmu ptes */
1248         map_offset = __locked_gmmu_map(vm, map_offset,
1249                                         bfr.sgt,
1250                                         bfr.size,
1251                                         bfr.pgsz_idx,
1252                                         bfr.kind_v,
1253                                         bfr.ctag_offset,
1254                                         flags, rw_flag);
1255         if (!map_offset)
1256                 goto clean_up;
1257
1258         nvhost_dbg(dbg_map,
1259            "as=%d pgsz=%d "
1260            "kind=0x%x kind_uc=0x%x flags=0x%x "
1261            "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
1262            vm_aspace_id(vm), gmmu_page_size,
1263            bfr.kind_v, bfr.uc_kind_v, flags,
1264            bfr.ctag_lines, bfr.ctag_offset,
1265            hi32(map_offset), lo32(map_offset),
1266            hi32((u64)sg_dma_address(bfr.sgt->sgl)),
1267            lo32((u64)sg_dma_address(bfr.sgt->sgl)),
1268            hi32((u64)sg_phys(bfr.sgt->sgl)),
1269            lo32((u64)sg_phys(bfr.sgt->sgl)));
1270
1271 #if defined(NVHOST_DEBUG)
1272         {
1273                 int i;
1274                 struct scatterlist *sg = NULL;
1275                 nvhost_dbg(dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)");
1276                 for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i) {
1277                         u64 da = sg_dma_address(sg);
1278                         u64 pa = sg_phys(sg);
1279                         u64 len = sg->length;
1280                         nvhost_dbg(dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x",
1281                                    i, hi32(pa), lo32(pa), hi32(da), lo32(da),
1282                                    hi32(len), lo32(len));
1283                 }
1284         }
1285 #endif
1286
1287         /* keep track of the buffer for unmapping */
1288         /* TBD: check for multiple mapping of same buffer */
1289         mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL);
1290         if (!mapped_buffer) {
1291                 nvhost_warn(d, "oom allocating tracking buffer");
1292                 goto clean_up;
1293         }
1294         mapped_buffer->memmgr      = memmgr;
1295         mapped_buffer->handle_ref  = r;
1296         mapped_buffer->sgt         = bfr.sgt;
1297         mapped_buffer->addr        = map_offset;
1298         mapped_buffer->size        = bfr.size;
1299         mapped_buffer->pgsz_idx    = bfr.pgsz_idx;
1300         mapped_buffer->ctag_offset = bfr.ctag_offset;
1301         mapped_buffer->ctag_lines  = bfr.ctag_lines;
1302         mapped_buffer->vm          = vm;
1303         mapped_buffer->flags       = flags;
1304         mapped_buffer->va_allocated = va_allocated;
1305         mapped_buffer->user_mapped = user_mapped ? 1 : 0;
1306         mapped_buffer->own_mem_ref = user_mapped;
1307         INIT_LIST_HEAD(&mapped_buffer->unmap_list);
1308         INIT_LIST_HEAD(&mapped_buffer->va_buffers_list);
1309         kref_init(&mapped_buffer->ref);
1310
1311         err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
1312         if (err) {
1313                 nvhost_err(d, "failed to insert into mapped buffer tree");
1314                 goto clean_up;
1315         }
1316         inserted = true;
1317         if (user_mapped)
1318                 vm->num_user_mapped_buffers++;
1319
1320         nvhost_dbg_info("allocated va @ 0x%llx", map_offset);
1321
1322         if (!va_allocated) {
1323                 struct vm_reserved_va_node *va_node;
1324
1325                 /* find the space reservation */
1326                 va_node = addr_to_reservation(vm, map_offset);
1327                 list_add_tail(&mapped_buffer->va_buffers_list,
1328                               &va_node->va_buffers_list);
1329                 mapped_buffer->va_node = va_node;
1330         }
1331
1332         mutex_unlock(&vm->update_gmmu_lock);
1333
1334         /* Invalidate kernel mappings immediately */
1335         if (vm_aspace_id(vm) == -1)
1336                 gk20a_mm_tlb_invalidate(vm);
1337
1338         return map_offset;
1339
1340 clean_up:
1341         if (inserted) {
1342                 rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1343                 if (user_mapped)
1344                         vm->num_user_mapped_buffers--;
1345         }
1346         kfree(mapped_buffer);
1347         if (va_allocated)
1348                 gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
1349         if (!IS_ERR(bfr.sgt))
1350                 nvhost_memmgr_unpin(memmgr, r, d, bfr.sgt);
1351
1352         mutex_unlock(&vm->update_gmmu_lock);
1353         nvhost_dbg_info("err=%d\n", err);
1354         return 0;
1355 }
1356
1357 u64 gk20a_gmmu_map(struct vm_gk20a *vm,
1358                 struct sg_table **sgt,
1359                 u64 size,
1360                 u32 flags,
1361                 int rw_flag)
1362 {
1363         u64 vaddr;
1364
1365         mutex_lock(&vm->update_gmmu_lock);
1366         vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
1367                                 *sgt, /* sg table */
1368                                 size,
1369                                 0, /* page size index = 0 i.e. SZ_4K */
1370                                 0, /* kind */
1371                                 0, /* ctag_offset */
1372                                 flags, rw_flag);
1373         mutex_unlock(&vm->update_gmmu_lock);
1374         if (!vaddr) {
1375                 nvhost_err(dev_from_vm(vm), "failed to allocate va space");
1376                 return 0;
1377         }
1378
1379         /* Invalidate kernel mappings immediately */
1380         gk20a_mm_tlb_invalidate(vm);
1381
1382         return vaddr;
1383 }
1384
1385 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
1386                 u64 vaddr,
1387                 u64 size,
1388                 int rw_flag)
1389 {
1390         mutex_lock(&vm->update_gmmu_lock);
1391         __locked_gmmu_unmap(vm,
1392                         vaddr,
1393                         size,
1394                         0, /* page size 4K */
1395                         true, /*va_allocated */
1396                         rw_flag);
1397         mutex_unlock(&vm->update_gmmu_lock);
1398 }
1399
1400 phys_addr_t gk20a_get_phys_from_iova(struct device *d,
1401                                 u64 dma_addr)
1402 {
1403         phys_addr_t phys;
1404         u64 iova;
1405
1406         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
1407         if (!mapping)
1408                 return dma_addr;
1409
1410         iova = dma_addr & PAGE_MASK;
1411         phys = iommu_iova_to_phys(mapping->domain, iova);
1412         return phys;
1413 }
1414
1415 /* get sg_table from already allocated buffer */
1416 int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
1417                         void *cpuva, u64 iova,
1418                         size_t size)
1419 {
1420         int err = 0;
1421         *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1422         if (!(*sgt)) {
1423                 dev_err(d, "failed to allocate memory\n");
1424                 err = -ENOMEM;
1425                 goto fail;
1426         }
1427         err = dma_get_sgtable(d, *sgt,
1428                         cpuva, iova,
1429                         size);
1430         if (err) {
1431                 dev_err(d, "failed to create sg table\n");
1432                 goto fail;
1433         }
1434         sg_dma_address((*sgt)->sgl) = iova;
1435
1436         return 0;
1437  fail:
1438         if (*sgt) {
1439                 kfree(*sgt);
1440                 *sgt = NULL;
1441         }
1442         return err;
1443 }
1444
1445 void gk20a_free_sgtable(struct sg_table **sgt)
1446 {
1447         sg_free_table(*sgt);
1448         kfree(*sgt);
1449         *sgt = NULL;
1450 }
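
/*
 * Typical pairing for a kernel-owned, dma_alloc_coherent()-backed buffer
 * (a minimal sketch with flags=0, error handling omitted):
 *
 *   gk20a_get_sgtable(d, &sgt, cpuva, iova, size);
 *   gpu_va = gk20a_gmmu_map(vm, &sgt, size, 0, mem_flag_none);
 *   ...
 *   gk20a_gmmu_unmap(vm, gpu_va, size, mem_flag_none);
 *   gk20a_free_sgtable(&sgt);
 */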
1451
1452 u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
1453 {
1454         u64 result = sg_phys(sgl);
1455 #ifdef CONFIG_TEGRA_IOMMU_SMMU
1456         if (sg_dma_address(sgl) == DMA_ERROR_CODE)
1457                 result = 0;
1458         else if (sg_dma_address(sgl)) {
1459                 result = sg_dma_address(sgl) |
1460                         1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT;
1461         }
1462 #endif
1463         return result;
1464 }
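
/*
 * Illustrative sketch only (kept under #if 0, not compiled, assumes
 * CONFIG_TEGRA_IOMMU_SMMU): the address returned above is what gets
 * programmed into pdes/ptes; when the buffer was mapped through the SMMU the
 * translation bit is set, which is a handy thing to check while debugging.
 * The example_* name is hypothetical.
 */
#if 0
static bool example_is_smmu_translated(struct sg_table *sgt)
{
        return !!(gk20a_mm_iova_addr(sgt->sgl) &
                  (1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT));
}
#endif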
1465
1466 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
1467                                    enum gmmu_pgsz_gk20a pgsz_idx,
1468                                    struct sg_table *sgt,
1469                                    u64 first_vaddr, u64 last_vaddr,
1470                                    u8 kind_v, u32 ctag_offset,
1471                                    bool cacheable,
1472                                    int rw_flag)
1473 {
1474         int err;
1475         u32 pde_lo, pde_hi, pde_i;
1476         struct scatterlist *cur_chunk;
1477         unsigned int cur_offset;
1478         u32 pte_w[2] = {0, 0}; /* invalid pte */
1479         u32 ctag = ctag_offset;
1480         u32 ctag_incr;
1481         u32 page_size  = gmmu_page_sizes[pgsz_idx];
1482         u64 addr = 0;
1483
1484         pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
1485                                    &pde_lo, &pde_hi);
1486
1487         nvhost_dbg(dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
1488                    pgsz_idx, pde_lo, pde_hi);
1489
1490         /* If ctag_offset != 0 add 1 per pte, else add 0.  The idea is to
1491          * avoid a per-pte branch below. Note: this only works when the page
1492          * size is 128KB while comptags are active; we check that elsewhere. */
1493         ctag_incr = !!ctag_offset;
1494
1495         if (sgt)
1496                 cur_chunk = sgt->sgl;
1497         else
1498                 cur_chunk = NULL;
1499
1500         cur_offset = 0;
1501
1502         for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
1503                 u32 pte_lo, pte_hi;
1504                 u32 pte_cur;
1505                 void *pte_kv_cur;
1506
1507                 struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
1508
1509                 if (pde_i == pde_lo)
1510                         pte_lo = pte_index_from_vaddr(vm, first_vaddr,
1511                                                       pgsz_idx);
1512                 else
1513                         pte_lo = 0;
1514
1515                 if ((pde_i != pde_hi) && (pde_hi != pde_lo))
1516                         pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1;
1517                 else
1518                         pte_hi = pte_index_from_vaddr(vm, last_vaddr,
1519                                                       pgsz_idx);
1520
1521                 /* get cpu access to the ptes */
1522                 err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur);
1523                 if (err) {
1524                         nvhost_err(dev_from_vm(vm),
1525                                    "couldn't map ptes for update as=%d pte_ref_cnt=%d",
1526                                    vm_aspace_id(vm), pte->ref_cnt);
1527                         goto clean_up;
1528                 }
1529
1530                 nvhost_dbg(dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
1531                 for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
1532
1533                         if (likely(sgt)) {
1534                                 u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
1535                                 if (new_addr) {
1536                                         addr = new_addr;
1537                                         addr += cur_offset;
1538                                 }
1539
1540                                 pte_w[0] = gmmu_pte_valid_true_f() |
1541                                         gmmu_pte_address_sys_f(addr
1542                                                 >> gmmu_pte_address_shift_v());
1543                                 pte_w[1] = gmmu_pte_aperture_video_memory_f() |
1544                                         gmmu_pte_kind_f(kind_v) |
1545                                         gmmu_pte_comptagline_f(ctag);
1546
1547                                 if (rw_flag == mem_flag_read_only) {
1548                                         pte_w[0] |= gmmu_pte_read_only_true_f();
1549                                         pte_w[1] |=
1550                                                 gmmu_pte_write_disable_true_f();
1551                                 } else if (rw_flag == mem_flag_write_only) {
1552                                         pte_w[1] |=
1553                                                 gmmu_pte_read_disable_true_f();
1554                                 }
1555
1556                                 if (!cacheable)
1557                                         pte_w[1] |= gmmu_pte_vol_true_f();
1558
1559                                 pte->ref_cnt++;
1560
1561                                 nvhost_dbg(dbg_pte,
1562                                            "pte_cur=%d addr=0x%x,%08x kind=%d"
1563                                            " ctag=%d vol=%d refs=%d"
1564                                            " [0x%08x,0x%08x]",
1565                                            pte_cur, hi32(addr), lo32(addr),
1566                                            kind_v, ctag, !cacheable,
1567                                            pte->ref_cnt, pte_w[1], pte_w[0]);
1568
1569                                 ctag += ctag_incr;
1570                                 cur_offset += page_size;
1571                                 addr += page_size;
1572                                 while (cur_chunk &&
1573                                         cur_offset >= cur_chunk->length) {
1574                                         cur_offset -= cur_chunk->length;
1575                                         cur_chunk = sg_next(cur_chunk);
1576                                 }
1577
1578                         } else {
1579                                 pte->ref_cnt--;
1580                                 nvhost_dbg(dbg_pte,
1581                                            "pte_cur=%d ref=%d [0x0,0x0]",
1582                                            pte_cur, pte->ref_cnt);
1583                         }
1584
1585                         mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
1586                         mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
1587                 }
1588
1589                 unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
1590
1591                 if (pte->ref_cnt == 0) {
1592                         /* It can make sense to keep around one page table for
1593                          * each flavor (empty)... in case a new map is coming
1594                          * right back to alloc (and fill it in) again.
1595                          * But: deferring unmapping should help with pathological
1596                          * unmap/map/unmap/map cases where we'd trigger pte
1597                          * free/alloc/free/alloc.
1598                          */
1599                         free_gmmu_pages(vm, pte->ref, pte->sgt,
1600                                 vm->mm->page_table_sizing[pgsz_idx].order);
1601                         pte->ref = NULL;
1602
1603                         /* rewrite pde */
1604                         update_gmmu_pde_locked(vm, pde_i);
1605                 }
1606
1607         }
1608
1609         smp_mb();
1610         vm->tlb_dirty = true;
1611         nvhost_dbg_fn("set tlb dirty");
1612
1613         return 0;
1614
1615 clean_up:
1616         /* TBD: potentially rewrite the above to pre-map everything it needs,
1617          * as that's the only way it can fail */
1618         return err;
1619
1620 }
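
/*
 * Illustrative sketch only (kept under #if 0, not compiled): the branch-free
 * comptag advance used in update_gmmu_ptes_locked() above.  With compression
 * off ctag_offset is 0, so ctag_incr is 0 and the comptag line never moves;
 * with compression on it advances by one line per (128KB) page.  The
 * example_* name and the stand-alone loop are hypothetical.
 */
#if 0
static void example_ctag_advance(u32 ctag_offset, u32 num_pages)
{
        u32 ctag = ctag_offset;
        u32 ctag_incr = !!ctag_offset; /* 0 or 1, no per-pte branch */
        u32 i;

        for (i = 0; i < num_pages; i++) {
                /* gmmu_pte_comptagline_f(ctag) would be or'd into pte_w[1] */
                ctag += ctag_incr;
        }
}
#endif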
1621
1622
1623 /* for gk20a the "video memory" apertures here are misnomers. */
1624 static inline u32 big_valid_pde0_bits(u64 pte_addr)
1625 {
1626         u32 pde0_bits =
1627                 gmmu_pde_aperture_big_video_memory_f() |
1628                 gmmu_pde_address_big_sys_f(
1629                            (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1630         return  pde0_bits;
1631 }
1632 static inline u32 small_valid_pde1_bits(u64 pte_addr)
1633 {
1634         u32 pde1_bits =
1635                 gmmu_pde_aperture_small_video_memory_f() |
1636                 gmmu_pde_vol_small_true_f() | /* tbd: why? */
1637                 gmmu_pde_address_small_sys_f(
1638                            (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1639         return pde1_bits;
1640 }
1641
1642 /* Given the current state of the ptes associated with a pde,
1643    determine its value and write it out.  There's no check here
1644    for whether or not a change was actually made, so superfluous
1645    updates will cause unnecessary pde invalidations.
1646 */
1648 static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
1649 {
1650         bool small_valid, big_valid;
1651         u64 pte_addr[2] = {0, 0};
1652         struct page_table_gk20a *small_pte =
1653                 vm->pdes.ptes[gmmu_page_size_small] + i;
1654         struct page_table_gk20a *big_pte =
1655                 vm->pdes.ptes[gmmu_page_size_big] + i;
1656         u32 pde_v[2] = {0, 0};
1657         u32 *pde;
1658
1659         small_valid = small_pte && small_pte->ref;
1660         big_valid   = big_pte && big_pte->ref;
1661
1662         if (small_valid)
1663                 pte_addr[gmmu_page_size_small] =
1664                         gk20a_mm_iova_addr(small_pte->sgt->sgl);
1665         if (big_valid)
1666                 pte_addr[gmmu_page_size_big] =
1667                         gk20a_mm_iova_addr(big_pte->sgt->sgl);
1668
1669         pde_v[0] = gmmu_pde_size_full_f();
1670         pde_v[0] |= big_valid ?
1671                 big_valid_pde0_bits(pte_addr[gmmu_page_size_big])
1672                 :
1673                 (gmmu_pde_aperture_big_invalid_f());
1674
1675         pde_v[1] |= (small_valid ?
1676                      small_valid_pde1_bits(pte_addr[gmmu_page_size_small])
1677                      :
1678                      (gmmu_pde_aperture_small_invalid_f() |
1679                       gmmu_pde_vol_small_false_f())
1680                      )
1681                 |
1682                 (big_valid ? (gmmu_pde_vol_big_true_f()) :
1683                  gmmu_pde_vol_big_false_f());
1684
1685         pde = pde_from_index(vm, i);
1686
1687         mem_wr32(pde, 0, pde_v[0]);
1688         mem_wr32(pde, 1, pde_v[1]);
1689
1690         smp_mb();
1691
1692         FLUSH_CPU_DCACHE(pde,
1693                          sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
1694                          sizeof(u32)*2);
1695
1696         gk20a_mm_l2_invalidate(vm->mm->g);
1697
1698         nvhost_dbg(dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
1699
1700         vm->tlb_dirty  = true;
1701 }
1702
1703
1704 static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
1705                                u32 num_pages, u32 pgsz_idx)
1706 {
1707         struct mm_gk20a *mm = vm->mm;
1708         struct gk20a *g = mm->g;
1709         u32 pgsz = gmmu_page_sizes[pgsz_idx];
1710         u32 i;
1711
1712         /* allocate the zero page if the vm does not already have one */
1713         if (!vm->zero_page_cpuva) {
1714                 int err = 0;
1715                 vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
1716                                                          mm->big_page_size,
1717                                                          &vm->zero_page_iova,
1718                                                          GFP_KERNEL);
1719                 if (!vm->zero_page_cpuva) {
1720                         dev_err(&g->dev->dev, "failed to allocate zero page\n");
1721                         return -ENOMEM;
1722                 }
1723
1724                 err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
1725                                         vm->zero_page_cpuva, vm->zero_page_iova,
1726                                         mm->big_page_size);
1727                 if (err) {
1728                         dma_free_coherent(&g->dev->dev, mm->big_page_size,
1729                                           vm->zero_page_cpuva,
1730                                           vm->zero_page_iova);
1731                         vm->zero_page_iova = 0;
1732                         vm->zero_page_cpuva = NULL;
1733
1734                         dev_err(&g->dev->dev, "failed to create sg table for zero page\n");
1735                         return -ENOMEM;
1736                 }
1737         }
1738
1739         for (i = 0; i < num_pages; i++) {
1740                 u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
1741                         vm->zero_page_sgt, pgsz, pgsz_idx, 0, 0,
1742                         NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
1743                         mem_flag_none);
1744
1745                 if (!page_vaddr) {
1746                         nvhost_err(dev_from_vm(vm), "failed to remap clean buffers!");
1747                         goto err_unmap;
1748                 }
1749                 vaddr += pgsz;
1750         }
1751
1752         gk20a_mm_l2_flush(mm->g, true);
1753
1754         return 0;
1755
1756 err_unmap:
1757
1758         WARN_ON(1);
1759         /* something went wrong. unmap pages */
1760         while (i--) {
1761                 vaddr -= pgsz;
1762                 __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
1763                                     mem_flag_none);
1764         }
1765
1766         return -EINVAL;
1767 }
1768
1769 /* return mem_mgr and mem_handle to caller. If the mem_handle is a kernel dup
1770    from user space (as_ioctl), caller releases the kernel duplicated handle */
1771 /* NOTE! mapped_buffers lock must be held */
1772 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
1773 {
1774         struct vm_gk20a *vm = mapped_buffer->vm;
1775
1776         if (mapped_buffer->va_node &&
1777             mapped_buffer->va_node->sparse) {
1778                 u64 vaddr = mapped_buffer->addr;
1779                 u32 pgsz_idx = mapped_buffer->pgsz_idx;
1780                 u32 num_pages = mapped_buffer->size >>
1781                         gmmu_page_shifts[pgsz_idx];
1782
1783                 /* there is little we can do if this fails... */
1784                 gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
1785
1786         } else
1787                 __locked_gmmu_unmap(vm,
1788                                 mapped_buffer->addr,
1789                                 mapped_buffer->size,
1790                                 mapped_buffer->pgsz_idx,
1791                                 mapped_buffer->va_allocated,
1792                                 mem_flag_none);
1793
1794         nvhost_dbg(dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
1795                    vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx],
1796                    hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1797                    mapped_buffer->own_mem_ref);
1798
1799         nvhost_memmgr_unpin(mapped_buffer->memmgr,
1800                             mapped_buffer->handle_ref,
1801                             dev_from_vm(vm),
1802                             mapped_buffer->sgt);
1803
1804         /* remove from the mapped buffer rb tree and the va list, then free */
1805         rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1806         if (!list_empty(&mapped_buffer->va_buffers_list))
1807                 list_del(&mapped_buffer->va_buffers_list);
1808
1809         /* keep track of mapped buffers */
1810         if (mapped_buffer->user_mapped)
1811                 vm->num_user_mapped_buffers--;
1812
1813         if (mapped_buffer->own_mem_ref) {
1814                 nvhost_memmgr_put(mapped_buffer->memmgr,
1815                                   mapped_buffer->handle_ref);
1816                 nvhost_memmgr_put_mgr(mapped_buffer->memmgr);
1817         }
1818
1819         kfree(mapped_buffer);
1820
1821         return;
1822 }
1823
1824 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
1825 {
1826         struct device *d = dev_from_vm(vm);
1827         struct mapped_buffer_node *mapped_buffer;
1828
1829         mutex_lock(&vm->update_gmmu_lock);
1830         mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
1831         if (!mapped_buffer) {
1832                 mutex_unlock(&vm->update_gmmu_lock);
1833                 nvhost_err(d, "invalid addr to unmap 0x%llx", offset);
1834                 return;
1835         }
1836         kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
1837         mutex_unlock(&vm->update_gmmu_lock);
1838 }
1839
1840 static void gk20a_vm_remove_support(struct vm_gk20a *vm)
1841 {
1842         struct gk20a *g = vm->mm->g;
1843         struct mapped_buffer_node *mapped_buffer;
1844         struct vm_reserved_va_node *va_node, *va_node_tmp;
1845         struct rb_node *node;
1846
1847         nvhost_dbg_fn("");
1848         mutex_lock(&vm->update_gmmu_lock);
1849
1850         /* TBD: add a flag here for the unmap code to recognize teardown
1851          * and short-circuit any otherwise expensive operations. */
1852
1853         node = rb_first(&vm->mapped_buffers);
1854         while (node) {
1855                 mapped_buffer =
1856                         container_of(node, struct mapped_buffer_node, node);
1857                 gk20a_vm_unmap_locked(mapped_buffer);
1858                 node = rb_first(&vm->mapped_buffers);
1859         }
1860
1861         /* destroy remaining reserved memory areas */
1862         list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
1863                 reserved_va_list) {
1864                 list_del(&va_node->reserved_va_list);
1865                 kfree(va_node);
1866         }
1867
1868         /* TBD: unmapping all buffers above may not actually free
1869          * all vm ptes.  jettison them here to be certain... */
1870
1871         unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
1872         free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
1873
1874         kfree(vm->pdes.ptes[gmmu_page_size_small]);
1875         kfree(vm->pdes.ptes[gmmu_page_size_big]);
1876         nvhost_allocator_destroy(&vm->vma[gmmu_page_size_small]);
1877         nvhost_allocator_destroy(&vm->vma[gmmu_page_size_big]);
1878
1879         mutex_unlock(&vm->update_gmmu_lock);
1880
1881         /* release zero page if used */
1882         if (vm->zero_page_cpuva)
1883                 dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
1884                                   vm->zero_page_cpuva, vm->zero_page_iova);
1885
1886         /* vm is not used anymore. release it. */
1887         kfree(vm);
1888 }
1889
1890 static void gk20a_vm_remove_support_kref(struct kref *ref)
1891 {
1892         struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
1893         gk20a_vm_remove_support(vm);
1894 }
1895
1896 void gk20a_vm_get(struct vm_gk20a *vm)
1897 {
1898         kref_get(&vm->ref);
1899 }
1900
1901 void gk20a_vm_put(struct vm_gk20a *vm)
1902 {
1903         kref_put(&vm->ref, gk20a_vm_remove_support_kref);
1904 }
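
/*
 * Illustrative sketch only (kept under #if 0, not compiled): code that needs
 * a vm to stay alive independently of the as_share pins it with the kref
 * helpers above.  The example_* name and the "use the vm" step are
 * hypothetical.
 */
#if 0
static void example_borrow_vm(struct vm_gk20a *vm)
{
        gk20a_vm_get(vm);
        /* ... use the vm ... */
        gk20a_vm_put(vm); /* last put frees it via gk20a_vm_remove_support */
}
#endif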
1905
1906 /* address space interfaces for the gk20a module */
1907 static int gk20a_as_alloc_share(struct nvhost_as_share *as_share)
1908 {
1909         struct nvhost_as *as = as_share->as;
1910         struct gk20a *gk20a = get_gk20a(as->ch->dev);
1911         struct mm_gk20a *mm = &gk20a->mm;
1912         struct vm_gk20a *vm;
1913         u64 vma_size;
1914         u32 num_pages, low_hole_pages;
1915         char name[32];
1916         int err;
1917
1918         nvhost_dbg_fn("");
1919
1920         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1921         if (!vm)
1922                 return -ENOMEM;
1923
1924         as_share->priv = (void *)vm;
1925
1926         vm->mm = mm;
1927         vm->as_share = as_share;
1928
1929         vm->big_pages = true;
1930
1931         vm->va_start  = mm->pde_stride;   /* create a one pde hole */
1932         vm->va_limit  = mm->channel.size; /* note this means channel.size is
1933                                              really just the max */
1934         {
1935                 u32 pde_lo, pde_hi;
1936                 pde_range_from_vaddr_range(vm,
1937                                            0, vm->va_limit-1,
1938                                            &pde_lo, &pde_hi);
1939                 vm->pdes.num_pdes = pde_hi + 1;
1940         }
1941
1942         vm->pdes.ptes[gmmu_page_size_small] =
1943                 kzalloc(sizeof(struct page_table_gk20a) *
1944                         vm->pdes.num_pdes, GFP_KERNEL);
1945
1946         vm->pdes.ptes[gmmu_page_size_big] =
1947                 kzalloc(sizeof(struct page_table_gk20a) *
1948                         vm->pdes.num_pdes, GFP_KERNEL);
1949
1950         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
1951               vm->pdes.ptes[gmmu_page_size_big]))
1952                 return -ENOMEM;
1953
1954         nvhost_dbg_info("init space for va_limit=0x%llx num_pdes=%d",
1955                    vm->va_limit, vm->pdes.num_pdes);
1956
1957         /* allocate the page table directory */
1958         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
1959                                &vm->pdes.sgt);
1960         if (err)
1961                 return -ENOMEM;
1962
1963         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
1964         if (err) {
1965                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
1966                 return -ENOMEM;
1967         }
1968         nvhost_dbg(dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx",
1969                         vm->pdes.kv,
1970                         gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
1971         /* we could release vm->pdes.kv but it's only one page... */
1972
1973
1974         /* low-half: alloc small pages */
1975         /* high-half: alloc big pages */
1976         vma_size = mm->channel.size >> 1;
1977
1978         snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
1979                  gmmu_page_sizes[gmmu_page_size_small]>>10);
1980         num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
1981
1982         /* num_pages above is without regard to the low-side hole. */
1983         low_hole_pages = (vm->va_start >>
1984                           gmmu_page_shifts[gmmu_page_size_small]);
1985
1986         nvhost_allocator_init(&vm->vma[gmmu_page_size_small], name,
1987               low_hole_pages,             /* start */
1988               num_pages - low_hole_pages, /* length */
1989               1);                         /* align */
1990
1991         snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
1992                  gmmu_page_sizes[gmmu_page_size_big]>>10);
1993
1994         num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
1995         nvhost_allocator_init(&vm->vma[gmmu_page_size_big], name,
1996                               num_pages, /* start */
1997                               num_pages, /* length */
1998                               1); /* align */
1999
2000         vm->mapped_buffers = RB_ROOT;
2001
2002         mutex_init(&vm->update_gmmu_lock);
2003         kref_init(&vm->ref);
2004         INIT_LIST_HEAD(&vm->reserved_va_list);
2005
2006         vm->enable_ctag = true;
2007
2008         return 0;
2009 }
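
/*
 * Illustrative sketch only (kept under #if 0, not compiled): the va layout
 * the allocator setup above produces -- a one-pde hole at the bottom, the
 * rest of the low half handed to the small-page allocator and the high half
 * to the big-page allocator.  The example_* name is hypothetical.
 */
#if 0
static void example_dump_va_split(struct vm_gk20a *vm)
{
        u64 half = vm->mm->channel.size >> 1;

        nvhost_dbg_info("hole : 0x%llx-0x%llx", 0ULL, vm->va_start - 1);
        nvhost_dbg_info("small: 0x%llx-0x%llx", vm->va_start, half - 1);
        nvhost_dbg_info("big  : 0x%llx-0x%llx", half, vm->va_limit - 1);
}
#endif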
2010
2011
2012 static int gk20a_as_release_share(struct nvhost_as_share *as_share)
2013 {
2014         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
2015
2016         nvhost_dbg_fn("");
2017
2018         vm->as_share = NULL;
2019
2020         /* put as reference to vm */
2021         gk20a_vm_put(vm);
2022
2023         as_share->priv = NULL;
2024
2025         return 0;
2026 }
2027
2028
2029 static int gk20a_as_alloc_space(struct nvhost_as_share *as_share,
2030                                 struct nvhost_as_alloc_space_args *args)
2031 {
2032         int err = -ENOMEM;
2033         int pgsz_idx;
2034         u32 start_page_nr;
2035         struct nvhost_allocator *vma;
2036         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
2037         struct vm_reserved_va_node *va_node;
2038         u64 vaddr_start = 0;
2039
2040         nvhost_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
2041                         args->flags, args->page_size, args->pages,
2042                         args->o_a.offset);
2043
2044         /* determine pagesz idx */
2045         for (pgsz_idx = gmmu_page_size_small;
2046              pgsz_idx < gmmu_nr_page_sizes;
2047              pgsz_idx++) {
2048                 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2049                         break;
2050         }
2051
2052         if (pgsz_idx >= gmmu_nr_page_sizes) {
2053                 err = -EINVAL;
2054                 goto clean_up;
2055         }
2056
2057         va_node = kzalloc(sizeof(*va_node), GFP_KERNEL);
2058         if (!va_node) {
2059                 err = -ENOMEM;
2060                 goto clean_up;
2061         }
2062
2063         if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE &&
2064             pgsz_idx != gmmu_page_size_big) {
2065                 err = -ENOSYS;
2066                 goto clean_up;
2067         }
2068
2069         start_page_nr = 0;
2070         if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
2071                 start_page_nr = (u32)(args->o_a.offset >>
2072                                       gmmu_page_shifts[pgsz_idx]);
2073
2074         vma = &vm->vma[pgsz_idx];
2075         err = vma->alloc(vma, &start_page_nr, args->pages);
2076         if (err) {
2077                 kfree(va_node);
2078                 goto clean_up;
2079         }
2080
2081         vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx];
2082
2083         va_node->vaddr_start = vaddr_start;
2084         va_node->size = (u64)args->page_size * (u64)args->pages;
2085         va_node->pgsz_idx = pgsz_idx;
2086         INIT_LIST_HEAD(&va_node->va_buffers_list);
2087         INIT_LIST_HEAD(&va_node->reserved_va_list);
2088
2089         mutex_lock(&vm->update_gmmu_lock);
2090
2091         /* back sparse reservations with the zero page and mark the va_node */
2092         if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
2093                 err = gk20a_vm_put_empty(vm, vaddr_start, args->pages,
2094                                          pgsz_idx);
2095                 if (err) {
2096                         mutex_unlock(&vm->update_gmmu_lock);
2097                         vma->free(vma, start_page_nr, args->pages);
2098                         kfree(va_node);
2099                         goto clean_up;
2100                 }
2101
2102                 va_node->sparse = true;
2103         }
2104
2105         list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
2106
2107         mutex_unlock(&vm->update_gmmu_lock);
2108
2109         args->o_a.offset = vaddr_start;
2110
2111 clean_up:
2112         return err;
2113 }
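
/*
 * Illustrative sketch only (kept under #if 0, not compiled): how an
 * alloc_space request for a sparse reservation might be filled in before it
 * reaches the function above (e.g. via the as ioctl path).  Sparse
 * reservations are only accepted with the big page size; values and the
 * example_* name are hypothetical.
 */
#if 0
static void example_fill_sparse_alloc(struct nvhost_as_alloc_space_args *args,
                                      u32 num_big_pages)
{
        args->page_size = gmmu_page_sizes[gmmu_page_size_big];
        args->pages = num_big_pages;
        args->flags = NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE;
        args->o_a.offset = 0; /* out: reserved gpu va on success */
}
#endif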
2114
2115 static int gk20a_as_free_space(struct nvhost_as_share *as_share,
2116                                struct nvhost_as_free_space_args *args)
2117 {
2118         int err = -ENOMEM;
2119         int pgsz_idx;
2120         u32 start_page_nr;
2121         struct nvhost_allocator *vma;
2122         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
2123         struct vm_reserved_va_node *va_node;
2124
2125         nvhost_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
2126                         args->pages, args->offset);
2127
2128         /* determine pagesz idx */
2129         for (pgsz_idx = gmmu_page_size_small;
2130              pgsz_idx < gmmu_nr_page_sizes;
2131              pgsz_idx++) {
2132                 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2133                         break;
2134         }
2135
2136         if (pgsz_idx >= gmmu_nr_page_sizes) {
2137                 err = -EINVAL;
2138                 goto clean_up;
2139         }
2140
2141         start_page_nr = (u32)(args->offset >>
2142                               gmmu_page_shifts[pgsz_idx]);
2143
2144         vma = &vm->vma[pgsz_idx];
2145         err = vma->free(vma, start_page_nr, args->pages);
2146
2147         if (err)
2148                 goto clean_up;
2149
2150         mutex_lock(&vm->update_gmmu_lock);
2151         va_node = addr_to_reservation(vm, args->offset);
2152         if (va_node) {
2153                 struct mapped_buffer_node *buffer;
2154
2155                 /* no need to unmap the buffers in this reserved range; just
2156                  * convert them into normal buffers */
2157
2158                 list_for_each_entry(buffer,
2159                         &va_node->va_buffers_list, va_buffers_list)
2160                         list_del_init(&buffer->va_buffers_list);
2161
2162                 list_del(&va_node->reserved_va_list);
2163
2164                 /* if this was a sparse mapping, free the va */
2165                 if (va_node->sparse)
2166                         __locked_gmmu_unmap(vm,
2167                                 va_node->vaddr_start,
2168                                 va_node->size,
2169                                 va_node->pgsz_idx,
2170                                 false,
2171                                 mem_flag_none);
2172                 kfree(va_node);
2173         }
2174         mutex_unlock(&vm->update_gmmu_lock);
2175
2176 clean_up:
2177         return err;
2178 }
2179
2180 static int gk20a_as_bind_hwctx(struct nvhost_as_share *as_share,
2181                                struct nvhost_hwctx *hwctx)
2182 {
2183         int err = 0;
2184         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
2185         struct channel_gk20a *c = hwctx->priv;
2186
2187         nvhost_dbg_fn("");
2188
2189         c->vm = vm;
2190         err = channel_gk20a_commit_va(c);
2191         if (err)
2192                 c->vm = NULL;
2193
2194         return err;
2195 }
2196
2197 static int gk20a_as_map_buffer(struct nvhost_as_share *as_share,
2198                                int memmgr_fd,
2199                                ulong mem_id,
2200                                u64 *offset_align,
2201                                u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/)
2202 {
2203         int err = 0;
2204         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
2205         struct gk20a *g = gk20a_from_vm(vm);
2206         struct mem_mgr *memmgr;
2207         struct mem_handle *r;
2208         u64 ret_va;
2209
2210         nvhost_dbg_fn("");
2211
2212         /* get ref to the memmgr (released on unmap_locked) */
2213         memmgr = nvhost_memmgr_get_mgr_file(memmgr_fd);
2214         if (IS_ERR(memmgr))
2215                 return 0;
2216
2217         /* get ref to the mem handle (released on unmap_locked) */
2218         r = nvhost_memmgr_get(memmgr, mem_id, g->dev);
2219         if (!r) {
2220                 nvhost_memmgr_put_mgr(memmgr);
2221                 return 0;
2222         }
2223
2224         ret_va = gk20a_vm_map(vm, memmgr, r, *offset_align,
2225                         flags, 0/*no kind here, to be removed*/, NULL, true,
2226                         mem_flag_none);
2227         *offset_align = ret_va;
2228         if (!ret_va) {
2229                 nvhost_memmgr_put(memmgr, r);
2230                 nvhost_memmgr_put_mgr(memmgr);
2231                 err = -EINVAL;
2232         }
2233
2234         return err;
2235 }
2236
2237 static int gk20a_as_unmap_buffer(struct nvhost_as_share *as_share, u64 offset)
2238 {
2239         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
2240
2241         nvhost_dbg_fn("");
2242
2243         gk20a_vm_unmap_user(vm, offset);
2244         return 0;
2245 }
2246
2247
2248 const struct nvhost_as_moduleops tegra_gk20a_as_ops = {
2249         .alloc_share   = gk20a_as_alloc_share,
2250         .release_share = gk20a_as_release_share,
2251         .alloc_space   = gk20a_as_alloc_space,
2252         .free_space    = gk20a_as_free_space,
2253         .bind_hwctx    = gk20a_as_bind_hwctx,
2254         .map_buffer    = gk20a_as_map_buffer,
2255         .unmap_buffer  = gk20a_as_unmap_buffer,
2256 };
2257
2258 int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2259 {
2260         int err;
2261         phys_addr_t inst_pa;
2262         void *inst_ptr;
2263         struct vm_gk20a *vm = &mm->bar1.vm;
2264         struct gk20a *g = gk20a_from_mm(mm);
2265         struct device *d = dev_from_gk20a(g);
2266         struct inst_desc *inst_block = &mm->bar1.inst_block;
2267         u64 pde_addr;
2268         u32 pde_addr_lo;
2269         u32 pde_addr_hi;
2270
2271         vm->mm = mm;
2272
2273         mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
2274
2275         nvhost_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
2276
2277         vm->va_start = mm->pde_stride * 1;
2278         vm->va_limit = mm->bar1.aperture_size;
2279
2280         {
2281                 u32 pde_lo, pde_hi;
2282                 pde_range_from_vaddr_range(vm,
2283                                            0, vm->va_limit-1,
2284                                            &pde_lo, &pde_hi);
2285                 vm->pdes.num_pdes = pde_hi + 1;
2286         }
2287
2288         /* bar1 is likely only ever to use/need small page sizes, but just
2289          * in case, arrange for both for now. */
2290         vm->pdes.ptes[gmmu_page_size_small] =
2291                 kzalloc(sizeof(struct page_table_gk20a) *
2292                         vm->pdes.num_pdes, GFP_KERNEL);
2293
2294         vm->pdes.ptes[gmmu_page_size_big] =
2295                 kzalloc(sizeof(struct page_table_gk20a) *
2296                         vm->pdes.num_pdes, GFP_KERNEL);
2297
2298         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2299               vm->pdes.ptes[gmmu_page_size_big]))
2300                 return -ENOMEM;
2301
2302         nvhost_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d",
2303                    vm->va_limit, vm->pdes.num_pdes);
2304
2305
2306         /* allocate the page table directory */
2307         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2308                                &vm->pdes.sgt);
2309         if (err)
2310                 goto clean_up;
2311
2312         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
2313         if (err) {
2314                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
2315                 goto clean_up;
2316         }
2317         nvhost_dbg(dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
2318                         vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2319         /* we could release vm->pdes.kv but it's only one page... */
2320
2321         pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2322         pde_addr_lo = u64_lo32(pde_addr >> 12);
2323         pde_addr_hi = u64_hi32(pde_addr);
2324
2325         nvhost_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2326                 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl),
2327                 pde_addr_lo, pde_addr_hi);
2328
2329         /* allocate instance mem for bar1 */
2330         inst_block->size = ram_in_alloc_size_v();
2331         inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2332                                 &inst_block->iova, GFP_KERNEL);
2333         if (!inst_block->cpuva) {
2334                 nvhost_err(d, "%s: memory allocation failed\n", __func__);
2335                 err = -ENOMEM;
2336                 goto clean_up;
2337         }
2338
2339         inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2340         if (!inst_block->cpu_pa) {
2341                 nvhost_err(d, "%s: failed to get phys address\n", __func__);
2342                 err = -ENOMEM;
2343                 goto clean_up;
2344         }
2345
2346         inst_pa = inst_block->cpu_pa;
2347         inst_ptr = inst_block->cpuva;
2348
2349         nvhost_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p",
2350                 (u64)inst_pa, inst_ptr);
2351
2352         memset(inst_ptr, 0, ram_fc_size_val_v());
2353
2354         mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2355                 ram_in_page_dir_base_target_vid_mem_f() |
2356                 ram_in_page_dir_base_vol_true_f() |
2357                 ram_in_page_dir_base_lo_f(pde_addr_lo));
2358
2359         mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2360                 ram_in_page_dir_base_hi_f(pde_addr_hi));
2361
2362         mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2363                  u64_lo32(vm->va_limit) | 0xFFF);
2364
2365         mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2366                 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2367
2368         nvhost_dbg_info("bar1 inst block ptr: %08llx",  (u64)inst_pa);
2369         nvhost_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1",
2370                               1,/*start*/
2371                               (vm->va_limit >> 12) - 1 /* length*/,
2372                               1); /* align */
2373         /* initialize just in case we try to use it anyway */
2374         nvhost_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused",
2375                               0x0badc0de, /* start */
2376                               1, /* length */
2377                               1); /* align */
2378
2379         vm->mapped_buffers = RB_ROOT;
2380
2381         mutex_init(&vm->update_gmmu_lock);
2382         kref_init(&vm->ref);
2383         INIT_LIST_HEAD(&vm->reserved_va_list);
2384
2385         return 0;
2386
2387 clean_up:
2388         /* free, etc */
2389         if (inst_block->cpuva)
2390                 dma_free_coherent(d, inst_block->size,
2391                         inst_block->cpuva, inst_block->iova);
2392         inst_block->cpuva = NULL;
2393         inst_block->iova = 0;
2394         return err;
2395 }
2396
2397 /* pmu vm, share channel_vm interfaces */
2398 int gk20a_init_pmu_vm(struct mm_gk20a *mm)
2399 {
2400         int err;
2401         phys_addr_t inst_pa;
2402         void *inst_ptr;
2403         struct vm_gk20a *vm = &mm->pmu.vm;
2404         struct gk20a *g = gk20a_from_mm(mm);
2405         struct device *d = dev_from_gk20a(g);
2406         struct inst_desc *inst_block = &mm->pmu.inst_block;
2407         u64 pde_addr;
2408         u32 pde_addr_lo;
2409         u32 pde_addr_hi;
2410
2411         vm->mm = mm;
2412
2413         mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
2414
2415         nvhost_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
2416
2417         vm->va_start  = GK20A_PMU_VA_START;
2418         vm->va_limit  = vm->va_start + mm->pmu.aperture_size;
2419
2420         {
2421                 u32 pde_lo, pde_hi;
2422                 pde_range_from_vaddr_range(vm,
2423                                            0, vm->va_limit-1,
2424                                            &pde_lo, &pde_hi);
2425                 vm->pdes.num_pdes = pde_hi + 1;
2426         }
2427
2428         /* The pmu is likely only ever to use/need small page sizes, but
2429          * just in case, arrange for both for now. */
2430         vm->pdes.ptes[gmmu_page_size_small] =
2431                 kzalloc(sizeof(struct page_table_gk20a) *
2432                         vm->pdes.num_pdes, GFP_KERNEL);
2433
2434         vm->pdes.ptes[gmmu_page_size_big] =
2435                 kzalloc(sizeof(struct page_table_gk20a) *
2436                         vm->pdes.num_pdes, GFP_KERNEL);
2437
2438         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2439               vm->pdes.ptes[gmmu_page_size_big]))
2440                 return -ENOMEM;
2441
2442         nvhost_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d",
2443                    vm->va_limit, vm->pdes.num_pdes);
2444
2445         /* allocate the page table directory */
2446         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2447                                &vm->pdes.sgt);
2448         if (err)
2449                 goto clean_up;
2450
2451         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
2452         if (err) {
2453                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
2454                 goto clean_up;
2455         }
2456         nvhost_dbg_info("pmu pdes phys @ 0x%llx",
2457                         (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2458         /* we could release vm->pdes.kv but it's only one page... */
2459
2460         pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2461         pde_addr_lo = u64_lo32(pde_addr >> 12);
2462         pde_addr_hi = u64_hi32(pde_addr);
2463
2464         nvhost_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2465                         (u64)pde_addr, pde_addr_lo, pde_addr_hi);
2466
2467         /* allocate instance mem for pmu */
2468         inst_block->size = GK20A_PMU_INST_SIZE;
2469         inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2470                                 &inst_block->iova, GFP_KERNEL);
2471         if (!inst_block->cpuva) {
2472                 nvhost_err(d, "%s: memory allocation failed\n", __func__);
2473                 err = -ENOMEM;
2474                 goto clean_up;
2475         }
2476
2477         inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2478         if (!inst_block->cpu_pa) {
2479                 nvhost_err(d, "%s: failed to get phys address\n", __func__);
2480                 err = -ENOMEM;
2481                 goto clean_up;
2482         }
2483
2484         inst_pa = inst_block->cpu_pa;
2485         inst_ptr = inst_block->cpuva;
2486
2487         nvhost_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);
2488
2489         memset(inst_ptr, 0, GK20A_PMU_INST_SIZE);
2490
2491         mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2492                 ram_in_page_dir_base_target_vid_mem_f() |
2493                 ram_in_page_dir_base_vol_true_f() |
2494                 ram_in_page_dir_base_lo_f(pde_addr_lo));
2495
2496         mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2497                 ram_in_page_dir_base_hi_f(pde_addr_hi));
2498
2499         mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2500                  u64_lo32(vm->va_limit) | 0xFFF);
2501
2502         mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2503                 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2504
2505         nvhost_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu",
2506                               (vm->va_start >> 12), /* start */
2507                               (vm->va_limit - vm->va_start) >> 12, /*length*/
2508                               1); /* align */
2509         /* initialize just in case we try to use it anyway */
2510         nvhost_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused",
2511                               0x0badc0de, /* start */
2512                               1, /* length */
2513                               1); /* align */
2514
2515
2516         vm->mapped_buffers = RB_ROOT;
2517
2518         mutex_init(&vm->update_gmmu_lock);
2519         kref_init(&vm->ref);
2520         INIT_LIST_HEAD(&vm->reserved_va_list);
2521
2522         return 0;
2523
2524 clean_up:
2525         /* free, etc */
2526         if (inst_block->cpuva)
2527                 dma_free_coherent(d, inst_block->size,
2528                         inst_block->cpuva, inst_block->iova);
2529         inst_block->cpuva = NULL;
2530         inst_block->iova = 0;
2531         return err;
2532 }
2533
2534 /* Flushes the compression bit cache as well as "data".
2535  * Note: the name here is a bit of a misnomer.  ELPG uses this
2536  * internally... but ELPG doesn't have to be on to do it manually.
2537  */
2538 static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
2539 {
2540         u32 data;
2541         s32 retry = 100;
2542
2543         nvhost_dbg_fn("");
2544
2545         /* Make sure all previous writes are committed to the L2. There's no
2546            guarantee that writes are to DRAM. This will be a sysmembar internal
2547            to the L2. */
2548         gk20a_writel(g, ltc_ltss_g_elpg_r(),
2549                      ltc_ltss_g_elpg_flush_pending_f());
2550         do {
2551                 data = gk20a_readl(g, ltc_ltss_g_elpg_r());
2552
2553                 if (ltc_ltss_g_elpg_flush_v(data) ==
2554                     ltc_ltss_g_elpg_flush_pending_v()) {
2555                         nvhost_dbg_info("g_elpg_flush 0x%x", data);
2556                         retry--;
2557                         usleep_range(20, 40);
2558                 } else
2559                         break;
2560         } while (retry >= 0);
2561
2562         if (retry < 0)
2563                 nvhost_warn(dev_from_gk20a(g),
2564                             "g_elpg_flush too many retries");
2565
2566 }
2567
2568 void gk20a_mm_fb_flush(struct gk20a *g)
2569 {
2570         struct mm_gk20a *mm = &g->mm;
2571         u32 data;
2572         s32 retry = 100;
2573
2574         nvhost_dbg_fn("");
2575
2576         mutex_lock(&mm->l2_op_lock);
2577
2578         gk20a_mm_g_elpg_flush_locked(g);
2579
2580         /* Make sure all previous writes are committed to the L2. There's no
2581            guarantee that writes are to DRAM. This will be a sysmembar internal
2582            to the L2. */
2583         gk20a_writel(g, flush_fb_flush_r(),
2584                 flush_fb_flush_pending_busy_f());
2585
2586         do {
2587                 data = gk20a_readl(g, flush_fb_flush_r());
2588
2589                 if (flush_fb_flush_outstanding_v(data) ==
2590                         flush_fb_flush_outstanding_true_v() ||
2591                     flush_fb_flush_pending_v(data) ==
2592                         flush_fb_flush_pending_busy_v()) {
2593                                 nvhost_dbg_info("fb_flush 0x%x", data);
2594                                 retry--;
2595                                 usleep_range(20, 40);
2596                 } else
2597                         break;
2598         } while (retry >= 0);
2599
2600         if (retry < 0)
2601                 nvhost_warn(dev_from_gk20a(g),
2602                         "fb_flush too many retries");
2603
2604         mutex_unlock(&mm->l2_op_lock);
2605 }
2606
2607 static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
2608 {
2609         u32 data;
2610         s32 retry = 200;
2611
2612         /* Invalidate any clean lines from the L2 so subsequent reads go to
2613            DRAM. Dirty lines are not affected by this operation. */
2614         gk20a_writel(g, flush_l2_system_invalidate_r(),
2615                 flush_l2_system_invalidate_pending_busy_f());
2616
2617         do {
2618                 data = gk20a_readl(g, flush_l2_system_invalidate_r());
2619
2620                 if (flush_l2_system_invalidate_outstanding_v(data) ==
2621                         flush_l2_system_invalidate_outstanding_true_v() ||
2622                     flush_l2_system_invalidate_pending_v(data) ==
2623                         flush_l2_system_invalidate_pending_busy_v()) {
2624                                 nvhost_dbg_info("l2_system_invalidate 0x%x",
2625                                                 data);
2626                                 retry--;
2627                                 usleep_range(20, 40);
2628                 } else
2629                         break;
2630         } while (retry >= 0);
2631
2632         if (retry < 0)
2633                 nvhost_warn(dev_from_gk20a(g),
2634                         "l2_system_invalidate too many retries");
2635 }
2636
2637 void gk20a_mm_l2_invalidate(struct gk20a *g)
2638 {
2639         struct mm_gk20a *mm = &g->mm;
2640         mutex_lock(&mm->l2_op_lock);
2641         gk20a_mm_l2_invalidate_locked(g);
2642         mutex_unlock(&mm->l2_op_lock);
2643 }
2644
2645 void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
2646 {
2647         struct mm_gk20a *mm = &g->mm;
2648         u32 data;
2649         s32 retry = 200;
2650
2651         nvhost_dbg_fn("");
2652
2653         mutex_lock(&mm->l2_op_lock);
2654
2655         /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
2656            as clean, so subsequent reads might hit in the L2. */
2657         gk20a_writel(g, flush_l2_flush_dirty_r(),
2658                 flush_l2_flush_dirty_pending_busy_f());
2659
2660         do {
2661                 data = gk20a_readl(g, flush_l2_flush_dirty_r());
2662
2663                 if (flush_l2_flush_dirty_outstanding_v(data) ==
2664                         flush_l2_flush_dirty_outstanding_true_v() ||
2665                     flush_l2_flush_dirty_pending_v(data) ==
2666                         flush_l2_flush_dirty_pending_busy_v()) {
2667                                 nvhost_dbg_info("l2_flush_dirty 0x%x", data);
2668                                 retry--;
2669                                 usleep_range(20, 40);
2670                 } else
2671                         break;
2672         } while (retry >= 0);
2673
2674         if (retry < 0)
2675                 nvhost_warn(dev_from_gk20a(g),
2676                         "l2_flush_dirty too many retries");
2677
2678         if (invalidate)
2679                 gk20a_mm_l2_invalidate_locked(g);
2680
2681         mutex_unlock(&mm->l2_op_lock);
2682 }
2683
2684
2685 int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
2686                          struct mem_mgr **mgr, struct mem_handle **r,
2687                          u64 *offset)
2688 {
2689         struct mapped_buffer_node *mapped_buffer;
2690
2691         nvhost_dbg_fn("gpu_va=0x%llx", gpu_va);
2692
2693         mutex_lock(&vm->update_gmmu_lock);
2694
2695         mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers,
2696                                                         gpu_va);
2697         if (!mapped_buffer) {
2698                 mutex_unlock(&vm->update_gmmu_lock);
2699                 return -EINVAL;
2700         }
2701
2702         *mgr = mapped_buffer->memmgr;
2703         *r = mapped_buffer->handle_ref;
2704         *offset = gpu_va - mapped_buffer->addr;
2705
2706         mutex_unlock(&vm->update_gmmu_lock);
2707
2708         return 0;
2709 }
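
/*
 * Illustrative sketch only (kept under #if 0, not compiled): resolving a gpu
 * va (e.g. from a fault report) back to the backing handle and the offset
 * inside it with the helper above.  The example_* name is hypothetical.
 */
#if 0
static void example_resolve_gpu_va(struct vm_gk20a *vm, u64 gpu_va)
{
        struct mem_mgr *mgr;
        struct mem_handle *r;
        u64 offset;

        if (!gk20a_vm_find_buffer(vm, gpu_va, &mgr, &r, &offset))
                nvhost_dbg_info("va 0x%llx is offset 0x%llx into a mapped buffer",
                                gpu_va, offset);
}
#endif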
2710
2711 void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
2712 {
2713         struct mm_gk20a *mm = vm->mm;
2714         struct gk20a *g = gk20a_from_vm(vm);
2715         u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12);
2716         u32 data;
2717         s32 retry = 200;
2718
2719         nvhost_dbg_fn("");
2720
2721         /* page tables are considered sw state and are preserved across
2722            prepare_poweroff. When gk20a deinit releases those page tables,
2723            common code in the vm unmap path calls tlb invalidate, which
2724            touches hw. Use the power_on flag to skip the tlb invalidation
2725            when gpu power is turned off */
2726
2727         if (!g->power_on)
2728                 return;
2729
2730         /* No need to invalidate if tlb is clean */
2731         mutex_lock(&vm->update_gmmu_lock);
2732         if (!vm->tlb_dirty) {
2733                 mutex_unlock(&vm->update_gmmu_lock);
2734                 return;
2735         }
2736         vm->tlb_dirty = false;
2737         mutex_unlock(&vm->update_gmmu_lock);
2738
2739         mutex_lock(&mm->tlb_lock);
2740         do {
2741                 data = gk20a_readl(g, fb_mmu_ctrl_r());
2742                 if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
2743                         break;
2744                 usleep_range(20, 40);
2745                 retry--;
2746         } while (retry >= 0);
2747
2748         if (retry < 0)
2749                 nvhost_warn(dev_from_gk20a(g),
2750                         "wait mmu fifo space too many retries");
2751
2752         gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
2753                 fb_mmu_invalidate_pdb_addr_f(addr_lo) |
2754                 fb_mmu_invalidate_pdb_aperture_vid_mem_f());
2755
2756         /* this is a sledgehammer, it would seem */
2757         gk20a_writel(g, fb_mmu_invalidate_r(),
2758                 fb_mmu_invalidate_all_pdb_true_f() |
2759                 fb_mmu_invalidate_all_va_true_f() |
2760                 fb_mmu_invalidate_trigger_true_f());
2761
2762         do {
2763                 data = gk20a_readl(g, fb_mmu_ctrl_r());
2764                 if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
2765                         fb_mmu_ctrl_pri_fifo_empty_false_f())
2766                         break;
2767                 retry--;
2768                 usleep_range(20, 40);
2769         } while (retry >= 0);
2770
2771         if (retry < 0)
2772                 nvhost_warn(dev_from_gk20a(g),
2773                         "mmu invalidate too many retries");
2774
2775         mutex_unlock(&mm->tlb_lock);
2776 }
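
/*
 * Illustrative sketch only (kept under #if 0, not compiled): the tlb_dirty
 * handshake.  Pte/pde updates made under update_gmmu_lock only set
 * vm->tlb_dirty; the actual hw invalidate happens on the next call to
 * gk20a_mm_tlb_invalidate() and is skipped when nothing was dirtied or the
 * gpu is powered down.  The example_* name is hypothetical.
 */
#if 0
static void example_commit_pte_updates(struct vm_gk20a *vm)
{
        /* ... update_gmmu_ptes_locked() ran and set vm->tlb_dirty ... */
        gk20a_mm_tlb_invalidate(vm); /* no-op if the tlb is still clean */
}
#endif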
2777
2778 #if 0 /* VM DEBUG */
2779
2780 /* print pdes/ptes for a gpu virtual address range under a vm */
2781 void gk20a_mm_dump_vm(struct vm_gk20a *vm,
2782                 u64 va_begin, u64 va_end, char *label)
2783 {
2784         struct mem_mgr *client = mem_mgr_from_vm(vm);
2785         struct mm_gk20a *mm = vm->mm;
2786         struct page_table_gk20a *pte_s;
2787         u64 pde_va, pte_va;
2788         u32 pde_i, pde_lo, pde_hi;
2789         u32 pte_i, pte_lo, pte_hi;
2790         u32 pte_space_page_cur, pte_space_offset_cur;
2791         u32 pte_space_page_offset;
2792         u32 num_ptes, page_size;
2793         void *pde, *pte;
2794         phys_addr_t pte_addr;
2795         int err;
2796
2797         pde_range_from_vaddr_range(vm, va_begin, va_end,
2798                         &pde_lo, &pde_hi);
2799
2800         nvhost_err(dev_from_vm(vm),
2801                 "%s page table entries for gpu va 0x%016llx -> 0x%016llx\n",
2802                 label, va_begin, va_end);
2803
2804         for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
2805                 pde = pde_from_index(vm, pde_i);
2806                 pde_va = pde_i * mm->pde_stride;
2807                 nvhost_err(dev_from_vm(vm),
2808                         "\t[0x%016llx -> 0x%016llx] pde @ 0x%08x: 0x%08x, 0x%08x\n",
2809                         pde_va, pde_va + mm->pde_stride - 1,
2810                         gk20a_mm_iova_addr(vm->pdes.sgt->sgl)
2811                                 + pde_i * gmmu_pde__size_v(),
2812                         mem_rd32(pde, 0), mem_rd32(pde, 1));
2813
2814                 pte_s = vm->pdes.ptes[gmmu_page_size_small] + pde_i; /* small-page ptes assumed */
2815
2816                 num_ptes = mm->page_table_sizing[pte_s->pgsz_idx].num_ptes;
2817                 page_size = mm->pde_stride / num_ptes;
2818                 pte_lo = 0;
2819                 pte_hi = num_ptes - 1;
2820
2821                 pte_space_page_offset_from_index(pte_lo,
2822                                                 &pte_space_page_cur,
2823                                                 &pte_space_offset_cur);
2824
2825                 err = map_gmmu_pages(pte_s->ref, pte_s->sgt, &pte);
2826                 pte_s->sgt = nvhost_memmgr_sg_table(client, pte_s->ref);
2827                 if (WARN_ON(IS_ERR(pte_s->sgt)))
2828                         return;
2829                 pte_addr = gk20a_mm_iova_addr(pte_s->sgt->sgl);
2830
2831                 for (pte_i = pte_lo; pte_i <= pte_hi; pte_i++) {
2832
2833                         pte_va = pde_va + pte_i * page_size;
2834
2835                         if (pte_va < va_begin)
2836                                 continue;
2837                         if (pte_va > va_end)
2838                                 break;
2839
2840                         pte_space_page_offset = pte_i;
2841
2842                         nvhost_err(dev_from_vm(vm),
2843                                 "\t\t[0x%016llx -> 0x%016llx] pte @ 0x%08x : 0x%08x, 0x%08x\n",
2844                                 pte_va, pte_va + page_size - 1,
2845                                 pte_addr + pte_i * gmmu_pte__size_v(),
2846                                 mem_rd32(pte + pte_space_page_offset * 8, 0),
2847                                 mem_rd32(pte + pte_space_page_offset * 8, 1));
2848                 }
2849
2850                 unmap_gmmu_pages(pte_s->ref, pte_s->sgt, pte);
2851         }
2852 }
2853 #endif /* VM DEBUG */
2854
2855 int gk20a_mm_suspend(struct gk20a *g)
2856 {
2857         nvhost_dbg_fn("");
2858
2859         gk20a_mm_fb_flush(g);
2860         gk20a_mm_l2_flush(g, true);
2861
2862         nvhost_dbg_fn("done");
2863         return 0;
2864 }
2865
2866 void gk20a_mm_ltc_isr(struct gk20a *g)
2867 {
2868         u32 intr;
2869
2870         intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r());
2871         nvhost_err(dev_from_gk20a(g), "ltc: %08x\n", intr);
2872         gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
2873 }
2874
2875 bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
2876 {
2877         u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
2878         return fb_mmu_debug_ctrl_debug_v(debug_ctrl) ==
2879                 fb_mmu_debug_ctrl_debug_enabled_v();
2880 }
2881