video: tegra: host: gk20a: Fix duplication checks
[linux-3.10.git] drivers/video/tegra/host/gk20a/mm_gk20a.c
1 /*
2  * drivers/video/tegra/host/gk20a/mm_gk20a.c
3  *
4  * GK20A memory management
5  *
6  * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>
23 #include <linux/highmem.h>
24 #include <linux/log2.h>
25 #include <linux/nvhost.h>
26 #include <linux/scatterlist.h>
27 #include <linux/nvmap.h>
28 #include <linux/tegra-soc.h>
29 #include <asm/cacheflush.h>
30
31 #include "dev.h"
32 #include "nvhost_as.h"
33 #include "gk20a.h"
34 #include "mm_gk20a.h"
35 #include "hw_gmmu_gk20a.h"
36 #include "hw_fb_gk20a.h"
37 #include "hw_bus_gk20a.h"
38 #include "hw_ram_gk20a.h"
39 #include "hw_mc_gk20a.h"
40 #include "hw_flush_gk20a.h"
41 #include "hw_ltc_gk20a.h"
42
43 #include "kind_gk20a.h"
44
45 /*
46  * GPU mapping life cycle
47  * ======================
48  *
49  * Kernel mappings
50  * ---------------
51  *
52  * Kernel mappings are created through vm.map(..., false):
53  *
54  *  - Mappings to the same allocations are reused and refcounted.
55  *  - This path does not support deferred unmapping (i.e. kernel must wait for
56  *    all hw operations on the buffer to complete before unmapping).
57  *  - References to memmgr and mem_handle are owned and managed by the (kernel)
58  *    clients of the gk20a_vm layer.
59  *
60  *
61  * User space mappings
62  * -------------------
63  *
64  * User space mappings are created through as.map_buffer -> vm.map(..., true):
65  *
66  *  - Mappings to the same allocations are reused and refcounted.
67  *  - This path supports deferred unmapping (i.e. we delay the actual unmapping
68  *    until all hw operations have completed).
69  *  - References to memmgr and mem_handle are owned and managed by the vm_gk20a
70  *    layer itself. vm.map acquires these refs, and sets
71  *    mapped_buffer->own_mem_ref to record that we must release the refs when we
72  *    actually unmap.
73  *
74  */
75
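/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a kernel-internal client of the gk20a_vm layer, which already holds its
 * own memmgr/mem_handle references, would take the kernel path described
 * above roughly like this, letting the vm pick the GPU virtual address
 * (offset_align == 0):
 *
 *	struct sg_table *sgt;
 *	u64 gpu_va;
 *
 *	gpu_va = gk20a_vm_map(vm, memmgr, r,
 *			      0,              offset_align: vm chooses the va
 *			      0,              flags: no FIXED_OFFSET, not cacheable
 *			      0,              kind (0 used here purely for illustration)
 *			      &sgt,
 *			      false,          kernel mapping, not user mapped
 *			      mem_flag_none);
 *	if (!gpu_va)
 *		return -ENOMEM;
 *
 * Because this is the kernel path, the caller keeps its memmgr/mem_handle
 * references and must wait for all hw work on the buffer to complete before
 * unmapping; deferred unmap is only available on the user-space path.
 */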
76 static inline int vm_aspace_id(struct vm_gk20a *vm)
77 {
78         /* -1 is bar1 or pmu, etc. */
79         return vm->as_share ? vm->as_share->id : -1;
80 }
81 static inline u32 hi32(u64 f)
82 {
83         return (u32)(f >> 32);
84 }
85 static inline u32 lo32(u64 f)
86 {
87         return (u32)(f & 0xffffffff);
88 }
89
90 #define FLUSH_CPU_DCACHE(va, pa, size)  \
91         do {    \
92                 __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
93                 outer_flush_range(pa, pa + (size_t)(size));             \
94         } while (0)
95
96 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
97 static struct mapped_buffer_node *find_mapped_buffer_locked(
98                                         struct rb_root *root, u64 addr);
99 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
100                                 struct rb_root *root, struct mem_handle *r);
101 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
102                                    enum gmmu_pgsz_gk20a pgsz_idx,
103                                    struct sg_table *sgt,
104                                    u64 first_vaddr, u64 last_vaddr,
105                                    u8 kind_v, u32 ctag_offset, bool cacheable,
106                                    int rw_flag);
107 static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
108 static void gk20a_vm_remove_support(struct vm_gk20a *vm);
109
110
111 /* note: keep the page sizes sorted lowest to highest here */
112 static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
113 static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
114 static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
115                                                                 0x1ffffLL };
116 static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
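/*
 * Editorial note: the four tables above are consistent by construction:
 * for each index, page size == 1 << shift, the offset mask is (size - 1)
 * and the page mask is its complement, e.g. SZ_128K -> shift 17,
 * offset mask 0x1ffff, page mask ~0x1ffff.
 */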
117
118 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
119 {
120         nvhost_dbg_fn("");
121         gk20a_reset(g, mc_enable_pfb_enabled_f()
122                         | mc_enable_l2_enabled_f()
123                         | mc_enable_ce2_enabled_f()
124                         | mc_enable_xbar_enabled_f()
125                         | mc_enable_hub_enabled_f());
126
127         return 0;
128 }
129
130 void gk20a_remove_mm_support(struct mm_gk20a *mm)
131 {
132         struct gk20a *g = mm->g;
133         struct device *d = dev_from_gk20a(g);
134         struct vm_gk20a *vm = &mm->bar1.vm;
135         struct inst_desc *inst_block = &mm->bar1.inst_block;
136
137         nvhost_dbg_fn("");
138
139         if (inst_block->cpuva)
140                 dma_free_coherent(d, inst_block->size,
141                         inst_block->cpuva, inst_block->iova);
142         inst_block->cpuva = NULL;
143         inst_block->iova = 0;
144
145         gk20a_vm_remove_support(vm);
146 }
147
148 int gk20a_init_mm_setup_sw(struct gk20a *g)
149 {
150         struct mm_gk20a *mm = &g->mm;
151         int i;
152
153         nvhost_dbg_fn("");
154
155         if (mm->sw_ready) {
156                 nvhost_dbg_fn("skip init");
157                 return 0;
158         }
159
160         mm->g = g;
161         mutex_init(&mm->tlb_lock);
162         mutex_init(&mm->l2_op_lock);
163         mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
164         mm->pde_stride    = mm->big_page_size << 10;
165         mm->pde_stride_shift = ilog2(mm->pde_stride);
166         BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
167
168         for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {
169
170                 u32 num_ptes, pte_space, num_pages;
171
172                 /* assuming "full" page tables */
173                 num_ptes = mm->pde_stride / gmmu_page_sizes[i];
174
175                 pte_space = num_ptes * gmmu_pte__size_v();
176                 /* allocate whole pages */
177                 pte_space = roundup(pte_space, PAGE_SIZE);
178
179                 num_pages = pte_space / PAGE_SIZE;
180                 /* make sure "order" is viable */
181                 BUG_ON(!is_power_of_2(num_pages));
182
183                 mm->page_table_sizing[i].num_ptes = num_ptes;
184                 mm->page_table_sizing[i].order = ilog2(num_pages);
185         }
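        /*
         * Worked example (editorial): with a 128KB big page size,
         * pde_stride = 128KB << 10 = 128MB.  A "full" page table for the
         * 4KB page size then holds 128MB / 4KB = 32768 ptes; at 8B per
         * pte that is 256KB of pte space, i.e. 64 pages, order 6.  For
         * the 128KB page size it is 1024 ptes = 8KB = 2 pages, order 1.
         */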
186
187         /*TBD: make channel vm size configurable */
188         mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
189
190         nvhost_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
191
192         nvhost_dbg_info("small page-size (%dKB) pte array: %dKB",
193                         gmmu_page_sizes[gmmu_page_size_small] >> 10,
194                         (mm->page_table_sizing[gmmu_page_size_small].num_ptes *
195                          gmmu_pte__size_v()) >> 10);
196
197         nvhost_dbg_info("big page-size (%dKB) pte array: %dKB",
198                         gmmu_page_sizes[gmmu_page_size_big] >> 10,
199                         (mm->page_table_sizing[gmmu_page_size_big].num_ptes *
200                          gmmu_pte__size_v()) >> 10);
201
202
203         gk20a_init_bar1_vm(mm);
204
205         gk20a_init_uncompressed_kind_map();
206         gk20a_init_kind_attr();
207
208         mm->remove_support = gk20a_remove_mm_support;
209         mm->sw_ready = true;
210
211         nvhost_dbg_fn("done");
212         return 0;
213 }
214
215 /* make sure gk20a_init_mm_support is called before */
216 static int gk20a_init_mm_setup_hw(struct gk20a *g)
217 {
218         struct mm_gk20a *mm = &g->mm;
219         struct inst_desc *inst_block = &mm->bar1.inst_block;
220         phys_addr_t inst_pa = inst_block->cpu_pa;
221
222         nvhost_dbg_fn("");
223
224         /* set large page size in fb
225          * note this is very early on, can we defer it ? */
226         {
227                 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
228
229                 if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K)
230                         fb_mmu_ctrl = (fb_mmu_ctrl &
231                                        ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
232                                 fb_mmu_ctrl_vm_pg_size_128kb_f();
233                 else
234                         BUG_ON(1); /* no support/testing for larger ones yet */
235
236                 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
237         }
238
239         inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
240         nvhost_dbg_info("bar1 inst block ptr: 0x%08x",  (u32)inst_pa);
241
242         /* this is very early in init... can we defer this? */
243         {
244                 gk20a_writel(g, bus_bar1_block_r(),
245                              bus_bar1_block_target_vid_mem_f() |
246                              bus_bar1_block_mode_virtual_f() |
247                              bus_bar1_block_ptr_f(inst_pa));
248         }
249
250         nvhost_dbg_fn("done");
251         return 0;
252 }
253
254 int gk20a_init_mm_support(struct gk20a *g)
255 {
256         int err;
257
258         err = gk20a_init_mm_reset_enable_hw(g);
259         if (err)
260                 return err;
261
262         err = gk20a_init_mm_setup_sw(g);
263         if (err)
264                 return err;
265
266         err = gk20a_init_mm_setup_hw(g);
267         if (err)
268                 return err;
269
270         return err;
271 }
272
273 #ifdef TEGRA_GRHOST_GK20A_PHYS_PAGE_TABLES
274 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
275                             void **handle,
276                             struct sg_table **sgt)
277 {
278         u32 num_pages = 1 << order;
279         u32 len = num_pages * PAGE_SIZE;
280         int err;
281         struct page *pages;
282
283         nvhost_dbg_fn("");
284
285         pages = alloc_pages(GFP_KERNEL, order);
286         if (!pages) {
287                 nvhost_dbg(dbg_pte, "alloc_pages failed\n");
288                 goto err_out;
289         }
290         *sgt = kzalloc(sizeof(**sgt), GFP_KERNEL);
291         if (!(*sgt)) {
292                 nvhost_dbg(dbg_pte, "cannot allocate sg table");
293                 goto err_alloced;
294         }
295         err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
296         if (err) {
297                 nvhost_dbg(dbg_pte, "sg_alloc_table failed\n");
298                 goto err_sg_table;
299         }
300         sg_set_page((*sgt)->sgl, pages, len, 0);
301         *handle = page_address(pages);
302         memset(*handle, 0, len);
303         FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len);
304
305         return 0;
306
307 err_sg_table:
308         kfree(*sgt);
309 err_alloced:
310         __free_pages(pages, order);
311 err_out:
312         return -ENOMEM;
313 }
314
315 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
316                             struct sg_table *sgt, u32 order)
317 {
318         nvhost_dbg_fn("");
319         BUG_ON(sgt == NULL);
320         free_pages((unsigned long)handle, order);
321         sg_free_table(sgt);
322         kfree(sgt);
323 }
324
325 static int map_gmmu_pages(void *handle, struct sg_table *sgt, void **va)
326 {
327         FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
328         *va = handle;
329         return 0;
330 }
331
332 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, u32 *va)
333 {
334         FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
335 }
336 #else
337 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
338                             void **handle,
339                             struct sg_table **sgt)
340 {
341         struct mem_mgr *client = mem_mgr_from_vm(vm);
342         struct mem_handle *r;
343         u32 num_pages = 1 << order;
344         u32 len = num_pages * PAGE_SIZE;
345         void *va;
346
347         nvhost_dbg_fn("");
348
349         r = nvhost_memmgr_alloc(client, len,
350                                 DEFAULT_ALLOC_ALIGNMENT,
351                                 DEFAULT_ALLOC_FLAGS,
352                                 0);
353         if (IS_ERR(r)) {
354                 nvhost_dbg(dbg_pte, "nvmap_alloc failed\n");
355                 goto err_out;
356         }
357         va = nvhost_memmgr_mmap(r);
358         if (!va) {
359                 nvhost_dbg(dbg_pte, "nvmap_mmap failed\n");
360                 goto err_alloced;
361         }
362         memset(va, 0, len);
363         nvhost_memmgr_munmap(r, va);
364
365         *sgt = nvhost_memmgr_pin(client, r, dev_from_vm(vm), mem_flag_none);
366         if (IS_ERR(*sgt)) {
367                 *sgt = NULL;
368                 goto err_alloced;
369         }
370
371         *handle = (void *)r;
372
373         return 0;
374
375 err_alloced:
376         nvhost_memmgr_put(client, r);
377 err_out:
378         return -ENOMEM;
379 }
380
381 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
382                             struct sg_table *sgt, u32 order)
383 {
384         struct mem_mgr *client = mem_mgr_from_vm(vm);
385         nvhost_dbg_fn("");
386         BUG_ON(sgt == NULL);
387         nvhost_memmgr_unpin(client, handle, dev_from_vm(vm), sgt);
388         nvhost_memmgr_put(client, handle);
389 }
390
391 static int map_gmmu_pages(void *handle, struct sg_table *sgt, void **va)
392 {
393         struct mem_handle *r = handle;
394         u32 *tmp_va;
395
396         nvhost_dbg_fn("");
397
398         tmp_va = nvhost_memmgr_mmap(r);
399         if (!tmp_va)
400                 goto err_out;
401
402         *va = tmp_va;
403         return 0;
404
405 err_out:
406         return -ENOMEM;
407 }
408
409 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, u32 *va)
410 {
411         struct mem_handle *r = handle;
412         nvhost_dbg_fn("");
413         nvhost_memmgr_munmap(r, va);
414 }
415 #endif
416
417 /* allocate a phys contig region big enough for a full
418  * sized gmmu page table for the given gmmu_page_size.
419  * the whole range is zeroed so it's "invalid"/will fault
420  */
421
422 static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
423                                         enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
424                                         struct page_table_gk20a *pte)
425 {
426         int err;
427         u32 pte_order;
428         void *handle;
429         struct sg_table *sgt;
430
431         nvhost_dbg_fn("");
432
433         /* allocate enough pages for the table */
434         pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order;
435
436         err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt);
437         if (err)
438                 return err;
439
440         nvhost_dbg(dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
441                         pte, gk20a_mm_iova_addr(sgt->sgl), pte_order);
442
443         pte->ref = handle;
444         pte->sgt = sgt;
445
446         return 0;
447 }
448
449 /* given address range (inclusive) determine the pdes crossed */
450 static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm,
451                                               u64 addr_lo, u64 addr_hi,
452                                               u32 *pde_lo, u32 *pde_hi)
453 {
454         *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift);
455         *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift);
456         nvhost_dbg(dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
457                    addr_lo, addr_hi, vm->mm->pde_stride_shift);
458         nvhost_dbg(dbg_pte, "pde_lo=%d pde_hi=%d",
459                    *pde_lo, *pde_hi);
460 }
461
462 static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
463 {
464         return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v());
465 }
466
467 static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm,
468                                        u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
469 {
470         u32 ret;
471         /* mask off pde part */
472         addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1));
473         /* shift over to get pte index. note assumption that pte index
474          * doesn't leak over into the high 32b */
475         ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]);
476
477         nvhost_dbg(dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
478         return ret;
479 }
480
481 static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page,
482                                                     u32 *pte_offset)
483 {
484         /* ptes are 8B regardless of pagesize */
485         /* pte space pages are 4KB. so 512 ptes per 4KB page*/
486         *pte_page = i >> 9;
487         /* pte space pages are 4KB, so 512 ptes per 4KB page */
488         /* this offset is a pte offset, not a byte offset */
489         *pte_offset = i & ((1<<9)-1);
490
491         nvhost_dbg(dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x",
492                    i, *pte_page, *pte_offset);
493 }
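/*
 * Worked example (editorial): with pde_stride_shift == 27 (128MB per pde),
 * a GPU VA of 0x123456000 decomposes as
 *
 *	pde index  = va >> 27                 = 0x24
 *	pte index  = (va & (128MB - 1)) >> 12 = 0x3456   (4KB pages)
 *
 * and pte index 0x3456 lands in pte space page 0x3456 >> 9 = 0x1a at
 * pte offset 0x3456 & 0x1ff = 0x56.
 */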
494
495
496 /*
497  * given a pde index/page table number make sure it has
498  * backing store and, if not, go ahead and allocate it and
499  * record it in the appropriate pde
500  */
501 static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
502                                 u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
503 {
504         int err;
505         struct page_table_gk20a *pte =
506                 vm->pdes.ptes[gmmu_pgsz_idx] + i;
507
508         nvhost_dbg_fn("");
509
510         /* if it's already in place it's valid */
511         if (pte->ref)
512                 return 0;
513
514         nvhost_dbg(dbg_pte, "alloc %dKB ptes for pde %d",
515                    gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
516
517         err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
518         if (err)
519                 return err;
520
521         /* rewrite pde */
522         update_gmmu_pde_locked(vm, i);
523
524         return 0;
525 }
526
527 static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
528                                                        u64 addr)
529 {
530         struct vm_reserved_va_node *va_node;
531         list_for_each_entry(va_node, &vm->reserved_va_list, reserved_va_list)
532                 if (addr >= va_node->vaddr_start &&
533                     addr < (u64)va_node->vaddr_start + (u64)va_node->size)
534                         return va_node;
535
536         return NULL;
537 }
538
539 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
540                          struct mapped_buffer_node ***mapped_buffers,
541                          int *num_buffers)
542 {
543         struct mapped_buffer_node *mapped_buffer;
544         struct mapped_buffer_node **buffer_list;
545         struct rb_node *node;
546         int i = 0;
547
548         mutex_lock(&vm->update_gmmu_lock);
549
550         buffer_list = kzalloc(sizeof(*buffer_list) *
551                               vm->num_user_mapped_buffers, GFP_KERNEL);
552         if (!buffer_list) {
553                 mutex_unlock(&vm->update_gmmu_lock);
554                 return -ENOMEM;
555         }
556
557         node = rb_first(&vm->mapped_buffers);
558         while (node) {
559                 mapped_buffer =
560                         container_of(node, struct mapped_buffer_node, node);
561                 if (mapped_buffer->user_mapped) {
562                         buffer_list[i] = mapped_buffer;
563                         kref_get(&mapped_buffer->ref);
564                         i++;
565                 }
566                 node = rb_next(&mapped_buffer->node);
567         }
568
569         BUG_ON(i != vm->num_user_mapped_buffers);
570
571         *num_buffers = vm->num_user_mapped_buffers;
572         *mapped_buffers = buffer_list;
573
574         mutex_unlock(&vm->update_gmmu_lock);
575
576         return 0;
577 }
578
579 static void gk20a_vm_unmap_locked_kref(struct kref *ref)
580 {
581         struct mapped_buffer_node *mapped_buffer =
582                 container_of(ref, struct mapped_buffer_node, ref);
583         gk20a_vm_unmap_locked(mapped_buffer);
584 }
585
586 void gk20a_vm_put_buffers(struct vm_gk20a *vm,
587                                  struct mapped_buffer_node **mapped_buffers,
588                                  int num_buffers)
589 {
590         int i;
591
592         mutex_lock(&vm->update_gmmu_lock);
593
594         for (i = 0; i < num_buffers; ++i)
595                 kref_put(&mapped_buffers[i]->ref,
596                          gk20a_vm_unmap_locked_kref);
597
598         mutex_unlock(&vm->update_gmmu_lock);
599
600         kfree(mapped_buffers);
601 }
602
603 static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
604 {
605         struct device *d = dev_from_vm(vm);
606         int retries;
607         struct mapped_buffer_node *mapped_buffer;
608
609         mutex_lock(&vm->update_gmmu_lock);
610
611         mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
612         if (!mapped_buffer) {
613                 mutex_unlock(&vm->update_gmmu_lock);
614                 nvhost_err(d, "invalid addr to unmap 0x%llx", offset);
615                 return;
616         }
617
618         if (mapped_buffer->flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
619                 mutex_unlock(&vm->update_gmmu_lock);
620
621                 retries = 1000;
622                 while (retries) {
623                         if (atomic_read(&mapped_buffer->ref.refcount) == 1)
624                                 break;
625                         retries--;
626                         udelay(50);
627                 }
628                 if (!retries)
629                         nvhost_err(d, "sync-unmap failed on 0x%llx",
630                                                                 offset);
631                 mutex_lock(&vm->update_gmmu_lock);
632         }
633
634         mapped_buffer->user_mapped--;
635         if (mapped_buffer->user_mapped == 0)
636                 vm->num_user_mapped_buffers--;
637         kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
638
639         mutex_unlock(&vm->update_gmmu_lock);
640 }
641
642 static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
643                              u64 size,
644                              enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
645
646 {
647         struct nvhost_allocator *vma = &vm->vma[gmmu_pgsz_idx];
648         int err;
649         u64 offset;
650         u32 start_page_nr = 0, num_pages;
651         u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx];
652
653         if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) {
654                 dev_warn(dev_from_vm(vm),
655                          "invalid page size requested in gk20a vm alloc");
656                 return -EINVAL;
657         }
658
659         if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
660                 dev_warn(dev_from_vm(vm),
661                          "unsupported page size requested");
662                 return -EINVAL;
663
664         }
665
666         /* be certain we round up to gmmu_page_size if needed */
667         /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
668         size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
669
670         nvhost_dbg_info("size=0x%llx @ pgsz=%dKB", size,
671                         gmmu_page_sizes[gmmu_pgsz_idx]>>10);
672
673         /* The vma allocator represents page accounting. */
674         num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx];
675
676         err = vma->alloc(vma, &start_page_nr, num_pages);
677
678         if (err) {
679                 nvhost_err(dev_from_vm(vm),
680                            "%s oom: sz=0x%llx", vma->name, size);
681                 return 0;
682         }
683
684         offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx];
685         nvhost_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
686
687         return offset;
688 }
689
690 static void gk20a_vm_free_va(struct vm_gk20a *vm,
691                              u64 offset, u64 size,
692                              enum gmmu_pgsz_gk20a pgsz_idx)
693 {
694         struct nvhost_allocator *vma = &vm->vma[pgsz_idx];
695         u32 page_size = gmmu_page_sizes[pgsz_idx];
696         u32 page_shift = gmmu_page_shifts[pgsz_idx];
697         u32 start_page_nr, num_pages;
698         int err;
699
700         nvhost_dbg_info("%s free addr=0x%llx, size=0x%llx",
701                         vma->name, offset, size);
702
703         start_page_nr = (u32)(offset >> page_shift);
704         num_pages = (u32)((size + page_size - 1) >> page_shift);
705
706         err = vma->free(vma, start_page_nr, num_pages);
707         if (err) {
708                 nvhost_err(dev_from_vm(vm),
709                            "not found: offset=0x%llx, sz=0x%llx",
710                            offset, size);
711         }
712 }
713
714 static int insert_mapped_buffer(struct rb_root *root,
715                                 struct mapped_buffer_node *mapped_buffer)
716 {
717         struct rb_node **new_node = &(root->rb_node), *parent = NULL;
718
719         /* Figure out where to put new node */
720         while (*new_node) {
721                 struct mapped_buffer_node *cmp_with =
722                         container_of(*new_node, struct mapped_buffer_node,
723                                      node);
724
725                 parent = *new_node;
726
727                 if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */
728                         new_node = &((*new_node)->rb_left);
729                 else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */
730                         new_node = &((*new_node)->rb_right);
731                 else
732                         return -EINVAL; /* no fair dup'ing */
733         }
734
735         /* Add new node and rebalance tree. */
736         rb_link_node(&mapped_buffer->node, parent, new_node);
737         rb_insert_color(&mapped_buffer->node, root);
738
739         return 0;
740 }
741
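/*
 * Editorial note: the mapped-buffer rb-tree is keyed by GPU virtual
 * address.  Of the lookup helpers below, find_mapped_buffer_locked()
 * does an exact match on a mapping's start address,
 * find_mapped_buffer_range_locked() returns the mapping whose
 * [addr, addr + size) range contains the given address, and
 * find_mapped_buffer_reverse_locked() finds a mapping by its mem_handle
 * with a linear walk of the whole tree.
 */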
742 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
743                                 struct rb_root *root, struct mem_handle *r)
744 {
745         struct rb_node *node = rb_first(root);
746         while (node) {
747                 struct mapped_buffer_node *mapped_buffer =
748                         container_of(node, struct mapped_buffer_node, node);
749                 if (mapped_buffer->handle_ref == r)
750                         return mapped_buffer;
751                 node = rb_next(&mapped_buffer->node);
752         }
753         return NULL;
754 }
755
756 static struct mapped_buffer_node *find_mapped_buffer_locked(
757                                         struct rb_root *root, u64 addr)
758 {
759
760         struct rb_node *node = root->rb_node;
761         while (node) {
762                 struct mapped_buffer_node *mapped_buffer =
763                         container_of(node, struct mapped_buffer_node, node);
764                 if (mapped_buffer->addr > addr) /* u64 cmp */
765                         node = node->rb_left;
766                 else if (mapped_buffer->addr != addr) /* u64 cmp */
767                         node = node->rb_right;
768                 else
769                         return mapped_buffer;
770         }
771         return NULL;
772 }
773
774 static struct mapped_buffer_node *find_mapped_buffer_range_locked(
775                                         struct rb_root *root, u64 addr)
776 {
777         struct rb_node *node = root->rb_node;
778         while (node) {
779                 struct mapped_buffer_node *m =
780                         container_of(node, struct mapped_buffer_node, node);
781                 if (m->addr <= addr && m->addr + m->size > addr)
782                         return m;
783                 else if (m->addr > addr) /* u64 cmp */
784                         node = node->rb_left;
785                 else
786                         node = node->rb_right;
787         }
788         return NULL;
789 }
790
791 /* convenience setup for nvmap buffer attr queries */
792 struct bfr_attr_query {
793         int err;
794         u64 v;
795 };
796 static u32 nvmap_bfr_param[] = {
797 #define BFR_SIZE   0
798         NVMAP_HANDLE_PARAM_SIZE,
799 #define BFR_ALIGN  1
800         NVMAP_HANDLE_PARAM_ALIGNMENT,
801 #define BFR_HEAP   2
802         NVMAP_HANDLE_PARAM_HEAP,
803 #define BFR_KIND   3
804         NVMAP_HANDLE_PARAM_KIND,
805 };
806 #define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
807
808 struct buffer_attrs {
809         struct sg_table *sgt;
810         u64 size;
811         u64 align;
812         u32 ctag_offset;
813         u32 ctag_lines;
814         int pgsz_idx;
815         u8 kind_v;
816         u8 uc_kind_v;
817 };
818
819 static void gmmu_select_page_size(struct buffer_attrs *bfr)
820 {
821         int i;
822         /*  choose the biggest first (top->bottom) */
823         for (i = (gmmu_nr_page_sizes-1); i >= 0; i--)
824                 if (!(gmmu_page_offset_masks[i] & bfr->align)) {
825                         /* would like to add this too but nvmap returns the
826                          * original requested size not the allocated size.
827                          * (!(gmmu_page_offset_masks[i] & bfr->size)) */
828                         bfr->pgsz_idx = i;
829                         break;
830                 }
831 }
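/*
 * Editorial example: an allocation aligned to 0x40000 has no bits set in
 * the 128KB offset mask 0x1ffff, so the big page size is selected; an
 * allocation aligned only to 0x1000 clears just the 4KB mask 0xfff and
 * therefore falls back to 4KB pages.
 */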
832
833 static int setup_buffer_size_and_align(struct device *d,
834                                        struct buffer_attrs *bfr,
835                                        struct bfr_attr_query *query,
836                                        u64 offset, u32 flags)
837 {
838         /* buffer allocation size and alignment must be a multiple
839            of one of the supported page sizes.*/
840         bfr->size = query[BFR_SIZE].v;
841         bfr->align = query[BFR_ALIGN].v;
842         bfr->pgsz_idx = -1;
843
844         /* If FIXED_OFFSET is set, the page size is determined by the offset.
845          * Otherwise, select the page size according to memory alignment */
846         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
847                 bfr->pgsz_idx = NV_GMMU_VA_IS_UPPER(offset) ?
848                                 gmmu_page_size_big : gmmu_page_size_small;
849         } else {
850                 gmmu_select_page_size(bfr);
851         }
852
853         if (unlikely(bfr->pgsz_idx == -1)) {
854                 nvhost_warn(d, "unsupported buffer alignment: 0x%llx",
855                            bfr->align);
856                 return -EINVAL;
857         }
858
859         bfr->kind_v = query[BFR_KIND].v;
860
861         return 0;
862 }
863
864
865 static int setup_buffer_kind_and_compression(struct device *d,
866                                              u32 flags,
867                                              struct buffer_attrs *bfr,
868                                              enum gmmu_pgsz_gk20a pgsz_idx)
869 {
870         bool kind_compressible;
871
872         if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
873                 bfr->kind_v = gmmu_pte_kind_pitch_v();
874
875         if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
876                 nvhost_err(d, "kind 0x%x not supported", bfr->kind_v);
877                 return -EINVAL;
878         }
879
880         bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
881         /* find a suitable uncompressed kind if it becomes necessary later */
882         kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
883         if (kind_compressible) {
884                 bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
885                 if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
886                         /* shouldn't happen, but it is worth cross-checking */
887                         nvhost_err(d, "comptag kind 0x%x can't be"
888                                    " downgraded to uncompressed kind",
889                                    bfr->kind_v);
890                         return -EINVAL;
891                 }
892         }
893         /* comptags only supported for suitable kinds, 128KB pagesize */
894         if (unlikely(kind_compressible &&
895                      (gmmu_page_sizes[pgsz_idx] != 128*1024))) {
896                 /*
897                 nvhost_warn(d, "comptags specified"
898                 " but pagesize being used doesn't support it");*/
899                 /* it is safe to fall back to uncompressed as
900                    functionality is not harmed */
901                 bfr->kind_v = bfr->uc_kind_v;
902                 kind_compressible = false;
903         }
904         if (kind_compressible)
905                 bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
906                         COMP_TAG_LINE_SIZE_SHIFT;
907         else
908                 bfr->ctag_lines = 0;
909
910         return 0;
911 }
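/*
 * Editorial example, assuming a comptag line covers 128KB
 * (COMP_TAG_LINE_SIZE_SHIFT == 17): a 1MB compressible buffer mapped
 * with 128KB pages needs ALIGN(1MB, 128KB) >> 17 = 8 comptag lines,
 * while the same buffer mapped with 4KB pages has ctag_lines forced to
 * 0 and its kind downgraded to the uncompressed kind, as handled above.
 */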
912
913 static int validate_fixed_buffer(struct vm_gk20a *vm,
914                                  struct buffer_attrs *bfr,
915                                  u64 map_offset)
916 {
917         struct device *dev = dev_from_vm(vm);
918         struct vm_reserved_va_node *va_node;
919         struct mapped_buffer_node *buffer;
920
921         if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) {
922                 nvhost_err(dev, "map offset must be buffer page size aligned 0x%llx",
923                            map_offset);
924                 return -EINVAL;
925         }
926
927         /* find the space reservation */
928         va_node = addr_to_reservation(vm, map_offset);
929         if (!va_node) {
930                 nvhost_warn(dev, "fixed offset mapping without space allocation");
931                 return -EINVAL;
932         }
933
934         /* check that this mapping does not collide with existing
935          * mappings by checking the overlapping area between the current
936          * buffer and all other mapped buffers */
937
938         list_for_each_entry(buffer,
939                 &va_node->va_buffers_list, va_buffers_list) {
940                 s64 begin = max(buffer->addr, map_offset);
941                 s64 end = min(buffer->addr +
942                         buffer->size, map_offset + bfr->size);
943                 if (end - begin > 0) {
944                         nvhost_warn(dev, "overlapping buffer map requested");
945                         return -EINVAL;
946                 }
947         }
948
949         return 0;
950 }
951
952 static u64 __locked_gmmu_map(struct vm_gk20a *vm,
953                                 u64 map_offset,
954                                 struct sg_table *sgt,
955                                 u64 size,
956                                 int pgsz_idx,
957                                 u8 kind_v,
958                                 u32 ctag_offset,
959                                 u32 flags,
960                                 int rw_flag)
961 {
962         int err = 0, i = 0;
963         u32 pde_lo, pde_hi;
964         struct device *d = dev_from_vm(vm);
965
966         /* Allocate (or validate when map_offset != 0) the virtual address. */
967         if (!map_offset) {
968                 map_offset = gk20a_vm_alloc_va(vm, size,
969                                           pgsz_idx);
970                 if (!map_offset) {
971                         nvhost_err(d, "failed to allocate va space");
972                         err = -ENOMEM;
973                         goto fail;
974                 }
975         }
976
977         pde_range_from_vaddr_range(vm,
978                                    map_offset,
979                                    map_offset + size - 1,
980                                    &pde_lo, &pde_hi);
981
982         /* mark the addr range valid (but with 0 phys addr, which will fault) */
983         for (i = pde_lo; i <= pde_hi; i++) {
984                 err = validate_gmmu_page_table_gk20a_locked(vm, i,
985                                                             pgsz_idx);
986                 if (err) {
987                         nvhost_err(d, "failed to validate page table %d: %d",
988                                                            i, err);
989                         goto fail;
990                 }
991         }
992
993         err = update_gmmu_ptes_locked(vm, pgsz_idx,
994                                       sgt,
995                                       map_offset, map_offset + size - 1,
996                                       kind_v,
997                                       ctag_offset,
998                                       flags &
999                                       NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1000                                       rw_flag);
1001         if (err) {
1002                 nvhost_err(d, "failed to update ptes on map");
1003                 goto fail;
1004         }
1005
1006         return map_offset;
1007  fail:
1008         nvhost_err(d, "%s: failed with err=%d\n", __func__, err);
1009         return 0;
1010 }
1011
1012 static void __locked_gmmu_unmap(struct vm_gk20a *vm,
1013                                 u64 vaddr,
1014                                 u64 size,
1015                                 int pgsz_idx,
1016                                 bool va_allocated,
1017                                 int rw_flag)
1018 {
1019         int err = 0;
1020         struct gk20a *g = gk20a_from_vm(vm);
1021
1022         if (va_allocated)
1023                 gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
1024
1025         /* unmap here needs to know the page size we assigned at mapping */
1026         err = update_gmmu_ptes_locked(vm,
1027                                 pgsz_idx,
1028                                 0, /* n/a for unmap */
1029                                 vaddr,
1030                                 vaddr + size - 1,
1031                                 0, 0, false /* n/a for unmap */,
1032                                 rw_flag);
1033         if (err)
1034                 dev_err(dev_from_vm(vm),
1035                         "failed to update gmmu ptes on unmap");
1036
1037         /* detect which if any pdes/ptes can now be released */
1038
1039         /* flush l2 so any dirty lines are written out *now*.
1040          *  also as we could potentially be switching this buffer
1041          * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
1042          * some point in the future we need to invalidate l2.  e.g. switching
1043          * from a render buffer unmap (here) to later using the same memory
1044          * for gmmu ptes.  note the positioning of this relative to any smmu
1045          * unmapping (below). */
1046
1047         gk20a_mm_l2_flush(g, true);
1048 }
1049
1050 static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
1051                                          struct mem_mgr *memmgr,
1052                                          struct mem_handle *r,
1053                                          u64 offset_align,
1054                                          u32 flags,
1055                                          u32 kind,
1056                                          struct sg_table **sgt,
1057                                          bool user_mapped,
1058                                          int rw_flag)
1059 {
1060         struct mapped_buffer_node *mapped_buffer = NULL;
1061
1062         mapped_buffer = find_mapped_buffer_reverse_locked(
1063                                                 &vm->mapped_buffers, r);
1064         if (!mapped_buffer)
1065                 return 0;
1066
1067         if (mapped_buffer->flags != flags)
1068                 return 0;
1069
1070         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET &&
1071             mapped_buffer->addr != offset_align)
1072                 return 0;
1073
1074         WARN_ON(mapped_buffer->memmgr != memmgr);
1075         BUG_ON(mapped_buffer->vm != vm);
1076
1077         /* mark the buffer as used */
1078         if (user_mapped) {
1079                 if (mapped_buffer->user_mapped == 0)
1080                         vm->num_user_mapped_buffers++;
1081                 mapped_buffer->user_mapped++;
1082
1083                 /* If the mapping comes from user space, we own
1084                  * the memmgr and handle refs. Since we reuse an
1085                  * existing mapping here, we need to give back those
1086                  * refs once in order not to leak.
1087                  */
1088                 if (mapped_buffer->own_mem_ref) {
1089                         nvhost_memmgr_put(mapped_buffer->memmgr,
1090                                           mapped_buffer->handle_ref);
1091                         nvhost_memmgr_put_mgr(mapped_buffer->memmgr);
1092                 } else
1093                         mapped_buffer->own_mem_ref = true;
1094
1095                 mapped_buffer->memmgr = memmgr;
1096         }
1097         kref_get(&mapped_buffer->ref);
1098
1099         nvhost_dbg(dbg_map,
1100                    "reusing as=%d pgsz=%d flags=0x%x ctags=%d "
1101                    "start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x "
1102                    "own_mem_ref=%d user_mapped=%d",
1103                    vm_aspace_id(vm), mapped_buffer->pgsz_idx,
1104                    mapped_buffer->flags,
1105                    mapped_buffer->ctag_lines,
1106                    mapped_buffer->ctag_offset,
1107                    hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1108                    hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1109                    lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1110                    hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1111                    lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1112                    mapped_buffer->own_mem_ref, user_mapped);
1113
1114         if (sgt)
1115                 *sgt = mapped_buffer->sgt;
1116         return mapped_buffer->addr;
1117 }
1118
1119 u64 gk20a_vm_map(struct vm_gk20a *vm,
1120                         struct mem_mgr *memmgr,
1121                         struct mem_handle *r,
1122                         u64 offset_align,
1123                         u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
1124                         u32 kind,
1125                         struct sg_table **sgt,
1126                         bool user_mapped,
1127                         int rw_flag)
1128 {
1129         struct gk20a *g = gk20a_from_vm(vm);
1130         struct nvhost_allocator *ctag_allocator = &g->gr.comp_tags;
1131         struct device *d = dev_from_vm(vm);
1132         struct mapped_buffer_node *mapped_buffer = NULL;
1133         bool inserted = false, va_allocated = false;
1134         u32 gmmu_page_size = 0;
1135         u64 map_offset = 0;
1136         int attr, err = 0;
1137         struct buffer_attrs bfr = {0};
1138         struct bfr_attr_query query[BFR_ATTRS];
1139         struct nvhost_comptags comptags;
1140
1141         mutex_lock(&vm->update_gmmu_lock);
1142
1143         /* check if this buffer is already mapped */
1144         map_offset = gk20a_vm_map_duplicate_locked(vm, memmgr, r, offset_align,
1145                                                    flags, kind, sgt,
1146                                                    user_mapped, rw_flag);
1147         if (map_offset) {
1148                 mutex_unlock(&vm->update_gmmu_lock);
1149                 return map_offset;
1150         }
1151
1152         /* pin buffer to get phys/iovmm addr */
1153         bfr.sgt = nvhost_memmgr_pin(memmgr, r, d, rw_flag);
1154         if (IS_ERR(bfr.sgt)) {
1155                 /* Falling back to physical is actually possible
1156                  * here in many cases if we use 4K phys pages in the
1157                  * gmmu.  However we have some regions which require
1158                  * contig regions to work properly (either phys-contig
1159                  * or contig through smmu io_vaspace).  Until we can
1160                  * track the difference between those two cases we have
1161                  * to fail the mapping when we run out of SMMU space.
1162                  */
1163                 nvhost_warn(d, "failed to pin buffer");
1164                 goto clean_up;
1165         }
1166
1167         if (sgt)
1168                 *sgt = bfr.sgt;
1169
1170         /* query bfr attributes: size, align, heap, kind */
1171         for (attr = 0; attr < BFR_ATTRS; attr++) {
1172                 query[attr].err =
1173                         nvhost_memmgr_get_param(memmgr, r,
1174                                                 nvmap_bfr_param[attr],
1175                                                 &query[attr].v);
1176                 if (unlikely(query[attr].err != 0)) {
1177                         nvhost_err(d,
1178                                    "failed to get nvmap buffer param %d: %d\n",
1179                                    nvmap_bfr_param[attr],
1180                                    query[attr].err);
1181                         err = query[attr].err;
1182                         goto clean_up;
1183                 }
1184         }
1185
1186         /* validate/adjust bfr attributes */
1187         err = setup_buffer_size_and_align(d, &bfr, query, offset_align, flags);
1188         if (unlikely(err))
1189                 goto clean_up;
1190         if (unlikely(bfr.pgsz_idx < gmmu_page_size_small ||
1191                      bfr.pgsz_idx > gmmu_page_size_big)) {
1192                 BUG_ON(1);
1193                 err = -EINVAL;
1194                 goto clean_up;
1195         }
1196         gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
1197
1198         /* Check if we should use a fixed offset for mapping this buffer */
1199         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)  {
1200                 err = validate_fixed_buffer(vm, &bfr, offset_align);
1201                 if (err)
1202                         goto clean_up;
1203
1204                 map_offset = offset_align;
1205                 va_allocated = false;
1206         } else
1207                 va_allocated = true;
1208
1209         if (sgt)
1210                 *sgt = bfr.sgt;
1211
1212         err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx);
1213         if (unlikely(err)) {
1214                 nvhost_err(d, "failure setting up kind and compression");
1215                 goto clean_up;
1216         }
1217
1218         /* bar1 and pmu vm don't need ctag */
1219         if (!vm->enable_ctag)
1220                 bfr.ctag_lines = 0;
1221
1222         nvhost_memmgr_get_comptags(r, &comptags);
1223
1224         if (bfr.ctag_lines && !comptags.lines) {
1225                 /* allocate compression resources if needed */
1226                 err = nvhost_memmgr_alloc_comptags(r,
1227                                 ctag_allocator, bfr.ctag_lines);
1228                 if (err) {
1229                         /* ok to fall back here if we ran out */
1230                         /* TBD: we can partially alloc ctags as well... */
1231                         bfr.ctag_lines = bfr.ctag_offset = 0;
1232                         bfr.kind_v = bfr.uc_kind_v;
1233                 } else {
1234                         nvhost_memmgr_get_comptags(r, &comptags);
1235
1236                         /* init/clear the ctag buffer */
1237                         gk20a_gr_clear_comptags(g,
1238                                 comptags.offset,
1239                                 comptags.offset + comptags.lines - 1);
1240                 }
1241         }
1242
1243         /* store the comptag info */
1244         WARN_ON(bfr.ctag_lines != comptags.lines);
1245         bfr.ctag_offset = comptags.offset;
1246
1247         /* update gmmu ptes */
1248         map_offset = __locked_gmmu_map(vm, map_offset,
1249                                         bfr.sgt,
1250                                         bfr.size,
1251                                         bfr.pgsz_idx,
1252                                         bfr.kind_v,
1253                                         bfr.ctag_offset,
1254                                         flags, rw_flag);
1255         if (!map_offset)
1256                 goto clean_up;
1257
1258         nvhost_dbg(dbg_map,
1259            "as=%d pgsz=%d "
1260            "kind=0x%x kind_uc=0x%x flags=0x%x "
1261            "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
1262            vm_aspace_id(vm), gmmu_page_size,
1263            bfr.kind_v, bfr.uc_kind_v, flags,
1264            bfr.ctag_lines, bfr.ctag_offset,
1265            hi32(map_offset), lo32(map_offset),
1266            hi32((u64)sg_dma_address(bfr.sgt->sgl)),
1267            lo32((u64)sg_dma_address(bfr.sgt->sgl)),
1268            hi32((u64)sg_phys(bfr.sgt->sgl)),
1269            lo32((u64)sg_phys(bfr.sgt->sgl)));
1270
1271 #if defined(NVHOST_DEBUG)
1272         {
1273                 int i;
1274                 struct scatterlist *sg = NULL;
1275                 nvhost_dbg(dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)");
1276                 for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i ) {
1277                         u64 da = sg_dma_address(sg);
1278                         u64 pa = sg_phys(sg);
1279                         u64 len = sg->length;
1280                         nvhost_dbg(dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x",
1281                                    i, hi32(pa), lo32(pa), hi32(da), lo32(da),
1282                                    hi32(len), lo32(len));
1283                 }
1284         }
1285 #endif
1286
1287         /* keep track of the buffer for unmapping */
1288         /* TBD: check for multiple mapping of same buffer */
1289         mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL);
1290         if (!mapped_buffer) {
1291                 nvhost_warn(d, "oom allocating tracking buffer");
1292                 goto clean_up;
1293         }
1294         mapped_buffer->memmgr      = memmgr;
1295         mapped_buffer->handle_ref  = r;
1296         mapped_buffer->sgt         = bfr.sgt;
1297         mapped_buffer->addr        = map_offset;
1298         mapped_buffer->size        = bfr.size;
1299         mapped_buffer->pgsz_idx    = bfr.pgsz_idx;
1300         mapped_buffer->ctag_offset = bfr.ctag_offset;
1301         mapped_buffer->ctag_lines  = bfr.ctag_lines;
1302         mapped_buffer->vm          = vm;
1303         mapped_buffer->flags       = flags;
1304         mapped_buffer->va_allocated = va_allocated;
1305         mapped_buffer->user_mapped = user_mapped ? 1 : 0;
1306         mapped_buffer->own_mem_ref = user_mapped;
1307         INIT_LIST_HEAD(&mapped_buffer->unmap_list);
1308         INIT_LIST_HEAD(&mapped_buffer->va_buffers_list);
1309         kref_init(&mapped_buffer->ref);
1310
1311         err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
1312         if (err) {
1313                 nvhost_err(d, "failed to insert into mapped buffer tree");
1314                 goto clean_up;
1315         }
1316         inserted = true;
1317         if (user_mapped)
1318                 vm->num_user_mapped_buffers++;
1319
1320         nvhost_dbg_info("allocated va @ 0x%llx", map_offset);
1321
1322         if (!va_allocated) {
1323                 struct vm_reserved_va_node *va_node;
1324
1325                 /* find the space reservation */
1326                 va_node = addr_to_reservation(vm, map_offset);
1327                 list_add_tail(&mapped_buffer->va_buffers_list,
1328                               &va_node->va_buffers_list);
1329         }
1330
1331         mutex_unlock(&vm->update_gmmu_lock);
1332
1333         /* Invalidate kernel mappings immediately */
1334         if (vm_aspace_id(vm) == -1)
1335                 gk20a_mm_tlb_invalidate(vm);
1336
1337         return map_offset;
1338
1339 clean_up:
1340         if (inserted) {
1341                 rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1342                 if (user_mapped)
1343                         vm->num_user_mapped_buffers--;
1344         }
1345         kfree(mapped_buffer);
1346         if (va_allocated)
1347                 gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
1348         if (!IS_ERR(bfr.sgt))
1349                 nvhost_memmgr_unpin(memmgr, r, d, bfr.sgt);
1350
1351         mutex_unlock(&vm->update_gmmu_lock);
1352         nvhost_dbg_info("err=%d\n", err);
1353         return 0;
1354 }
1355
1356 u64 gk20a_gmmu_map(struct vm_gk20a *vm,
1357                 struct sg_table **sgt,
1358                 u64 size,
1359                 u32 flags,
1360                 int rw_flag)
1361 {
1362         u64 vaddr;
1363
1364         mutex_lock(&vm->update_gmmu_lock);
1365         vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
1366                                 *sgt, /* sg table */
1367                                 size,
1368                                 0, /* page size index = 0 i.e. SZ_4K */
1369                                 0, /* kind */
1370                                 0, /* ctag_offset */
1371                                 flags, rw_flag);
1372         mutex_unlock(&vm->update_gmmu_lock);
1373         if (!vaddr) {
1374                 nvhost_err(dev_from_vm(vm), "failed to allocate va space");
1375                 return 0;
1376         }
1377
1378         /* Invalidate kernel mappings immediately */
1379         gk20a_mm_tlb_invalidate(vm);
1380
1381         return vaddr;
1382 }
1383
1384 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
1385                 u64 vaddr,
1386                 u64 size,
1387                 int rw_flag)
1388 {
1389         mutex_lock(&vm->update_gmmu_lock);
1390         __locked_gmmu_unmap(vm,
1391                         vaddr,
1392                         size,
1393                         0, /* page size 4K */
1394                         true, /*va_allocated */
1395                         rw_flag);
1396         mutex_unlock(&vm->update_gmmu_lock);
1397 }
1398
1399 phys_addr_t gk20a_get_phys_from_iova(struct device *d,
1400                                 u64 dma_addr)
1401 {
1402         phys_addr_t phys;
1403         u64 iova;
1404
1405         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
1406         if (!mapping)
1407                 return dma_addr;
1408
1409         iova = dma_addr & PAGE_MASK;
1410         phys = iommu_iova_to_phys(mapping->domain, iova);
1411         return phys;
1412 }
1413
1414 /* get sg_table from already allocated buffer */
1415 int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
1416                         void *cpuva, u64 iova,
1417                         size_t size)
1418 {
1419         int err = 0;
1420         *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1421         if (!(*sgt)) {
1422                 dev_err(d, "failed to allocate memory\n");
1423                 err = -ENOMEM;
1424                 goto fail;
1425         }
1426         err = dma_get_sgtable(d, *sgt,
1427                         cpuva, iova,
1428                         size);
1429         if (err) {
1430                 dev_err(d, "failed to create sg table\n");
1431                 goto fail;
1432         }
1433         sg_dma_address((*sgt)->sgl) = iova;
1434
1435         return 0;
1436  fail:
1437         if (*sgt) {
1438                 kfree(*sgt);
1439                 *sgt = NULL;
1440         }
1441         return err;
1442 }
1443
1444 void gk20a_free_sgtable(struct sg_table **sgt)
1445 {
1446         sg_free_table(*sgt);
1447         kfree(*sgt);
1448         *sgt = NULL;
1449 }
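/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the typical kernel-internal pattern for exposing a DMA-coherent
 * allocation to the GMMU with the helpers above, error handling omitted:
 *
 *	dma_addr_t iova;
 *	struct sg_table *sgt;
 *	u64 gpu_va;
 *	void *cpuva = dma_alloc_coherent(d, size, &iova, GFP_KERNEL);
 *
 *	gk20a_get_sgtable(d, &sgt, cpuva, iova, size);
 *	gpu_va = gk20a_gmmu_map(vm, &sgt, size, 0, mem_flag_none);
 *	...
 *	gk20a_gmmu_unmap(vm, gpu_va, size, mem_flag_none);
 *	gk20a_free_sgtable(&sgt);
 *	dma_free_coherent(d, size, cpuva, iova);
 */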
1450
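/*
 * Editorial note on gk20a_mm_iova_addr() below: when a scatterlist entry
 * carries an SMMU IOVA in sg_dma_address(), the address programmed into
 * the GMMU is that IOVA with the SMMU translation bit set, so the memory
 * controller routes the access back through the SMMU.  A dma address of
 * DMA_ERROR_CODE yields 0, and with no dma address at all the raw
 * physical address from sg_phys() is used directly.
 */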
1451 u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
1452 {
1453         u64 result = sg_phys(sgl);
1454 #ifdef CONFIG_TEGRA_IOMMU_SMMU
1455         if (sg_dma_address(sgl) == DMA_ERROR_CODE)
1456                 result = 0;
1457         else if (sg_dma_address(sgl)) {
1458                 result = sg_dma_address(sgl) |
1459                         1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT;
1460         }
1461 #endif
1462         return result;
1463 }
1464
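/*
 * update_gmmu_ptes_locked() handles both map and unmap of a va range: with a
 * non-NULL sgt, valid ptes are written for [first_vaddr, last_vaddr] and each
 * pte bumps its page table's ref_cnt; with a NULL sgt the same range is
 * written with invalid ptes and ref_cnt drops.  Each pte is a pair of 32-bit
 * words (valid bit and address in word 0; aperture, kind, comptag line,
 * volatile and read/write-disable bits in word 1).  Page tables whose
 * ref_cnt reaches zero are freed and their pde rewritten.  Callers are
 * expected to hold vm->update_gmmu_lock (hence the _locked suffix).
 */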
1465 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
1466                                    enum gmmu_pgsz_gk20a pgsz_idx,
1467                                    struct sg_table *sgt,
1468                                    u64 first_vaddr, u64 last_vaddr,
1469                                    u8 kind_v, u32 ctag_offset,
1470                                    bool cacheable,
1471                                    int rw_flag)
1472 {
1473         int err;
1474         u32 pde_lo, pde_hi, pde_i;
1475         struct scatterlist *cur_chunk;
1476         unsigned int cur_offset;
1477         u32 pte_w[2] = {0, 0}; /* invalid pte */
1478         u32 ctag = ctag_offset;
1479         u32 ctag_incr;
1480         u32 page_size  = gmmu_page_sizes[pgsz_idx];
1481         u64 addr = 0;
1482
1483         pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
1484                                    &pde_lo, &pde_hi);
1485
1486         nvhost_dbg(dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
1487                    pgsz_idx, pde_lo, pde_hi);
1488
1489         /* If ctag_offset != 0 the increment is 1, otherwise 0.  The idea is to
1490          * avoid a per-pte branch below. Note: this only works when the page size
1491          * (with comptags active) is 128KB; we have checks elsewhere for that. */
1492         ctag_incr = !!ctag_offset;
1493
1494         if (sgt)
1495                 cur_chunk = sgt->sgl;
1496         else
1497                 cur_chunk = NULL;
1498
1499         cur_offset = 0;
1500
1501         for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
1502                 u32 pte_lo, pte_hi;
1503                 u32 pte_cur;
1504                 void *pte_kv_cur;
1505
1506                 struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
1507
1508                 if (pde_i == pde_lo)
1509                         pte_lo = pte_index_from_vaddr(vm, first_vaddr,
1510                                                       pgsz_idx);
1511                 else
1512                         pte_lo = 0;
1513
1514                 if ((pde_i != pde_hi) && (pde_hi != pde_lo))
1515                         pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1;
1516                 else
1517                         pte_hi = pte_index_from_vaddr(vm, last_vaddr,
1518                                                       pgsz_idx);
1519
1520                 /* get cpu access to the ptes */
1521                 err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur);
1522                 if (err) {
1523                         nvhost_err(dev_from_vm(vm),
1524                                    "couldn't map ptes for update as=%d pte_ref_cnt=%d",
1525                                    vm_aspace_id(vm), pte->ref_cnt);
1526                         goto clean_up;
1527                 }
1528
1529                 nvhost_dbg(dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
1530                 for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
1531
1532                         if (likely(sgt)) {
1533                                 u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
1534                                 if (new_addr) {
1535                                         addr = new_addr;
1536                                         addr += cur_offset;
1537                                 }
1538
1539                                 pte_w[0] = gmmu_pte_valid_true_f() |
1540                                         gmmu_pte_address_sys_f(addr
1541                                                 >> gmmu_pte_address_shift_v());
1542                                 pte_w[1] = gmmu_pte_aperture_video_memory_f() |
1543                                         gmmu_pte_kind_f(kind_v) |
1544                                         gmmu_pte_comptagline_f(ctag);
1545
1546                                 if (rw_flag == mem_flag_read_only) {
1547                                         pte_w[0] |= gmmu_pte_read_only_true_f();
1548                                         pte_w[1] |=
1549                                                 gmmu_pte_write_disable_true_f();
1550                                 } else if (rw_flag == mem_flag_write_only) {
1551                                         pte_w[1] |=
1552                                                 gmmu_pte_read_disable_true_f();
1553                                 }
1554
1555                                 if (!cacheable)
1556                                         pte_w[1] |= gmmu_pte_vol_true_f();
1557
1558                                 pte->ref_cnt++;
1559
1560                                 nvhost_dbg(dbg_pte,
1561                                            "pte_cur=%d addr=0x%x,%08x kind=%d"
1562                                            " ctag=%d vol=%d refs=%d"
1563                                            " [0x%08x,0x%08x]",
1564                                            pte_cur, hi32(addr), lo32(addr),
1565                                            kind_v, ctag, !cacheable,
1566                                            pte->ref_cnt, pte_w[1], pte_w[0]);
1567
1568                                 ctag += ctag_incr;
1569                                 cur_offset += page_size;
1570                                 addr += page_size;
1571                                 while (cur_chunk &&
1572                                         cur_offset >= cur_chunk->length) {
1573                                         cur_offset -= cur_chunk->length;
1574                                         cur_chunk = sg_next(cur_chunk);
1575                                 }
1576
1577                         } else {
1578                                 pte->ref_cnt--;
1579                                 nvhost_dbg(dbg_pte,
1580                                            "pte_cur=%d ref=%d [0x0,0x0]",
1581                                            pte_cur, pte->ref_cnt);
1582                         }
1583
1584                         mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
1585                         mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
1586                 }
1587
1588                 unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
1589
1590                 if (pte->ref_cnt == 0) {
1591                         /* It can make sense to keep one (empty) page table
1592                          * around for each flavor, in case a new map comes right
1593                          * back to alloc and fill it in again.
1594                          * But: deferred unmapping should help with pathological
1595                          * unmap/map/unmap/map cases where we'd otherwise trigger
1596                          * pte free/alloc/free/alloc churn.
1597                          */
1598                         free_gmmu_pages(vm, pte->ref, pte->sgt,
1599                                 vm->mm->page_table_sizing[pgsz_idx].order);
1600                         pte->ref = NULL;
1601
1602                         /* rewrite pde */
1603                         update_gmmu_pde_locked(vm, pde_i);
1604                 }
1605
1606         }
1607
1608         smp_mb();
1609         vm->tlb_dirty = true;
1610         nvhost_dbg_fn("set tlb dirty");
1611
1612         return 0;
1613
1614 clean_up:
1615         /* TBD: potentially rewrite the above to pre-map everything it needs,
1616          * since failing to map the ptes is the only way this can fail. */
1617         return err;
1618
1619 }
1620
1621
1622 /* for gk20a the "video memory" apertures here are misnomers. */
1623 static inline u32 big_valid_pde0_bits(u64 pte_addr)
1624 {
1625         u32 pde0_bits =
1626                 gmmu_pde_aperture_big_video_memory_f() |
1627                 gmmu_pde_address_big_sys_f(
1628                            (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1629         return  pde0_bits;
1630 }
1631 static inline u32 small_valid_pde1_bits(u64 pte_addr)
1632 {
1633         u32 pde1_bits =
1634                 gmmu_pde_aperture_small_video_memory_f() |
1635                 gmmu_pde_vol_small_true_f() | /* tbd: why? */
1636                 gmmu_pde_address_small_sys_f(
1637                            (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1638         return pde1_bits;
1639 }
1640
1641 /* Given the current state of the ptes associated with a pde,
1642    determine the value of the pde and write it out.  There is no
1643    checking here to determine whether or not a change was actually
1644    made, so superfluous updates will cause unnecessary
1645    pde invalidations.
1646 */
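/* pde word 0 carries the pde size plus the big-page table aperture/address
 * (or the big-invalid marker); word 1 carries the small-page table
 * aperture/address and the volatile bits for both flavors. */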
1647 static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
1648 {
1649         bool small_valid, big_valid;
1650         u64 pte_addr[2] = {0, 0};
1651         struct page_table_gk20a *small_pte =
1652                 vm->pdes.ptes[gmmu_page_size_small] + i;
1653         struct page_table_gk20a *big_pte =
1654                 vm->pdes.ptes[gmmu_page_size_big] + i;
1655         u32 pde_v[2] = {0, 0};
1656         u32 *pde;
1657
1658         small_valid = small_pte && small_pte->ref;
1659         big_valid   = big_pte && big_pte->ref;
1660
1661         if (small_valid)
1662                 pte_addr[gmmu_page_size_small] =
1663                         gk20a_mm_iova_addr(small_pte->sgt->sgl);
1664         if (big_valid)
1665                 pte_addr[gmmu_page_size_big] =
1666                         gk20a_mm_iova_addr(big_pte->sgt->sgl);
1667
1668         pde_v[0] = gmmu_pde_size_full_f();
1669         pde_v[0] |= big_valid ?
1670                 big_valid_pde0_bits(pte_addr[gmmu_page_size_big])
1671                 :
1672                 (gmmu_pde_aperture_big_invalid_f());
1673
1674         pde_v[1] |= (small_valid ?
1675                      small_valid_pde1_bits(pte_addr[gmmu_page_size_small])
1676                      :
1677                      (gmmu_pde_aperture_small_invalid_f() |
1678                       gmmu_pde_vol_small_false_f())
1679                      )
1680                 |
1681                 (big_valid ? (gmmu_pde_vol_big_true_f()) :
1682                  gmmu_pde_vol_big_false_f());
1683
1684         pde = pde_from_index(vm, i);
1685
1686         mem_wr32(pde, 0, pde_v[0]);
1687         mem_wr32(pde, 1, pde_v[1]);
1688
1689         smp_mb();
1690
1691         FLUSH_CPU_DCACHE(pde,
1692                          sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
1693                          sizeof(u32)*2);
1694
1695         gk20a_mm_l2_invalidate(vm->mm->g);
1696
1697         nvhost_dbg(dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
1698
1699         vm->tlb_dirty  = true;
1700 }
1701
1702
1703 /* return mem_mgr and mem_handle to the caller. If the mem_handle is a kernel
1704    dup from user space (as_ioctl), the caller releases the kernel-duplicated handle */
1705 /* NOTE! vm->update_gmmu_lock (which protects mapped_buffers) must be held */
1706 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
1707 {
1708         struct vm_gk20a *vm = mapped_buffer->vm;
1709
1710         __locked_gmmu_unmap(vm,
1711                         mapped_buffer->addr,
1712                         mapped_buffer->size,
1713                         mapped_buffer->pgsz_idx,
1714                         mapped_buffer->va_allocated,
1715                         mem_flag_none);
1716
1717         nvhost_dbg(dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
1718                    vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx],
1719                    hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1720                    mapped_buffer->own_mem_ref);
1721
1722         nvhost_memmgr_unpin(mapped_buffer->memmgr,
1723                             mapped_buffer->handle_ref,
1724                             dev_from_vm(vm),
1725                             mapped_buffer->sgt);
1726
1727         /* remove from the mapped buffer tree and the va list, then free */
1728         rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1729         if (!list_empty(&mapped_buffer->va_buffers_list))
1730                 list_del(&mapped_buffer->va_buffers_list);
1731
1732         /* keep track of mapped buffers */
1733         if (mapped_buffer->user_mapped)
1734                 vm->num_user_mapped_buffers--;
1735
1736         if (mapped_buffer->own_mem_ref) {
1737                 nvhost_memmgr_put(mapped_buffer->memmgr,
1738                                   mapped_buffer->handle_ref);
1739                 nvhost_memmgr_put_mgr(mapped_buffer->memmgr);
1740         }
1741
1742         kfree(mapped_buffer);
1745 }
1746
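/*
 * User-facing unmap entry point: look up the mapped buffer by gpu va and
 * drop one reference to it.  The actual teardown (gk20a_vm_unmap_locked(),
 * via gk20a_vm_unmap_locked_kref) only runs once the last reference on the
 * buffer has been put.
 */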
1747 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
1748 {
1749         struct device *d = dev_from_vm(vm);
1750         struct mapped_buffer_node *mapped_buffer;
1751
1752         mutex_lock(&vm->update_gmmu_lock);
1753         mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
1754         if (!mapped_buffer) {
1755                 mutex_unlock(&vm->update_gmmu_lock);
1756                 nvhost_err(d, "invalid addr to unmap 0x%llx", offset);
1757                 return;
1758         }
1759         kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
1760         mutex_unlock(&vm->update_gmmu_lock);
1761 }
1762
1763 static void gk20a_vm_remove_support(struct vm_gk20a *vm)
1764 {
1765         struct mapped_buffer_node *mapped_buffer;
1766         struct vm_reserved_va_node *va_node, *va_node_tmp;
1767         struct rb_node *node;
1768
1769         nvhost_dbg_fn("");
1770         mutex_lock(&vm->update_gmmu_lock);
1771
1772         /* TBD: add a flag here for the unmap code to recognize teardown
1773          * and short-circuit any otherwise expensive operations. */
1774
1775         node = rb_first(&vm->mapped_buffers);
1776         while (node) {
1777                 mapped_buffer =
1778                         container_of(node, struct mapped_buffer_node, node);
1779                 gk20a_vm_unmap_locked(mapped_buffer);
1780                 node = rb_first(&vm->mapped_buffers);
1781         }
1782
1783         /* destroy remaining reserved memory areas */
1784         list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
1785                 reserved_va_list) {
1786                 list_del(&va_node->reserved_va_list);
1787                 kfree(va_node);
1788         }
1789
1790         /* TBD: unmapping all buffers above may not actually free
1791          * all vm ptes.  Jettison them here to be certain... */
1792
1793         unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
1794         free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
1795
1796         kfree(vm->pdes.ptes[gmmu_page_size_small]);
1797         kfree(vm->pdes.ptes[gmmu_page_size_big]);
1798         nvhost_allocator_destroy(&vm->vma[gmmu_page_size_small]);
1799         nvhost_allocator_destroy(&vm->vma[gmmu_page_size_big]);
1800
1801         mutex_unlock(&vm->update_gmmu_lock);
1802
1803         /* vm is not used anymore. release it. */
1804         kfree(vm);
1805 }
1806
1807 static void gk20a_vm_remove_support_kref(struct kref *ref)
1808 {
1809         struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
1810         gk20a_vm_remove_support(vm);
1811 }
1812
1813 void gk20a_vm_get(struct vm_gk20a *vm)
1814 {
1815         kref_get(&vm->ref);
1816 }
1817
1818 void gk20a_vm_put(struct vm_gk20a *vm)
1819 {
1820         kref_put(&vm->ref, gk20a_vm_remove_support_kref);
1821 }
1822
1823 /* address space interfaces for the gk20a module */
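/*
 * gk20a_as_alloc_share builds a new vm for a user address space share.  The
 * channel va range is split in half: the lower half (minus a one-pde hole at
 * the bottom) feeds the small-page allocator, the upper half feeds the
 * big-page allocator.  Roughly:
 *
 *      0 .. pde_stride                : low hole, never allocated
 *      pde_stride .. channel.size/2   : small-page vma
 *      channel.size/2 .. channel.size : big-page vma
 */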
1824 static int gk20a_as_alloc_share(struct nvhost_as_share *as_share)
1825 {
1826         struct nvhost_as *as = as_share->as;
1827         struct gk20a *gk20a = get_gk20a(as->ch->dev);
1828         struct mm_gk20a *mm = &gk20a->mm;
1829         struct vm_gk20a *vm;
1830         u64 vma_size;
1831         u32 num_pages, low_hole_pages;
1832         char name[32];
1833         int err;
1834
1835         nvhost_dbg_fn("");
1836
1837         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1838         if (!vm)
1839                 return -ENOMEM;
1840
1841         as_share->priv = (void *)vm;
1842
1843         vm->mm = mm;
1844         vm->as_share = as_share;
1845
1846         vm->big_pages = true;
1847
1848         vm->va_start  = mm->pde_stride;   /* create a one pde hole */
1849         vm->va_limit  = mm->channel.size; /* note this means channel.size is
1850                                              really just the max */
1851         {
1852                 u32 pde_lo, pde_hi;
1853                 pde_range_from_vaddr_range(vm,
1854                                            0, vm->va_limit-1,
1855                                            &pde_lo, &pde_hi);
1856                 vm->pdes.num_pdes = pde_hi + 1;
1857         }
1858
1859         vm->pdes.ptes[gmmu_page_size_small] =
1860                 kzalloc(sizeof(struct page_table_gk20a) *
1861                         vm->pdes.num_pdes, GFP_KERNEL);
1862
1863         vm->pdes.ptes[gmmu_page_size_big] =
1864                 kzalloc(sizeof(struct page_table_gk20a) *
1865                         vm->pdes.num_pdes, GFP_KERNEL);
1866
1867         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
1868               vm->pdes.ptes[gmmu_page_size_big]))
1869                 return -ENOMEM;
1870
1871         nvhost_dbg_info("init space for va_limit=0x%llx num_pdes=%d",
1872                    vm->va_limit, vm->pdes.num_pdes);
1873
1874         /* allocate the page table directory */
1875         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
1876                                &vm->pdes.sgt);
1877         if (err)
1878                 return -ENOMEM;
1879
1880         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
1881         if (err) {
1882                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
1883                 return -ENOMEM;
1884         }
1885         nvhost_dbg(dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx",
1886                         vm->pdes.kv,
1887                         gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
1888         /* we could release vm->pdes.kv but it's only one page... */
1889
1890
1891         /* low-half: alloc small pages */
1892         /* high-half: alloc big pages */
1893         vma_size = mm->channel.size >> 1;
1894
1895         snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
1896                  gmmu_page_sizes[gmmu_page_size_small]>>10);
1897         num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
1898
1899         /* num_pages above is without regard to the low-side hole. */
1900         low_hole_pages = (vm->va_start >>
1901                           gmmu_page_shifts[gmmu_page_size_small]);
1902
1903         nvhost_allocator_init(&vm->vma[gmmu_page_size_small], name,
1904               low_hole_pages,             /* start */
1905               num_pages - low_hole_pages, /* length */
1906               1);                         /* align */
1907
1908         snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
1909                  gmmu_page_sizes[gmmu_page_size_big]>>10);
1910
1911         num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
1912         nvhost_allocator_init(&vm->vma[gmmu_page_size_big], name,
1913                               num_pages, /* start */
1914                               num_pages, /* length */
1915                               1); /* align */
1916
1917         vm->mapped_buffers = RB_ROOT;
1918
1919         mutex_init(&vm->update_gmmu_lock);
1920         kref_init(&vm->ref);
1921         INIT_LIST_HEAD(&vm->reserved_va_list);
1922
1923         vm->enable_ctag = true;
1924
1925         return 0;
1926 }
1927
1928
1929 static int gk20a_as_release_share(struct nvhost_as_share *as_share)
1930 {
1931         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
1932
1933         nvhost_dbg_fn("");
1934
1935         vm->as_share = NULL;
1936
1937         /* drop the as_share's reference to the vm */
1938         gk20a_vm_put(vm);
1939
1940         as_share->priv = NULL;
1941
1942         return 0;
1943 }
1944
1945
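/*
 * Reserve a va range on behalf of user space.  args->page_size selects which
 * allocator (vma) is used; with NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET the
 * reservation is attempted at the page number derived from args->o_a.offset,
 * otherwise the allocator picks the start.  The range is then recorded as a
 * vm_reserved_va_node and the resulting offset handed back in
 * args->o_a.offset.
 */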
1946 static int gk20a_as_alloc_space(struct nvhost_as_share *as_share,
1947                                 struct nvhost_as_alloc_space_args *args)
1948 {
1949         int err = -ENOMEM;
1950         int pgsz_idx;
1951         u32 start_page_nr;
1952         struct nvhost_allocator *vma;
1953         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
1954         struct vm_reserved_va_node *va_node;
1955         u64 vaddr_start = 0;
1956
1957         nvhost_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
1958                         args->flags, args->page_size, args->pages,
1959                         args->o_a.offset);
1960
1961         /* determine pagesz idx */
1962         for (pgsz_idx = gmmu_page_size_small;
1963              pgsz_idx < gmmu_nr_page_sizes;
1964              pgsz_idx++) {
1965                 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
1966                         break;
1967         }
1968
1969         if (pgsz_idx >= gmmu_nr_page_sizes) {
1970                 err = -EINVAL;
1971                 goto clean_up;
1972         }
1973
1974         va_node = kzalloc(sizeof(*va_node), GFP_KERNEL);
1975         if (!va_node) {
1976                 err = -ENOMEM;
1977                 goto clean_up;
1978         }
1979
1980         start_page_nr = 0;
1981         if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
1982                 start_page_nr = (u32)(args->o_a.offset >>
1983                                       gmmu_page_shifts[pgsz_idx]);
1984
1985         vma = &vm->vma[pgsz_idx];
1986         err = vma->alloc(vma, &start_page_nr, args->pages);
1987         if (err) {
1988                 kfree(va_node);
1989                 goto clean_up;
1990         }
1991
1992         vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx];
1993
1994         va_node->vaddr_start = vaddr_start;
1995         va_node->size = (u64)args->page_size * (u64)args->pages;
1996         INIT_LIST_HEAD(&va_node->va_buffers_list);
1997         INIT_LIST_HEAD(&va_node->reserved_va_list);
1998
1999         mutex_lock(&vm->update_gmmu_lock);
2000         list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
2001         mutex_unlock(&vm->update_gmmu_lock);
2002
2003         args->o_a.offset = vaddr_start;
2004
2005 clean_up:
2006         return err;
2007 }
2008
2009 static int gk20a_as_free_space(struct nvhost_as_share *as_share,
2010                                struct nvhost_as_free_space_args *args)
2011 {
2012         int err = -ENOMEM;
2013         int pgsz_idx;
2014         u32 start_page_nr;
2015         struct nvhost_allocator *vma;
2016         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
2017         struct vm_reserved_va_node *va_node;
2018
2019         nvhost_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
2020                         args->pages, args->offset);
2021
2022         /* determine pagesz idx */
2023         for (pgsz_idx = gmmu_page_size_small;
2024              pgsz_idx < gmmu_nr_page_sizes;
2025              pgsz_idx++) {
2026                 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2027                         break;
2028         }
2029
2030         if (pgsz_idx >= gmmu_nr_page_sizes) {
2031                 err = -EINVAL;
2032                 goto clean_up;
2033         }
2034
2035         start_page_nr = (u32)(args->offset >>
2036                               gmmu_page_shifts[pgsz_idx]);
2037
2038         vma = &vm->vma[pgsz_idx];
2039         err = vma->free(vma, start_page_nr, args->pages);
2040
2041         if (err)
2042                 goto clean_up;
2043
2044         mutex_lock(&vm->update_gmmu_lock);
2045         va_node = addr_to_reservation(vm, args->offset);
2046         if (va_node) {
2047                 struct mapped_buffer_node *buffer;
2048
2049                 /* there is no need to unmap the buffers still mapped inside
2050                  * this va reservation; just convert them back into normal buffers */
2051
2052                 list_for_each_entry(buffer,
2053                         &va_node->va_buffers_list, va_buffers_list)
2054                         list_del_init(&buffer->va_buffers_list);
2055
2056                 list_del(&va_node->reserved_va_list);
2057                 kfree(va_node);
2058         }
2059         mutex_unlock(&vm->update_gmmu_lock);
2060
2061 clean_up:
2062         return err;
2063 }
2064
2065 static int gk20a_as_bind_hwctx(struct nvhost_as_share *as_share,
2066                                struct nvhost_hwctx *hwctx)
2067 {
2068         int err = 0;
2069         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
2070         struct channel_gk20a *c = hwctx->priv;
2071
2072         nvhost_dbg_fn("");
2073
2074         c->vm = vm;
2075         err = channel_gk20a_commit_va(c);
2076         if (err)
2077                 c->vm = NULL;
2078
2079         return err;
2080 }
2081
2082 static int gk20a_as_map_buffer(struct nvhost_as_share *as_share,
2083                                int memmgr_fd,
2084                                ulong mem_id,
2085                                u64 *offset_align,
2086                                u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/)
2087 {
2088         int err = 0;
2089         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
2090         struct gk20a *g = gk20a_from_vm(vm);
2091         struct mem_mgr *memmgr;
2092         struct mem_handle *r;
2093         u64 ret_va;
2094
2095         nvhost_dbg_fn("");
2096
2097         /* get ref to the memmgr (released on unmap_locked) */
2098         memmgr = nvhost_memmgr_get_mgr_file(memmgr_fd);
2099         if (IS_ERR(memmgr))
2100                 return -EINVAL;
2101
2102         /* get ref to the mem handle (released on unmap_locked) */
2103         r = nvhost_memmgr_get(memmgr, mem_id, g->dev);
2104         if (!r) {
2105                 nvhost_memmgr_put_mgr(memmgr);
2106                 return -EINVAL;
2107         }
2108
2109         ret_va = gk20a_vm_map(vm, memmgr, r, *offset_align,
2110                         flags, 0/*no kind here, to be removed*/, NULL, true,
2111                         mem_flag_none);
2112         *offset_align = ret_va;
2113         if (!ret_va) {
2114                 nvhost_memmgr_put(memmgr, r);
2115                 nvhost_memmgr_put_mgr(memmgr);
2116                 err = -EINVAL;
2117         }
2118
2119         return err;
2120 }
2121
2122 static int gk20a_as_unmap_buffer(struct nvhost_as_share *as_share, u64 offset)
2123 {
2124         struct vm_gk20a *vm = (struct vm_gk20a *)as_share->priv;
2125
2126         nvhost_dbg_fn("");
2127
2128         gk20a_vm_unmap_user(vm, offset);
2129         return 0;
2130 }
2131
2132
2133 const struct nvhost_as_moduleops tegra_gk20a_as_ops = {
2134         .alloc_share   = gk20a_as_alloc_share,
2135         .release_share = gk20a_as_release_share,
2136         .alloc_space   = gk20a_as_alloc_space,
2137         .free_space    = gk20a_as_free_space,
2138         .bind_hwctx    = gk20a_as_bind_hwctx,
2139         .map_buffer    = gk20a_as_map_buffer,
2140         .unmap_buffer  = gk20a_as_unmap_buffer,
2141 };
2142
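/*
 * Set up the bar1 vm: build its page directory, then allocate an instance
 * block and program it with the page directory base
 * (ram_in_page_dir_base_lo/hi, i.e. the pde address shifted down by 12) and
 * the va limit (ram_in_adr_limit_lo/hi) so hw can walk this vm's page
 * tables.
 */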
2143 int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2144 {
2145         int err;
2146         phys_addr_t inst_pa;
2147         void *inst_ptr;
2148         struct vm_gk20a *vm = &mm->bar1.vm;
2149         struct gk20a *g = gk20a_from_mm(mm);
2150         struct device *d = dev_from_gk20a(g);
2151         struct inst_desc *inst_block = &mm->bar1.inst_block;
2152         u64 pde_addr;
2153         u32 pde_addr_lo;
2154         u32 pde_addr_hi;
2155
2156         vm->mm = mm;
2157
2158         mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
2159
2160         nvhost_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
2161
2162         vm->va_start = mm->pde_stride * 1;
2163         vm->va_limit = mm->bar1.aperture_size;
2164
2165         {
2166                 u32 pde_lo, pde_hi;
2167                 pde_range_from_vaddr_range(vm,
2168                                            0, vm->va_limit-1,
2169                                            &pde_lo, &pde_hi);
2170                 vm->pdes.num_pdes = pde_hi + 1;
2171         }
2172
2173         /* bar1 is likely only to ever use/need small page sizes. */
2174         /* But just in case, for now... arrange for both. */
2175         vm->pdes.ptes[gmmu_page_size_small] =
2176                 kzalloc(sizeof(struct page_table_gk20a) *
2177                         vm->pdes.num_pdes, GFP_KERNEL);
2178
2179         vm->pdes.ptes[gmmu_page_size_big] =
2180                 kzalloc(sizeof(struct page_table_gk20a) *
2181                         vm->pdes.num_pdes, GFP_KERNEL);
2182
2183         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2184               vm->pdes.ptes[gmmu_page_size_big]))
2185                 return -ENOMEM;
2186
2187         nvhost_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d",
2188                    vm->va_limit, vm->pdes.num_pdes);
2189
2190
2191         /* allocate the page table directory */
2192         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2193                                &vm->pdes.sgt);
2194         if (err)
2195                 goto clean_up;
2196
2197         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
2198         if (err) {
2199                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
2200                 goto clean_up;
2201         }
2202         nvhost_dbg(dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
2203                         vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2204         /* we could release vm->pdes.kv but it's only one page... */
2205
2206         pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2207         pde_addr_lo = u64_lo32(pde_addr >> 12);
2208         pde_addr_hi = u64_hi32(pde_addr);
2209
2210         nvhost_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2211                 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl),
2212                 pde_addr_lo, pde_addr_hi);
2213
2214         /* allocate instance mem for bar1 */
2215         inst_block->size = ram_in_alloc_size_v();
2216         inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2217                                 &inst_block->iova, GFP_KERNEL);
2218         if (!inst_block->cpuva) {
2219                 nvhost_err(d, "%s: memory allocation failed\n", __func__);
2220                 err = -ENOMEM;
2221                 goto clean_up;
2222         }
2223
2224         inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2225         if (!inst_block->cpu_pa) {
2226                 nvhost_err(d, "%s: failed to get phys address\n", __func__);
2227                 err = -ENOMEM;
2228                 goto clean_up;
2229         }
2230
2231         inst_pa = inst_block->cpu_pa;
2232         inst_ptr = inst_block->cpuva;
2233
2234         nvhost_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p",
2235                 (u64)inst_pa, inst_ptr);
2236
2237         memset(inst_ptr, 0, ram_fc_size_val_v());
2238
2239         mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2240                 ram_in_page_dir_base_target_vid_mem_f() |
2241                 ram_in_page_dir_base_vol_true_f() |
2242                 ram_in_page_dir_base_lo_f(pde_addr_lo));
2243
2244         mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2245                 ram_in_page_dir_base_hi_f(pde_addr_hi));
2246
2247         mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2248                  u64_lo32(vm->va_limit) | 0xFFF);
2249
2250         mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2251                 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2252
2253         nvhost_dbg_info("bar1 inst block ptr: %08llx",  (u64)inst_pa);
2254         nvhost_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1",
2255                               1,/*start*/
2256                               (vm->va_limit >> 12) - 1 /* length*/,
2257                               1); /* align */
2258         /* initialize just in case we try to use it anyway */
2259         nvhost_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused",
2260                               0x0badc0de, /* start */
2261                               1, /* length */
2262                               1); /* align */
2263
2264         vm->mapped_buffers = RB_ROOT;
2265
2266         mutex_init(&vm->update_gmmu_lock);
2267         kref_init(&vm->ref);
2268         INIT_LIST_HEAD(&vm->reserved_va_list);
2269
2270         return 0;
2271
2272 clean_up:
2273         /* free, etc */
2274         if (inst_block->cpuva)
2275                 dma_free_coherent(d, inst_block->size,
2276                         inst_block->cpuva, inst_block->iova);
2277         inst_block->cpuva = NULL;
2278         inst_block->iova = 0;
2279         return err;
2280 }
2281
2282 /* pmu vm; shares the channel vm interfaces */
2283 int gk20a_init_pmu_vm(struct mm_gk20a *mm)
2284 {
2285         int err;
2286         phys_addr_t inst_pa;
2287         void *inst_ptr;
2288         struct vm_gk20a *vm = &mm->pmu.vm;
2289         struct gk20a *g = gk20a_from_mm(mm);
2290         struct device *d = dev_from_gk20a(g);
2291         struct inst_desc *inst_block = &mm->pmu.inst_block;
2292         u64 pde_addr;
2293         u32 pde_addr_lo;
2294         u32 pde_addr_hi;
2295
2296         vm->mm = mm;
2297
2298         mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
2299
2300         nvhost_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
2301
2302         vm->va_start  = GK20A_PMU_VA_START;
2303         vm->va_limit  = vm->va_start + mm->pmu.aperture_size;
2304
2305         {
2306                 u32 pde_lo, pde_hi;
2307                 pde_range_from_vaddr_range(vm,
2308                                            0, vm->va_limit-1,
2309                                            &pde_lo, &pde_hi);
2310                 vm->pdes.num_pdes = pde_hi + 1;
2311         }
2312
2313         /* The pmu is likely only to ever use/need small page sizes. */
2314         /* But just in case, for now... arrange for both. */
2315         vm->pdes.ptes[gmmu_page_size_small] =
2316                 kzalloc(sizeof(struct page_table_gk20a) *
2317                         vm->pdes.num_pdes, GFP_KERNEL);
2318
2319         vm->pdes.ptes[gmmu_page_size_big] =
2320                 kzalloc(sizeof(struct page_table_gk20a) *
2321                         vm->pdes.num_pdes, GFP_KERNEL);
2322
2323         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2324               vm->pdes.ptes[gmmu_page_size_big]))
2325                 return -ENOMEM;
2326
2327         nvhost_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d",
2328                    vm->va_limit, vm->pdes.num_pdes);
2329
2330         /* allocate the page table directory */
2331         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2332                                &vm->pdes.sgt);
2333         if (err)
2334                 goto clean_up;
2335
2336         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv);
2337         if (err) {
2338                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0);
2339                 goto clean_up;
2340         }
2341         nvhost_dbg_info("pmu pdes phys @ 0x%llx",
2342                         (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2343         /* we could release vm->pdes.kv but it's only one page... */
2344
2345         pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2346         pde_addr_lo = u64_lo32(pde_addr >> 12);
2347         pde_addr_hi = u64_hi32(pde_addr);
2348
2349         nvhost_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2350                         (u64)pde_addr, pde_addr_lo, pde_addr_hi);
2351
2352         /* allocate instance mem for pmu */
2353         inst_block->size = GK20A_PMU_INST_SIZE;
2354         inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2355                                 &inst_block->iova, GFP_KERNEL);
2356         if (!inst_block->cpuva) {
2357                 nvhost_err(d, "%s: memory allocation failed\n", __func__);
2358                 err = -ENOMEM;
2359                 goto clean_up;
2360         }
2361
2362         inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2363         if (!inst_block->cpu_pa) {
2364                 nvhost_err(d, "%s: failed to get phys address\n", __func__);
2365                 err = -ENOMEM;
2366                 goto clean_up;
2367         }
2368
2369         inst_pa = inst_block->cpu_pa;
2370         inst_ptr = inst_block->cpuva;
2371
2372         nvhost_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);
2373
2374         memset(inst_ptr, 0, GK20A_PMU_INST_SIZE);
2375
2376         mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2377                 ram_in_page_dir_base_target_vid_mem_f() |
2378                 ram_in_page_dir_base_vol_true_f() |
2379                 ram_in_page_dir_base_lo_f(pde_addr_lo));
2380
2381         mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2382                 ram_in_page_dir_base_hi_f(pde_addr_hi));
2383
2384         mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2385                  u64_lo32(vm->va_limit) | 0xFFF);
2386
2387         mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2388                 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2389
2390         nvhost_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu",
2391                               (vm->va_start >> 12), /* start */
2392                               (vm->va_limit - vm->va_start) >> 12, /*length*/
2393                               1); /* align */
2394         /* initialize just in case we try to use it anyway */
2395         nvhost_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused",
2396                               0x0badc0de, /* start */
2397                               1, /* length */
2398                               1); /* align */
2399
2400
2401         vm->mapped_buffers = RB_ROOT;
2402
2403         mutex_init(&vm->update_gmmu_lock);
2404         kref_init(&vm->ref);
2405         INIT_LIST_HEAD(&vm->reserved_va_list);
2406
2407         return 0;
2408
2409 clean_up:
2410         /* free, etc */
2411         if (inst_block->cpuva)
2412                 dma_free_coherent(d, inst_block->size,
2413                         inst_block->cpuva, inst_block->iova);
2414         inst_block->cpuva = NULL;
2415         inst_block->iova = 0;
2416         return err;
2417 }
2418
2419 /* Flushes the compression bit cache as well as "data".
2420  * Note: the name here is a bit of a misnomer.  ELPG uses this
2421  * internally... but ELPG doesn't have to be on to do it manually.
2422  */
2423 static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
2424 {
2425         u32 data;
2426         s32 retry = 100;
2427
2428         nvhost_dbg_fn("");
2429
2430         /* Make sure all previous writes are committed to the L2. There's no
2431            guarantee that writes are to DRAM. This will be a sysmembar internal
2432            to the L2. */
2433         gk20a_writel(g, ltc_ltss_g_elpg_r(),
2434                      ltc_ltss_g_elpg_flush_pending_f());
2435         do {
2436                 data = gk20a_readl(g, ltc_ltss_g_elpg_r());
2437
2438                 if (ltc_ltss_g_elpg_flush_v(data) ==
2439                     ltc_ltss_g_elpg_flush_pending_v()) {
2440                         nvhost_dbg_info("g_elpg_flush 0x%x", data);
2441                         retry--;
2442                         usleep_range(20, 40);
2443                 } else
2444                         break;
2445         } while (retry >= 0);
2446
2447         if (retry < 0)
2448                 nvhost_warn(dev_from_gk20a(g),
2449                             "g_elpg_flush too many retries");
2450
2451 }
2452
2453 void gk20a_mm_fb_flush(struct gk20a *g)
2454 {
2455         struct mm_gk20a *mm = &g->mm;
2456         u32 data;
2457         s32 retry = 100;
2458
2459         nvhost_dbg_fn("");
2460
2461         mutex_lock(&mm->l2_op_lock);
2462
2463         gk20a_mm_g_elpg_flush_locked(g);
2464
2465         /* Make sure all previous writes are committed to the L2. There's no
2466            guarantee that writes are to DRAM. This will be a sysmembar internal
2467            to the L2. */
2468         gk20a_writel(g, flush_fb_flush_r(),
2469                 flush_fb_flush_pending_busy_f());
2470
2471         do {
2472                 data = gk20a_readl(g, flush_fb_flush_r());
2473
2474                 if (flush_fb_flush_outstanding_v(data) ==
2475                         flush_fb_flush_outstanding_true_v() ||
2476                     flush_fb_flush_pending_v(data) ==
2477                         flush_fb_flush_pending_busy_v()) {
2478                                 nvhost_dbg_info("fb_flush 0x%x", data);
2479                                 retry--;
2480                                 usleep_range(20, 40);
2481                 } else
2482                         break;
2483         } while (retry >= 0);
2484
2485         if (retry < 0)
2486                 nvhost_warn(dev_from_gk20a(g),
2487                         "fb_flush too many retries");
2488
2489         mutex_unlock(&mm->l2_op_lock);
2490 }
2491
2492 static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
2493 {
2494         u32 data;
2495         s32 retry = 200;
2496
2497         /* Invalidate any clean lines from the L2 so subsequent reads go to
2498            DRAM. Dirty lines are not affected by this operation. */
2499         gk20a_writel(g, flush_l2_system_invalidate_r(),
2500                 flush_l2_system_invalidate_pending_busy_f());
2501
2502         do {
2503                 data = gk20a_readl(g, flush_l2_system_invalidate_r());
2504
2505                 if (flush_l2_system_invalidate_outstanding_v(data) ==
2506                         flush_l2_system_invalidate_outstanding_true_v() ||
2507                     flush_l2_system_invalidate_pending_v(data) ==
2508                         flush_l2_system_invalidate_pending_busy_v()) {
2509                                 nvhost_dbg_info("l2_system_invalidate 0x%x",
2510                                                 data);
2511                                 retry--;
2512                                 usleep_range(20, 40);
2513                 } else
2514                         break;
2515         } while (retry >= 0);
2516
2517         if (retry < 0)
2518                 nvhost_warn(dev_from_gk20a(g),
2519                         "l2_system_invalidate too many retries");
2520 }
2521
2522 void gk20a_mm_l2_invalidate(struct gk20a *g)
2523 {
2524         struct mm_gk20a *mm = &g->mm;
2525         mutex_lock(&mm->l2_op_lock);
2526         gk20a_mm_l2_invalidate_locked(g);
2527         mutex_unlock(&mm->l2_op_lock);
2528 }
2529
2530 void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
2531 {
2532         struct mm_gk20a *mm = &g->mm;
2533         u32 data;
2534         s32 retry = 200;
2535
2536         nvhost_dbg_fn("");
2537
2538         mutex_lock(&mm->l2_op_lock);
2539
2540         /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
2541            as clean, so subsequent reads might hit in the L2. */
2542         gk20a_writel(g, flush_l2_flush_dirty_r(),
2543                 flush_l2_flush_dirty_pending_busy_f());
2544
2545         do {
2546                 data = gk20a_readl(g, flush_l2_flush_dirty_r());
2547
2548                 if (flush_l2_flush_dirty_outstanding_v(data) ==
2549                         flush_l2_flush_dirty_outstanding_true_v() ||
2550                     flush_l2_flush_dirty_pending_v(data) ==
2551                         flush_l2_flush_dirty_pending_busy_v()) {
2552                                 nvhost_dbg_info("l2_flush_dirty 0x%x", data);
2553                                 retry--;
2554                                 usleep_range(20, 40);
2555                 } else
2556                         break;
2557         } while (retry >= 0);
2558
2559         if (retry < 0)
2560                 nvhost_warn(dev_from_gk20a(g),
2561                         "l2_flush_dirty too many retries");
2562
2563         if (invalidate)
2564                 gk20a_mm_l2_invalidate_locked(g);
2565
2566         mutex_unlock(&mm->l2_op_lock);
2567 }
2568
2569
2570 int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
2571                          struct mem_mgr **mgr, struct mem_handle **r,
2572                          u64 *offset)
2573 {
2574         struct mapped_buffer_node *mapped_buffer;
2575
2576         nvhost_dbg_fn("gpu_va=0x%llx", gpu_va);
2577
2578         mutex_lock(&vm->update_gmmu_lock);
2579
2580         mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers,
2581                                                         gpu_va);
2582         if (!mapped_buffer) {
2583                 mutex_unlock(&vm->update_gmmu_lock);
2584                 return -EINVAL;
2585         }
2586
2587         *mgr = mapped_buffer->memmgr;
2588         *r = mapped_buffer->handle_ref;
2589         *offset = gpu_va - mapped_buffer->addr;
2590
2591         mutex_unlock(&vm->update_gmmu_lock);
2592
2593         return 0;
2594 }
2595
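/*
 * tlb_dirty is the handshake between the pte/pde update paths (which set it
 * under vm->update_gmmu_lock) and this function, which clears it and then,
 * under mm->tlb_lock, performs the actual invalidate: wait for mmu fifo
 * space, write the pdb address, trigger an invalidate of all pdbs/va and
 * wait for the fifo to drain.
 */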
2596 void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
2597 {
2598         struct mm_gk20a *mm = vm->mm;
2599         struct gk20a *g = gk20a_from_vm(vm);
2600         u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12);
2601         u32 data;
2602         s32 retry = 200;
2603
2604         nvhost_dbg_fn("");
2605
2606         /* Pagetables are considered sw state and are preserved across
2607            prepare_poweroff. When gk20a deinit releases those pagetables,
2608            common code in the vm unmap path calls tlb invalidate, which
2609            touches hw. Use the power_on flag to skip tlb invalidation when
2610            gpu power is turned off. */
2611
2612         if (!g->power_on)
2613                 return;
2614
2615         /* No need to invalidate if tlb is clean */
2616         mutex_lock(&vm->update_gmmu_lock);
2617         if (!vm->tlb_dirty) {
2618                 mutex_unlock(&vm->update_gmmu_lock);
2619                 return;
2620         }
2621         vm->tlb_dirty = false;
2622         mutex_unlock(&vm->update_gmmu_lock);
2623
2624         mutex_lock(&mm->tlb_lock);
2625         do {
2626                 data = gk20a_readl(g, fb_mmu_ctrl_r());
2627                 if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
2628                         break;
2629                 usleep_range(20, 40);
2630                 retry--;
2631         } while (retry >= 0);
2632
2633         if (retry < 0)
2634                 nvhost_warn(dev_from_gk20a(g),
2635                         "wait mmu fifo space too many retries");
2636
2637         gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
2638                 fb_mmu_invalidate_pdb_addr_f(addr_lo) |
2639                 fb_mmu_invalidate_pdb_aperture_vid_mem_f());
2640
2641         /* this is a sledgehammer, it would seem */
2642         gk20a_writel(g, fb_mmu_invalidate_r(),
2643                 fb_mmu_invalidate_all_pdb_true_f() |
2644                 fb_mmu_invalidate_all_va_true_f() |
2645                 fb_mmu_invalidate_trigger_true_f());
2646
2647         do {
2648                 data = gk20a_readl(g, fb_mmu_ctrl_r());
2649                 if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
2650                         fb_mmu_ctrl_pri_fifo_empty_false_f())
2651                         break;
2652                 retry--;
2653                 usleep_range(20, 40);
2654         } while (retry >= 0);
2655
2656         if (retry < 0)
2657                 nvhost_warn(dev_from_gk20a(g),
2658                         "mmu invalidate too many retries");
2659
2660         mutex_unlock(&mm->tlb_lock);
2661 }
2662
2663 #if 0 /* VM DEBUG */
2664
2665 /* print pdes/ptes for a gpu virtual address range under a vm */
2666 void gk20a_mm_dump_vm(struct vm_gk20a *vm,
2667                 u64 va_begin, u64 va_end, char *label)
2668 {
2669         struct mem_mgr *client = mem_mgr_from_vm(vm);
2670         struct mm_gk20a *mm = vm->mm;
2671         struct page_table_gk20a *pte_s;
2672         u64 pde_va, pte_va;
2673         u32 pde_i, pde_lo, pde_hi;
2674         u32 pte_i, pte_lo, pte_hi;
2675         u32 pte_space_page_cur, pte_space_offset_cur;
2676         u32 pte_space_page_offset;
2677         u32 num_ptes, page_size;
2678         void *pde, *pte;
2679         phys_addr_t pte_addr;
2680         int err;
2681
2682         pde_range_from_vaddr_range(vm, va_begin, va_end,
2683                         &pde_lo, &pde_hi);
2684
2685         nvhost_err(dev_from_vm(vm),
2686                 "%s page table entries for gpu va 0x%016llx -> 0x%016llx\n",
2687                 label, va_begin, va_end);
2688
2689         for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
2690                 pde = pde_from_index(vm, pde_i);
2691                 pde_va = pde_i * mm->pde_stride;
2692                 nvhost_err(dev_from_vm(vm),
2693                         "\t[0x%016llx -> 0x%016llx] pde @ 0x%08x: 0x%08x, 0x%08x\n",
2694                         pde_va, pde_va + mm->pde_stride - 1,
2695                         gk20a_mm_iova_addr(vm->pdes.sgt->sgl)
2696                                 + pde_i * gmmu_pde__size_v(),
2697                         mem_rd32(pde, 0), mem_rd32(pde, 1));
2698
2699                 pte_s = vm->pdes.ptes[pte_s->pgsz_idx] + pde_i;
2700
2701                 num_ptes = mm->page_table_sizing[pte_s->pgsz_idx].num_ptes;
2702                 page_size = mm->pde_stride / num_ptes;
2703                 pte_lo = 0;
2704                 pte_hi = num_ptes - 1;
2705
2706                 pte_space_page_offset_from_index(pte_lo,
2707                                                 &pte_space_page_cur,
2708                                                 &pte_space_offset_cur);
2709
2710                 err = map_gmmu_pages(pte_s->ref, pte_s->sgt, &pte);
2711                 pte_s->sgt = nvhost_memmgr_sg_table(client, pte_s->ref);
2712                 if (WARN_ON(IS_ERR(pte_s->sgt)))
2713                         return;
2714                 pte_addr = gk20a_mm_iova_addr(pte_s->sgt->sgl);
2715
2716                 for (pte_i = pte_lo; pte_i <= pte_hi; pte_i++) {
2717
2718                         pte_va = pde_va + pte_i * page_size;
2719
2720                         if (pte_va < va_begin)
2721                                 continue;
2722                         if (pte_va > va_end)
2723                                 break;
2724
2725                         pte_space_page_offset = pte_i;
2726
2727                         nvhost_err(dev_from_vm(vm),
2728                                 "\t\t[0x%016llx -> 0x%016llx] pte @ 0x%08x : 0x%08x, 0x%08x\n",
2729                                 pte_va, pte_va + page_size - 1,
2730                                 pte_addr + pte_i * gmmu_pte__size_v(),
2731                                 mem_rd32(pte + pte_space_page_offset * 8, 0),
2732                                 mem_rd32(pte + pte_space_page_offset * 8, 1));
2733                 }
2734
2735                 unmap_gmmu_pages(pte_s->ref, pte_s->sgt, pte);
2736         }
2737 }
2738 #endif /* VM DEBUG */
2739
2740 int gk20a_mm_suspend(struct gk20a *g)
2741 {
2742         nvhost_dbg_fn("");
2743
2744         gk20a_mm_fb_flush(g);
2745         gk20a_mm_l2_flush(g, true);
2746
2747         nvhost_dbg_fn("done");
2748         return 0;
2749 }
2750
2751 void gk20a_mm_ltc_isr(struct gk20a *g)
2752 {
2753         u32 intr;
2754
2755         intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r());
2756         nvhost_err(dev_from_gk20a(g), "ltc: %08x\n", intr);
2757         gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
2758 }
2759
2760 bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
2761 {
2762         u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
2763         return fb_mmu_debug_ctrl_debug_v(debug_ctrl) ==
2764                 fb_mmu_debug_ctrl_debug_enabled_v();
2765 }
2766