1 /*
2  * drivers/gpu/nvgpu/gk20a/mm_gk20a.c
3  *
4  * GK20A memory management
5  *
6  * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>
23 #include <linux/highmem.h>
24 #include <linux/log2.h>
25 #include <linux/nvhost.h>
26 #include <linux/pm_runtime.h>
27 #include <linux/scatterlist.h>
28 #include <linux/nvmap.h>
29 #include <linux/tegra-soc.h>
30 #include <linux/vmalloc.h>
31 #include <linux/dma-buf.h>
32 #include <asm/cacheflush.h>
33
34 #include "gk20a.h"
35 #include "mm_gk20a.h"
36 #include "hw_gmmu_gk20a.h"
37 #include "hw_fb_gk20a.h"
38 #include "hw_bus_gk20a.h"
39 #include "hw_ram_gk20a.h"
40 #include "hw_mc_gk20a.h"
41 #include "hw_flush_gk20a.h"
42 #include "hw_ltc_gk20a.h"
43
44 #include "kind_gk20a.h"
45
46 #ifdef CONFIG_ARM64
47 #define outer_flush_range(a, b)
48 #define __cpuc_flush_dcache_area __flush_dcache_area
49 #endif
50
51 /*
52  * GPU mapping life cycle
53  * ======================
54  *
55  * Kernel mappings
56  * ---------------
57  *
58  * Kernel mappings are created through vm.map(..., false):
59  *
60  *  - Mappings to the same allocations are reused and refcounted.
61  *  - This path does not support deferred unmapping (i.e. kernel must wait for
62  *    all hw operations on the buffer to complete before unmapping).
63  *  - References to dmabuf are owned and managed by the (kernel) clients of
64  *    the gk20a_vm layer.
65  *
66  *
67  * User space mappings
68  * -------------------
69  *
70  * User space mappings are created through as.map_buffer -> vm.map(..., true):
71  *
72  *  - Mappings to the same allocations are reused and refcounted.
73  *  - This path supports deferred unmapping (i.e. we delay the actual unmapping
74  *    until all hw operations have completed).
75  *  - References to dmabuf are owned and managed by the vm_gk20a
76  *    layer itself. vm.map acquires these refs, and sets
77  *    mapped_buffer->own_mem_ref to record that we must release the refs when we
78  *    actually unmap.
79  *
80  */
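/*
 * Illustrative sketch (editorial addition, not part of the original
 * driver): a kernel client of this layer maps a dmabuf with
 * user_mapped == false, e.g.
 *
 *     gpu_va = gk20a_vm_map(vm, dmabuf, 0, flags, kind, NULL,
 *                           false, rw_flag, 0, 0);
 *     if (!gpu_va)
 *             return -ENOMEM;
 *
 * (gk20a_vm_map() pins the buffer itself via gk20a_mm_pin() and returns
 * 0 on failure), and it must wait for all hw work on the buffer to
 * complete before unmapping.  The user-space path instead enters via the
 * as.map_buffer ioctl, which calls gk20a_vm_map(..., user_mapped == true)
 * and relies on the deferred-unmap and own_mem_ref handling described
 * above.
 */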
81
82 static inline int vm_aspace_id(struct vm_gk20a *vm)
83 {
84         /* -1 is bar1 or pmu, etc. */
85         return vm->as_share ? vm->as_share->id : -1;
86 }
87 static inline u32 hi32(u64 f)
88 {
89         return (u32)(f >> 32);
90 }
91 static inline u32 lo32(u64 f)
92 {
93         return (u32)(f & 0xffffffff);
94 }
95
96 #define FLUSH_CPU_DCACHE(va, pa, size)  \
97         do {    \
98                 __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
99                 outer_flush_range(pa, pa + (size_t)(size));             \
100         } while (0)
101
102 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
103 void __gk20a_mm_tlb_invalidate(struct vm_gk20a *vm);
104 static struct mapped_buffer_node *find_mapped_buffer_locked(
105                                         struct rb_root *root, u64 addr);
106 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
107                                 struct rb_root *root, struct dma_buf *dmabuf,
108                                 u32 kind);
109 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
110                                    enum gmmu_pgsz_gk20a pgsz_idx,
111                                    struct sg_table *sgt, u64 buffer_offset,
112                                    u64 first_vaddr, u64 last_vaddr,
113                                    u8 kind_v, u32 ctag_offset, bool cacheable,
114                                    int rw_flag);
115 static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
116 static void gk20a_vm_remove_support(struct vm_gk20a *vm);
117
118
119 /* note: keep the page sizes sorted lowest to highest here */
120 static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
121 static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
122 static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
123                                                                 0x1ffffLL };
124 static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
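/*
 * Editorial note: for each index i the tables above satisfy
 * gmmu_page_sizes[i] == 1 << gmmu_page_shifts[i],
 * gmmu_page_offset_masks[i] == gmmu_page_sizes[i] - 1 and
 * gmmu_page_masks[i] == ~gmmu_page_offset_masks[i], i.e. 4KB pages with
 * a 12-bit page offset and 128KB pages with a 17-bit page offset.
 */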
125
126 struct gk20a_comptags {
127         u32 offset;
128         u32 lines;
129 };
130
131 struct gk20a_dmabuf_priv {
132         struct mutex lock;
133
134         struct gk20a_allocator *comptag_allocator;
135         struct gk20a_comptags comptags;
136
137         struct dma_buf_attachment *attach;
138         struct sg_table *sgt;
139
140         int pin_count;
141 };
142
143 static void gk20a_mm_delete_priv(void *_priv)
144 {
145         struct gk20a_dmabuf_priv *priv = _priv;
146         if (!priv)
147                 return;
148
149         if (priv->comptags.lines) {
150                 BUG_ON(!priv->comptag_allocator);
151                 priv->comptag_allocator->free(priv->comptag_allocator,
152                                               priv->comptags.offset,
153                                               priv->comptags.lines);
154         }
155
156         kfree(priv);
157 }
158
159 struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf)
160 {
161         struct gk20a_dmabuf_priv *priv;
162
163         priv = dma_buf_get_drvdata(dmabuf, dev);
164         if (WARN_ON(!priv))
165                 return ERR_PTR(-EINVAL);
166
167         mutex_lock(&priv->lock);
168
169         if (priv->pin_count == 0) {
170                 priv->attach = dma_buf_attach(dmabuf, dev);
171                 if (IS_ERR(priv->attach)) {
172                         mutex_unlock(&priv->lock);
173                         return ERR_CAST(priv->attach);
174                 }
175
176                 priv->sgt = dma_buf_map_attachment(priv->attach,
177                                                    DMA_BIDIRECTIONAL);
178                 if (IS_ERR(priv->sgt)) {
179                         dma_buf_detach(dmabuf, priv->attach);
180                         mutex_unlock(&priv->lock);
181                         return priv->sgt;
182                 }
183         }
184
185         priv->pin_count++;
186         mutex_unlock(&priv->lock);
187         return priv->sgt;
188 }
189
190 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
191                     struct sg_table *sgt)
192 {
193         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
194         dma_addr_t dma_addr;
195
196         if (IS_ERR(priv) || !priv)
197                 return;
198
199         mutex_lock(&priv->lock);
200         WARN_ON(priv->sgt != sgt);
201         priv->pin_count--;
202         WARN_ON(priv->pin_count < 0);
203         dma_addr = sg_dma_address(priv->sgt->sgl);
204         if (priv->pin_count == 0) {
205                 dma_buf_unmap_attachment(priv->attach, priv->sgt,
206                                          DMA_BIDIRECTIONAL);
207                 dma_buf_detach(dmabuf, priv->attach);
208         }
209         mutex_unlock(&priv->lock);
210 }
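/*
 * Editorial note: gk20a_mm_pin()/gk20a_mm_unpin() refcount the dma-buf
 * attachment per device.  The first pin attaches and maps the buffer
 * (DMA_BIDIRECTIONAL); later pins just bump pin_count and return the
 * cached sg_table; the last unpin unmaps and detaches again.  A caller
 * passes the sg_table it got from gk20a_mm_pin() back to
 * gk20a_mm_unpin(), e.g.
 *
 *     struct sg_table *sgt = gk20a_mm_pin(dev, dmabuf);
 *     if (!IS_ERR(sgt)) {
 *             ... program hw with sg_dma_address(sgt->sgl) ...
 *             gk20a_mm_unpin(dev, dmabuf, sgt);
 *     }
 */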
211
212
213 static void gk20a_get_comptags(struct device *dev,
214                                struct dma_buf *dmabuf,
215                                struct gk20a_comptags *comptags)
216 {
217         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
218
219         if (!comptags)
220                 return;
221
222         if (!priv) {
223                 comptags->lines = 0;
224                 comptags->offset = 0;
225                 return;
226         }
227
228         *comptags = priv->comptags;
229 }
230
231 static int gk20a_alloc_comptags(struct device *dev,
232                                 struct dma_buf *dmabuf,
233                                 struct gk20a_allocator *allocator,
234                                 int lines)
235 {
236         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
237         u32 offset = 0;
238         int err;
239
240         if (!priv)
241                 return -ENOSYS;
242
243         if (!lines)
244                 return -EINVAL;
245
246         /* store the allocator so we can use it when we free the ctags */
247         priv->comptag_allocator = allocator;
248         err = allocator->alloc(allocator, &offset, lines);
249         if (!err) {
250                 priv->comptags.lines = lines;
251                 priv->comptags.offset = offset;
252         }
253         return err;
254 }
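/*
 * Editorial note: the comptag lines allocated here are remembered in the
 * per-dmabuf gk20a_dmabuf_priv together with the allocator that handed
 * them out, so that gk20a_mm_delete_priv() above can return them when
 * the dma-buf's driver data is destroyed.
 */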
255
256
257
258
259 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
260 {
261         gk20a_dbg_fn("");
262         if (g->ops.fb.reset)
263                 g->ops.fb.reset(g);
264
265         if (g->ops.fb.init_fs_state)
266                 g->ops.fb.init_fs_state(g);
267
268         return 0;
269 }
270
271 void gk20a_remove_mm_support(struct mm_gk20a *mm)
272 {
273         struct gk20a *g = mm->g;
274         struct device *d = dev_from_gk20a(g);
275         struct vm_gk20a *vm = &mm->bar1.vm;
276         struct inst_desc *inst_block = &mm->bar1.inst_block;
277
278         gk20a_dbg_fn("");
279
280         if (inst_block->cpuva)
281                 dma_free_coherent(d, inst_block->size,
282                         inst_block->cpuva, inst_block->iova);
283         inst_block->cpuva = NULL;
284         inst_block->iova = 0;
285
286         gk20a_vm_remove_support(vm);
287 }
288
289 int gk20a_init_mm_setup_sw(struct gk20a *g)
290 {
291         struct mm_gk20a *mm = &g->mm;
292         int i;
293
294         gk20a_dbg_fn("");
295
296         if (mm->sw_ready) {
297                 gk20a_dbg_fn("skip init");
298                 return 0;
299         }
300
301         mm->g = g;
302         mutex_init(&mm->l2_op_lock);
303         mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
304         mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
305         mm->pde_stride    = mm->big_page_size << 10;
306         mm->pde_stride_shift = ilog2(mm->pde_stride);
307         BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
308
309         for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {
310
311                 u32 num_ptes, pte_space, num_pages;
312
313                 /* assuming "full" page tables */
314                 num_ptes = mm->pde_stride / gmmu_page_sizes[i];
315
316                 pte_space = num_ptes * gmmu_pte__size_v();
317                 /* allocate whole pages */
318                 pte_space = roundup(pte_space, PAGE_SIZE);
319
320                 num_pages = pte_space / PAGE_SIZE;
321                 /* make sure "order" is viable */
322                 BUG_ON(!is_power_of_2(num_pages));
323
324                 mm->page_table_sizing[i].num_ptes = num_ptes;
325                 mm->page_table_sizing[i].order = ilog2(num_pages);
326         }
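        /*
         * Worked example (editorial, assuming 4KB kernel pages and the
         * 8-byte PTEs noted later in this file): big_page_size = 128KB,
         * so pde_stride = 128KB << 10 = 128MB and pde_stride_shift = 27.
         * A full table of 4KB PTEs is then 128MB / 4KB = 32768 entries
         * = 256KB = 64 pages (order 6); a full table of 128KB PTEs is
         * 1024 entries = 8KB = 2 pages (order 1).
         */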
327
328         /*TBD: make channel vm size configurable */
329         mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
330
331         gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
332
333         gk20a_dbg_info("small page-size (%dKB) pte array: %dKB",
334                         gmmu_page_sizes[gmmu_page_size_small] >> 10,
335                         (mm->page_table_sizing[gmmu_page_size_small].num_ptes *
336                          gmmu_pte__size_v()) >> 10);
337
338         gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
339                         gmmu_page_sizes[gmmu_page_size_big] >> 10,
340                         (mm->page_table_sizing[gmmu_page_size_big].num_ptes *
341                          gmmu_pte__size_v()) >> 10);
342
343
344         gk20a_init_bar1_vm(mm);
345
346         mm->remove_support = gk20a_remove_mm_support;
347         mm->sw_ready = true;
348
349         gk20a_dbg_fn("done");
350         return 0;
351 }
352
353 /* make sure gk20a_init_mm_support is called before */
354 static int gk20a_init_mm_setup_hw(struct gk20a *g)
355 {
356         struct mm_gk20a *mm = &g->mm;
357         struct inst_desc *inst_block = &mm->bar1.inst_block;
358         phys_addr_t inst_pa = inst_block->cpu_pa;
359
360         gk20a_dbg_fn("");
361
362         /* set large page size in fb
363          * note this is very early on, can we defer it? */
364         {
365                 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
366
367                 if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K)
368                         fb_mmu_ctrl = (fb_mmu_ctrl &
369                                        ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
370                                 fb_mmu_ctrl_vm_pg_size_128kb_f();
371                 else
372                         BUG_ON(1); /* no support/testing for larger ones yet */
373
374                 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
375         }
376
377         inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
378         gk20a_dbg_info("bar1 inst block ptr: 0x%08x",  (u32)inst_pa);
379
380         gk20a_writel(g, bus_bar1_block_r(),
381                      bus_bar1_block_target_vid_mem_f() |
382                      bus_bar1_block_mode_virtual_f() |
383                      bus_bar1_block_ptr_f(inst_pa));
384         if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g))
385                 return -EBUSY;
386
387         gk20a_dbg_fn("done");
388         return 0;
389 }
390
391 int gk20a_init_mm_support(struct gk20a *g)
392 {
393         int err;
394
395         err = gk20a_init_mm_reset_enable_hw(g);
396         if (err)
397                 return err;
398
399         err = gk20a_init_mm_setup_sw(g);
400         if (err)
401                 return err;
402
403         err = gk20a_init_mm_setup_hw(g);
404         if (err)
405                 return err;
406
407         return err;
408 }
409
410 #ifdef CONFIG_GK20A_PHYS_PAGE_TABLES
411 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
412                             void **handle,
413                             struct sg_table **sgt,
414                             size_t *size)
415 {
416         u32 num_pages = 1 << order;
417         u32 len = num_pages * PAGE_SIZE;
418         int err;
419         struct page *pages;
420
421         gk20a_dbg_fn("");
422
423         pages = alloc_pages(GFP_KERNEL, order);
424         if (!pages) {
425                 gk20a_dbg(gpu_dbg_pte, "alloc_pages failed\n");
426                 goto err_out;
427         }
428         *sgt = kzalloc(sizeof(**sgt), GFP_KERNEL);
429         if (!(*sgt)) {
430                 gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table");
431                 goto err_alloced;
432         }
433         err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
434         if (err) {
435                 gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed\n");
436                 goto err_sg_table;
437         }
438         sg_set_page((*sgt)->sgl, pages, len, 0);
439         *handle = page_address(pages);
440         memset(*handle, 0, len);
441         *size = len;
442         FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len);
443
444         return 0;
445
446 err_sg_table:
447         kfree(*sgt);
448 err_alloced:
449         __free_pages(pages, order);
450 err_out:
451         return -ENOMEM;
452 }
453
454 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
455                             struct sg_table *sgt, u32 order,
456                             size_t size)
457 {
458         gk20a_dbg_fn("");
459         BUG_ON(sgt == NULL);
460         free_pages((unsigned long)handle, order);
461         sg_free_table(sgt);
462         kfree(sgt);
463 }
464
465 static int map_gmmu_pages(void *handle, struct sg_table *sgt,
466                           void **va, size_t size)
467 {
468         FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
469         *va = handle;
470         return 0;
471 }
472
473 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
474 {
475         FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
476 }
477 #else
478
479 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
480                             void **handle,
481                             struct sg_table **sgt,
482                             size_t *size)
483 {
484         struct device *d = dev_from_vm(vm);
485         u32 num_pages = 1 << order;
486         u32 len = num_pages * PAGE_SIZE;
487         dma_addr_t iova;
488         DEFINE_DMA_ATTRS(attrs);
489         struct page **pages;
490         void *cpuva;
491         int err = 0;
492
493         gk20a_dbg_fn("");
494
495         *size = len;
496
497         if (IS_ENABLED(CONFIG_ARM64)) {
498                 cpuva = dma_zalloc_coherent(d, len, &iova, GFP_KERNEL);
499                 if (!cpuva) {
500                         gk20a_err(d, "memory allocation failed\n");
501                         goto err_out;
502                 }
503
504                 err = gk20a_get_sgtable(d, sgt, cpuva, iova, len);
505                 if (err) {
506                         gk20a_err(d, "sgt allocation failed\n");
507                         goto err_free;
508                 }
509
510                 *handle = cpuva;
511         } else {
512                 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
513                 pages = dma_alloc_attrs(d, len, &iova, GFP_KERNEL, &attrs);
514                 if (!pages) {
515                         gk20a_err(d, "memory allocation failed\n");
516                         goto err_out;
517                 }
518
519                 err = gk20a_get_sgtable_from_pages(d, sgt, pages,
520                                         iova, len);
521                 if (err) {
522                         gk20a_err(d, "sgt allocation failed\n");
523                         goto err_free;
524                 }
525
526                 *handle = (void *)pages;
527         }
528
529         return 0;
530
531 err_free:
532         if (IS_ENABLED(CONFIG_ARM64)) {
533                 dma_free_coherent(d, len, cpuva, iova);
534                 cpuva = NULL;
535         } else {
536                 dma_free_attrs(d, len, pages, iova, &attrs);
537                 pages = NULL;
538         }
539         iova = 0;
540 err_out:
541         return -ENOMEM;
542 }
543
544 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
545                             struct sg_table *sgt, u32 order,
546                             size_t size)
547 {
548         struct device *d = dev_from_vm(vm);
549         u64 iova;
550         DEFINE_DMA_ATTRS(attrs);
551         struct page **pages;
552
553         gk20a_dbg_fn("");
554         BUG_ON(sgt == NULL);
555
556         iova = sg_dma_address(sgt->sgl);
557
558         gk20a_free_sgtable(&sgt);
559
560         if (IS_ENABLED(CONFIG_ARM64)) {
561                 dma_free_coherent(d, size, handle, iova);
562         } else {
563                 pages = (struct page **)handle;
564                 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
565                 dma_free_attrs(d, size, pages, iova, &attrs);
566                 pages = NULL;
567         }
568
569         handle = NULL;
570         iova = 0;
571 }
572
573 static int map_gmmu_pages(void *handle, struct sg_table *sgt,
574                           void **kva, size_t size)
575 {
576         int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
577         struct page **pages;
578         gk20a_dbg_fn("");
579
580         if (IS_ENABLED(CONFIG_ARM64)) {
581                 *kva = handle;
582         } else {
583                 pages = (struct page **)handle;
584                 *kva = vmap(pages, count, 0, pgprot_dmacoherent(PAGE_KERNEL));
585                 if (!(*kva))
586                         return -ENOMEM;
587         }
588
589         return 0;
590 }
591
592 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
593 {
594         gk20a_dbg_fn("");
595
596         if (!IS_ENABLED(CONFIG_ARM64))
597                 vunmap(va);
598         va = NULL;
599 }
600 #endif
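/*
 * Editorial summary of the two backends above: with
 * CONFIG_GK20A_PHYS_PAGE_TABLES the page tables live in physically
 * contiguous pages from alloc_pages() and are kept coherent with
 * explicit dcache flushes, while the default path allocates through the
 * DMA API (dma_zalloc_coherent() on ARM64, DMA_ATTR_NO_KERNEL_MAPPING
 * pages plus vmap() elsewhere).  Either way the table memory is handed
 * around as an sg_table plus an opaque handle.
 */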
601
602 /* allocate a phys contig region big enough for a full
603  * sized gmmu page table for the given gmmu_page_size.
604  * the whole range is zeroed so it's "invalid"/will fault
605  */
606
607 static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
608                                         enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
609                                         struct page_table_gk20a *pte)
610 {
611         int err;
612         u32 pte_order;
613         void *handle = NULL;
614         struct sg_table *sgt;
615         size_t size;
616
617         gk20a_dbg_fn("");
618
619         /* allocate enough pages for the table */
620         pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order;
621
622         err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size);
623         if (err)
624                 return err;
625
626         gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
627                         pte, gk20a_mm_iova_addr(sgt->sgl), pte_order);
628
629         pte->ref = handle;
630         pte->sgt = sgt;
631         pte->size = size;
632
633         return 0;
634 }
635
636 /* given address range (inclusive) determine the pdes crossed */
637 static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm,
638                                               u64 addr_lo, u64 addr_hi,
639                                               u32 *pde_lo, u32 *pde_hi)
640 {
641         *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift);
642         *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift);
643         gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
644                    addr_lo, addr_hi, vm->mm->pde_stride_shift);
645         gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d",
646                    *pde_lo, *pde_hi);
647 }
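/*
 * Worked example (editorial): with pde_stride_shift = 27 as set up in
 * gk20a_init_mm_setup_sw(), a mapping covering 0x07800000..0x08800000
 * yields pde_lo = 0 and pde_hi = 1.
 */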
648
649 static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
650 {
651         return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v());
652 }
653
654 static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm,
655                                        u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
656 {
657         u32 ret;
658         /* mask off pde part */
659         addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1));
660         /* shift over to get pte index. note assumption that pte index
661          * doesn't leak over into the high 32b */
662         ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]);
663
664         gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
665         return ret;
666 }
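/*
 * Worked example (editorial): for addr = 0x08123000 with
 * pde_stride_shift = 27, masking off the pde part leaves 0x00123000;
 * shifting by 12 (small pages) gives pte index 0x123 and shifting by 17
 * (big pages) gives 0x9.
 */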
667
668 static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page,
669                                                     u32 *pte_offset)
670 {
671         /* ptes are 8B regardless of pagesize */
672         /* pte space pages are 4KB, so 512 ptes per 4KB page */
673         *pte_page = i >> 9;
674
675         /* this offset is a pte offset, not a byte offset */
676         *pte_offset = i & ((1<<9)-1);
677
678         gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x",
679                    i, *pte_page, *pte_offset);
680 }
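/*
 * Worked example (editorial): pte index i = 0x523 lands in pte space
 * page 2 (0x523 >> 9) at pte offset 0x123 (0x523 & 0x1ff).
 */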
681
682
683 /*
684  * given a pde index/page table number make sure it has
685  * backing store and, if not, allocate it and
686  * record it in the appropriate pde
687  */
688 static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
689                                 u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
690 {
691         int err;
692         struct page_table_gk20a *pte =
693                 vm->pdes.ptes[gmmu_pgsz_idx] + i;
694
695         gk20a_dbg_fn("");
696
697         /* if it's already in place it's valid */
698         if (pte->ref)
699                 return 0;
700
701         gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d",
702                    gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
703
704         err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
705         if (err)
706                 return err;
707
708         /* rewrite pde */
709         update_gmmu_pde_locked(vm, i);
710
711         return 0;
712 }
713
714 static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
715                                                        u64 addr)
716 {
717         struct vm_reserved_va_node *va_node;
718         list_for_each_entry(va_node, &vm->reserved_va_list, reserved_va_list)
719                 if (addr >= va_node->vaddr_start &&
720                     addr < (u64)va_node->vaddr_start + (u64)va_node->size)
721                         return va_node;
722
723         return NULL;
724 }
725
726 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
727                          struct mapped_buffer_node ***mapped_buffers,
728                          int *num_buffers)
729 {
730         struct mapped_buffer_node *mapped_buffer;
731         struct mapped_buffer_node **buffer_list;
732         struct rb_node *node;
733         int i = 0;
734
735         mutex_lock(&vm->update_gmmu_lock);
736
737         buffer_list = kzalloc(sizeof(*buffer_list) *
738                               vm->num_user_mapped_buffers, GFP_KERNEL);
739         if (!buffer_list) {
740                 mutex_unlock(&vm->update_gmmu_lock);
741                 return -ENOMEM;
742         }
743
744         node = rb_first(&vm->mapped_buffers);
745         while (node) {
746                 mapped_buffer =
747                         container_of(node, struct mapped_buffer_node, node);
748                 if (mapped_buffer->user_mapped) {
749                         buffer_list[i] = mapped_buffer;
750                         kref_get(&mapped_buffer->ref);
751                         i++;
752                 }
753                 node = rb_next(&mapped_buffer->node);
754         }
755
756         BUG_ON(i != vm->num_user_mapped_buffers);
757
758         *num_buffers = vm->num_user_mapped_buffers;
759         *mapped_buffers = buffer_list;
760
761         mutex_unlock(&vm->update_gmmu_lock);
762
763         return 0;
764 }
765
766 static void gk20a_vm_unmap_locked_kref(struct kref *ref)
767 {
768         struct mapped_buffer_node *mapped_buffer =
769                 container_of(ref, struct mapped_buffer_node, ref);
770         gk20a_vm_unmap_locked(mapped_buffer);
771 }
772
773 void gk20a_vm_put_buffers(struct vm_gk20a *vm,
774                                  struct mapped_buffer_node **mapped_buffers,
775                                  int num_buffers)
776 {
777         int i;
778
779         mutex_lock(&vm->update_gmmu_lock);
780
781         for (i = 0; i < num_buffers; ++i)
782                 kref_put(&mapped_buffers[i]->ref,
783                          gk20a_vm_unmap_locked_kref);
784
785         mutex_unlock(&vm->update_gmmu_lock);
786
787         kfree(mapped_buffers);
788 }
789
790 static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
791 {
792         struct device *d = dev_from_vm(vm);
793         int retries;
794         struct mapped_buffer_node *mapped_buffer;
795
796         mutex_lock(&vm->update_gmmu_lock);
797
798         mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
799         if (!mapped_buffer) {
800                 mutex_unlock(&vm->update_gmmu_lock);
801                 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
802                 return;
803         }
804
805         if (mapped_buffer->flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
806                 mutex_unlock(&vm->update_gmmu_lock);
807
808                 retries = 1000;
809                 while (retries) {
810                         if (atomic_read(&mapped_buffer->ref.refcount) == 1)
811                                 break;
812                         retries--;
813                         udelay(50);
814                 }
815                 if (!retries)
816                         gk20a_err(d, "sync-unmap failed on 0x%llx",
817                                                                 offset);
818                 mutex_lock(&vm->update_gmmu_lock);
819         }
820
821         mapped_buffer->user_mapped--;
822         if (mapped_buffer->user_mapped == 0)
823                 vm->num_user_mapped_buffers--;
824         kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
825
826         mutex_unlock(&vm->update_gmmu_lock);
827 }
828
829 static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
830                              u64 size,
831                              enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
832
833 {
834         struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
835         int err;
836         u64 offset;
837         u32 start_page_nr = 0, num_pages;
838         u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx];
839
840         if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) {
841                 dev_warn(dev_from_vm(vm),
842                          "invalid page size requested in gk20a vm alloc");
843                 return 0;
844         }
845
846         if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
847                 dev_warn(dev_from_vm(vm),
848                          "unsupported page size requested");
849                 return 0;
850
851         }
852
853         /* be certain we round up to gmmu_page_size if needed */
854         /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
855         size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
856
857         gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
858                         gmmu_page_sizes[gmmu_pgsz_idx]>>10);
859
860         /* The vma allocator represents page accounting. */
861         num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx];
862
863         err = vma->alloc(vma, &start_page_nr, num_pages);
864
865         if (err) {
866                 gk20a_err(dev_from_vm(vm),
867                            "%s oom: sz=0x%llx", vma->name, size);
868                 return 0;
869         }
870
871         offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx];
872         gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
873
874         return offset;
875 }
876
877 static int gk20a_vm_free_va(struct vm_gk20a *vm,
878                              u64 offset, u64 size,
879                              enum gmmu_pgsz_gk20a pgsz_idx)
880 {
881         struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
882         u32 page_size = gmmu_page_sizes[pgsz_idx];
883         u32 page_shift = gmmu_page_shifts[pgsz_idx];
884         u32 start_page_nr, num_pages;
885         int err;
886
887         gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
888                         vma->name, offset, size);
889
890         start_page_nr = (u32)(offset >> page_shift);
891         num_pages = (u32)((size + page_size - 1) >> page_shift);
892
893         err = vma->free(vma, start_page_nr, num_pages);
894         if (err) {
895                 gk20a_err(dev_from_vm(vm),
896                            "not found: offset=0x%llx, sz=0x%llx",
897                            offset, size);
898         }
899
900         return err;
901 }
902
903 static int insert_mapped_buffer(struct rb_root *root,
904                                 struct mapped_buffer_node *mapped_buffer)
905 {
906         struct rb_node **new_node = &(root->rb_node), *parent = NULL;
907
908         /* Figure out where to put new node */
909         while (*new_node) {
910                 struct mapped_buffer_node *cmp_with =
911                         container_of(*new_node, struct mapped_buffer_node,
912                                      node);
913
914                 parent = *new_node;
915
916                 if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */
917                         new_node = &((*new_node)->rb_left);
918                 else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */
919                         new_node = &((*new_node)->rb_right);
920                 else
921                         return -EINVAL; /* no fair dup'ing */
922         }
923
924         /* Add new node and rebalance tree. */
925         rb_link_node(&mapped_buffer->node, parent, new_node);
926         rb_insert_color(&mapped_buffer->node, root);
927
928         return 0;
929 }
930
931 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
932                                 struct rb_root *root, struct dma_buf *dmabuf,
933                                 u32 kind)
934 {
935         struct rb_node *node = rb_first(root);
936         while (node) {
937                 struct mapped_buffer_node *mapped_buffer =
938                         container_of(node, struct mapped_buffer_node, node);
939                 if (mapped_buffer->dmabuf == dmabuf &&
940                     kind == mapped_buffer->kind)
941                         return mapped_buffer;
942                 node = rb_next(&mapped_buffer->node);
943         }
944         return NULL;
945 }
946
947 static struct mapped_buffer_node *find_mapped_buffer_locked(
948                                         struct rb_root *root, u64 addr)
949 {
950
951         struct rb_node *node = root->rb_node;
952         while (node) {
953                 struct mapped_buffer_node *mapped_buffer =
954                         container_of(node, struct mapped_buffer_node, node);
955                 if (mapped_buffer->addr > addr) /* u64 cmp */
956                         node = node->rb_left;
957                 else if (mapped_buffer->addr != addr) /* u64 cmp */
958                         node = node->rb_right;
959                 else
960                         return mapped_buffer;
961         }
962         return NULL;
963 }
964
965 static struct mapped_buffer_node *find_mapped_buffer_range_locked(
966                                         struct rb_root *root, u64 addr)
967 {
968         struct rb_node *node = root->rb_node;
969         while (node) {
970                 struct mapped_buffer_node *m =
971                         container_of(node, struct mapped_buffer_node, node);
972                 if (m->addr <= addr && m->addr + m->size > addr)
973                         return m;
974                 else if (m->addr > addr) /* u64 cmp */
975                         node = node->rb_left;
976                 else
977                         node = node->rb_right;
978         }
979         return NULL;
980 }
981
982 #define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
983
984 struct buffer_attrs {
985         struct sg_table *sgt;
986         u64 size;
987         u64 align;
988         u32 ctag_offset;
989         u32 ctag_lines;
990         int pgsz_idx;
991         u8 kind_v;
992         u8 uc_kind_v;
993 };
994
995 static void gmmu_select_page_size(struct buffer_attrs *bfr)
996 {
997         int i;
998         /*  choose the biggest first (top->bottom) */
999         for (i = (gmmu_nr_page_sizes-1); i >= 0; i--)
1000                 if (!(gmmu_page_offset_masks[i] & bfr->align)) {
1001                         /* would like to add this too but nvmap returns the
1002                          * original requested size not the allocated size.
1003                          * (!(gmmu_page_offset_masks[i] & bfr->size)) */
1004                         bfr->pgsz_idx = i;
1005                         break;
1006                 }
1007 }
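/*
 * Editorial example: a buffer whose base address is aligned to 128KB or
 * more (so (gmmu_page_offset_masks[gmmu_page_size_big] & bfr->align) ==
 * 0) is mapped with big pages, while one that is only 4KB-aligned falls
 * through to the small page size.
 */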
1008
1009 static int setup_buffer_kind_and_compression(struct device *d,
1010                                              u32 flags,
1011                                              struct buffer_attrs *bfr,
1012                                              enum gmmu_pgsz_gk20a pgsz_idx)
1013 {
1014         bool kind_compressible;
1015
1016         if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
1017                 bfr->kind_v = gmmu_pte_kind_pitch_v();
1018
1019         if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
1020                 gk20a_err(d, "kind 0x%x not supported", bfr->kind_v);
1021                 return -EINVAL;
1022         }
1023
1024         bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
1025         /* find a suitable uncompressed kind if it becomes necessary later */
1026         kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
1027         if (kind_compressible) {
1028                 bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
1029                 if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
1030                         /* shouldn't happen, but it is worth cross-checking */
1031                         gk20a_err(d, "comptag kind 0x%x can't be"
1032                                    " downgraded to uncompressed kind",
1033                                    bfr->kind_v);
1034                         return -EINVAL;
1035                 }
1036         }
1037         /* comptags only supported for suitable kinds, 128KB pagesize */
1038         if (unlikely(kind_compressible &&
1039                      (gmmu_page_sizes[pgsz_idx] != 128*1024))) {
1040                 /*
1041                 gk20a_warn(d, "comptags specified"
1042                 " but pagesize being used doesn't support it");*/
1043                 /* it is safe to fall back to uncompressed as
1044                    functionality is not harmed */
1045                 bfr->kind_v = bfr->uc_kind_v;
1046                 kind_compressible = false;
1047         }
1048         if (kind_compressible)
1049                 bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
1050                         COMP_TAG_LINE_SIZE_SHIFT;
1051         else
1052                 bfr->ctag_lines = 0;
1053
1054         return 0;
1055 }
1056
1057 static int validate_fixed_buffer(struct vm_gk20a *vm,
1058                                  struct buffer_attrs *bfr,
1059                                  u64 map_offset, u64 map_size)
1060 {
1061         struct device *dev = dev_from_vm(vm);
1062         struct vm_reserved_va_node *va_node;
1063         struct mapped_buffer_node *buffer;
1064
1065         if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) {
1066                 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
1067                            map_offset);
1068                 return -EINVAL;
1069         }
1070
1071         /* find the space reservation */
1072         va_node = addr_to_reservation(vm, map_offset);
1073         if (!va_node) {
1074                 gk20a_warn(dev, "fixed offset mapping without space allocation");
1075                 return -EINVAL;
1076         }
1077
1078         /* check that this mapping does not collide with existing
1079          * mappings by checking the overlapping area between the current
1080          * buffer and all other mapped buffers */
1081
1082         list_for_each_entry(buffer,
1083                 &va_node->va_buffers_list, va_buffers_list) {
1084                 s64 begin = max(buffer->addr, map_offset);
1085                 s64 end = min(buffer->addr +
1086                         buffer->size, map_offset + map_size);
1087                 if (end - begin > 0) {
1088                         gk20a_warn(dev, "overlapping buffer map requested");
1089                         return -EINVAL;
1090                 }
1091         }
1092
1093         return 0;
1094 }
1095
1096 static u64 __locked_gmmu_map(struct vm_gk20a *vm,
1097                                 u64 map_offset,
1098                                 struct sg_table *sgt,
1099                                 u64 buffer_offset,
1100                                 u64 size,
1101                                 int pgsz_idx,
1102                                 u8 kind_v,
1103                                 u32 ctag_offset,
1104                                 u32 flags,
1105                                 int rw_flag)
1106 {
1107         int err = 0, i = 0;
1108         bool allocated = false;
1109         u32 pde_lo, pde_hi;
1110         struct device *d = dev_from_vm(vm);
1111
1112         /* Allocate (or validate when map_offset != 0) the virtual address. */
1113         if (!map_offset) {
1114                 map_offset = gk20a_vm_alloc_va(vm, size,
1115                                           pgsz_idx);
1116                 if (!map_offset) {
1117                         gk20a_err(d, "failed to allocate va space");
1118                         err = -ENOMEM;
1119                         goto fail_alloc;
1120                 }
1121                 allocated = true;
1122         }
1123
1124         pde_range_from_vaddr_range(vm,
1125                                    map_offset,
1126                                    map_offset + size - 1,
1127                                    &pde_lo, &pde_hi);
1128
1129         /* mark the addr range valid (but with 0 phys addr, which will fault) */
1130         for (i = pde_lo; i <= pde_hi; i++) {
1131                 err = validate_gmmu_page_table_gk20a_locked(vm, i,
1132                                                             pgsz_idx);
1133                 if (err) {
1134                         gk20a_err(d, "failed to validate page table %d: %d",
1135                                                            i, err);
1136                         goto fail_validate;
1137                 }
1138         }
1139
1140         err = update_gmmu_ptes_locked(vm, pgsz_idx,
1141                                       sgt,
1142                                       buffer_offset,
1143                                       map_offset, map_offset + size - 1,
1144                                       kind_v,
1145                                       ctag_offset,
1146                                       flags &
1147                                       NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1148                                       rw_flag);
1149         if (err) {
1150                 gk20a_err(d, "failed to update ptes on map");
1151                 goto fail_validate;
1152         }
1153
1154         return map_offset;
1155 fail_validate:
1156         if (allocated)
1157                 gk20a_vm_free_va(vm, map_offset, size, pgsz_idx);
1158 fail_alloc:
1159         gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
1160         return 0;
1161 }
1162
1163 static void __locked_gmmu_unmap(struct vm_gk20a *vm,
1164                                 u64 vaddr,
1165                                 u64 size,
1166                                 int pgsz_idx,
1167                                 bool va_allocated,
1168                                 int rw_flag)
1169 {
1170         int err = 0;
1171         struct gk20a *g = gk20a_from_vm(vm);
1172
1173         if (va_allocated) {
1174                 err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
1175                 if (err) {
1176                         dev_err(dev_from_vm(vm),
1177                                 "failed to free va");
1178                         return;
1179                 }
1180         }
1181
1182         /* unmap here needs to know the page size we assigned at mapping */
1183         err = update_gmmu_ptes_locked(vm,
1184                                 pgsz_idx,
1185                                 0, /* n/a for unmap */
1186                                 0,
1187                                 vaddr,
1188                                 vaddr + size - 1,
1189                                 0, 0, false /* n/a for unmap */,
1190                                 rw_flag);
1191         if (err)
1192                 dev_err(dev_from_vm(vm),
1193                         "failed to update gmmu ptes on unmap");
1194
1195         /* detect which if any pdes/ptes can now be released */
1196
1197         /* flush l2 so any dirty lines are written out *now*.
1198          *  also as we could potentially be switching this buffer
1199          * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
1200          * some point in the future we need to invalidate l2.  e.g. switching
1201          * from a render buffer unmap (here) to later using the same memory
1202          * for gmmu ptes.  note the positioning of this relative to any smmu
1203          * unmapping (below). */
1204
1205         gk20a_mm_l2_flush(g, true);
1206 }
1207
1208 static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
1209                                          struct dma_buf *dmabuf,
1210                                          u64 offset_align,
1211                                          u32 flags,
1212                                          int kind,
1213                                          struct sg_table **sgt,
1214                                          bool user_mapped,
1215                                          int rw_flag)
1216 {
1217         struct mapped_buffer_node *mapped_buffer = 0;
1218
1219         mapped_buffer =
1220                 find_mapped_buffer_reverse_locked(&vm->mapped_buffers,
1221                                                   dmabuf, kind);
1222         if (!mapped_buffer)
1223                 return 0;
1224
1225         if (mapped_buffer->flags != flags)
1226                 return 0;
1227
1228         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET &&
1229             mapped_buffer->addr != offset_align)
1230                 return 0;
1231
1232         BUG_ON(mapped_buffer->vm != vm);
1233
1234         /* mark the buffer as used */
1235         if (user_mapped) {
1236                 if (mapped_buffer->user_mapped == 0)
1237                         vm->num_user_mapped_buffers++;
1238                 mapped_buffer->user_mapped++;
1239
1240                 /* If the mapping comes from user space, we own
1241                  * the handle ref. Since we reuse an
1242                  * existing mapping here, we need to give back those
1243                  * refs once in order not to leak.
1244                  */
1245                 if (mapped_buffer->own_mem_ref)
1246                         dma_buf_put(mapped_buffer->dmabuf);
1247                 else
1248                         mapped_buffer->own_mem_ref = true;
1249         }
1250         kref_get(&mapped_buffer->ref);
1251
1252         gk20a_dbg(gpu_dbg_map,
1253                    "reusing as=%d pgsz=%d flags=0x%x ctags=%d "
1254                    "start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x "
1255                    "own_mem_ref=%d user_mapped=%d",
1256                    vm_aspace_id(vm), mapped_buffer->pgsz_idx,
1257                    mapped_buffer->flags,
1258                    mapped_buffer->ctag_lines,
1259                    mapped_buffer->ctag_offset,
1260                    hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1261                    hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1262                    lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1263                    hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1264                    lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1265                    mapped_buffer->own_mem_ref, user_mapped);
1266
1267         if (sgt)
1268                 *sgt = mapped_buffer->sgt;
1269         return mapped_buffer->addr;
1270 }
1271
1272 u64 gk20a_vm_map(struct vm_gk20a *vm,
1273                         struct dma_buf *dmabuf,
1274                         u64 offset_align,
1275                         u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
1276                         int kind,
1277                         struct sg_table **sgt,
1278                         bool user_mapped,
1279                         int rw_flag,
1280                         u64 buffer_offset,
1281                         u64 mapping_size)
1282 {
1283         struct gk20a *g = gk20a_from_vm(vm);
1284         struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
1285         struct device *d = dev_from_vm(vm);
1286         struct mapped_buffer_node *mapped_buffer = 0;
1287         bool inserted = false, va_allocated = false;
1288         u32 gmmu_page_size = 0;
1289         u64 map_offset = 0;
1290         int err = 0;
1291         struct buffer_attrs bfr = {0};
1292         struct gk20a_comptags comptags;
1293         u64 buf_addr;
1294
1295         mutex_lock(&vm->update_gmmu_lock);
1296
1297         /* check if this buffer is already mapped */
1298         map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
1299                                                    flags, kind, sgt,
1300                                                    user_mapped, rw_flag);
1301         if (map_offset) {
1302                 mutex_unlock(&vm->update_gmmu_lock);
1303                 return map_offset;
1304         }
1305
1306         /* pin buffer to get phys/iovmm addr */
1307         bfr.sgt = gk20a_mm_pin(d, dmabuf);
1308         if (IS_ERR(bfr.sgt)) {
1309                 /* Falling back to physical is actually possible
1310                  * here in many cases if we use 4K phys pages in the
1311                  * gmmu.  However we have some regions which require
1312                  * contig regions to work properly (either phys-contig
1313                  * or contig through smmu io_vaspace).  Until we can
1314                  * track the difference between those two cases we have
1315                  * to fail the mapping when we run out of SMMU space.
1316                  */
1317                 gk20a_warn(d, "failed to pin buffer");
1318                 goto clean_up;
1319         }
1320
1321         if (sgt)
1322                 *sgt = bfr.sgt;
1323
1324         bfr.kind_v = kind;
1325         bfr.size = dmabuf->size;
1326         buf_addr = (u64)sg_dma_address(bfr.sgt->sgl);
1327         if (unlikely(!buf_addr))
1328                 buf_addr = (u64)sg_phys(bfr.sgt->sgl);
1329         bfr.align = 1 << __ffs(buf_addr);
1330         bfr.pgsz_idx = -1;
1331         mapping_size = mapping_size ? mapping_size : bfr.size;
1332
1333         /* If FIX_OFFSET is set, pgsz is determined. Otherwise, select
1334          * page size according to memory alignment */
1335         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1336                 bfr.pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ?
1337                                 gmmu_page_size_big : gmmu_page_size_small;
1338         } else {
1339                 if (vm->big_pages)
1340                         gmmu_select_page_size(&bfr);
1341                 else
1342                         bfr.pgsz_idx = gmmu_page_size_small;
1343         }
1344
1345         /* validate/adjust bfr attributes */
1346         if (unlikely(bfr.pgsz_idx == -1)) {
1347                 gk20a_err(d, "unsupported page size detected");
1348                 goto clean_up;
1349         }
1350
1351         if (unlikely(bfr.pgsz_idx < gmmu_page_size_small ||
1352                      bfr.pgsz_idx > gmmu_page_size_big)) {
1353                 BUG_ON(1);
1354                 err = -EINVAL;
1355                 goto clean_up;
1356         }
1357         gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
1358
1359         /* Check if we should use a fixed offset for mapping this buffer */
1360
1361         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)  {
1362                 err = validate_fixed_buffer(vm, &bfr,
1363                         offset_align, mapping_size);
1364                 if (err)
1365                         goto clean_up;
1366
1367                 map_offset = offset_align;
1368                 va_allocated = false;
1369         } else
1370                 va_allocated = true;
1371
1372         if (sgt)
1373                 *sgt = bfr.sgt;
1374
1375         err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx);
1376         if (unlikely(err)) {
1377                 gk20a_err(d, "failure setting up kind and compression");
1378                 goto clean_up;
1379         }
1380
1381         /* bar1 and pmu vm don't need ctag */
1382         if (!vm->enable_ctag)
1383                 bfr.ctag_lines = 0;
1384
1385         gk20a_get_comptags(d, dmabuf, &comptags);
1386
1387         if (bfr.ctag_lines && !comptags.lines) {
1388                 /* allocate compression resources if needed */
1389                 err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator,
1390                                            bfr.ctag_lines);
1391                 if (err) {
1392                         /* ok to fall back here if we ran out */
1393                         /* TBD: we can partially alloc ctags as well... */
1394                         bfr.ctag_lines = bfr.ctag_offset = 0;
1395                         bfr.kind_v = bfr.uc_kind_v;
1396                 } else {
1397                         gk20a_get_comptags(d, dmabuf, &comptags);
1398
1399                         /* init/clear the ctag buffer */
1400                         g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
1401                                           comptags.offset,
1402                                           comptags.offset + comptags.lines - 1);
1403                 }
1404         }
1405
1406         /* store the comptag info */
1407         bfr.ctag_offset = comptags.offset;
1408
1409         /* update gmmu ptes */
1410         map_offset = __locked_gmmu_map(vm, map_offset,
1411                                         bfr.sgt,
1412                                         buffer_offset, /* sg offset */
1413                                         mapping_size,
1414                                         bfr.pgsz_idx,
1415                                         bfr.kind_v,
1416                                         bfr.ctag_offset,
1417                                         flags, rw_flag);
1418
1419         if (!map_offset)
1420                 goto clean_up;
1421
1422         gk20a_dbg(gpu_dbg_map,
1423            "as=%d pgsz=%d "
1424            "kind=0x%x kind_uc=0x%x flags=0x%x "
1425            "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
1426            vm_aspace_id(vm), gmmu_page_size,
1427            bfr.kind_v, bfr.uc_kind_v, flags,
1428            bfr.ctag_lines, bfr.ctag_offset,
1429            hi32(map_offset), lo32(map_offset),
1430            hi32((u64)sg_dma_address(bfr.sgt->sgl)),
1431            lo32((u64)sg_dma_address(bfr.sgt->sgl)),
1432            hi32((u64)sg_phys(bfr.sgt->sgl)),
1433            lo32((u64)sg_phys(bfr.sgt->sgl)));
1434
1435 #if defined(NVHOST_DEBUG)
1436         {
1437                 int i;
1438                 struct scatterlist *sg = NULL;
1439                 gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)");
1440                 for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i) {
1441                         u64 da = sg_dma_address(sg);
1442                         u64 pa = sg_phys(sg);
1443                         u64 len = sg->length;
1444                         gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x",
1445                                    i, hi32(pa), lo32(pa), hi32(da), lo32(da),
1446                                    hi32(len), lo32(len));
1447                 }
1448         }
1449 #endif
1450
1451         /* keep track of the buffer for unmapping */
1452         /* TBD: check for multiple mapping of same buffer */
1453         mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL);
1454         if (!mapped_buffer) {
1455                 gk20a_warn(d, "oom allocating tracking buffer");
1456                 goto clean_up;
1457         }
1458         mapped_buffer->dmabuf      = dmabuf;
1459         mapped_buffer->sgt         = bfr.sgt;
1460         mapped_buffer->addr        = map_offset;
1461         mapped_buffer->size        = mapping_size;
1462         mapped_buffer->pgsz_idx    = bfr.pgsz_idx;
1463         mapped_buffer->ctag_offset = bfr.ctag_offset;
1464         mapped_buffer->ctag_lines  = bfr.ctag_lines;
1465         mapped_buffer->vm          = vm;
1466         mapped_buffer->flags       = flags;
1467         mapped_buffer->kind        = kind;
1468         mapped_buffer->va_allocated = va_allocated;
1469         mapped_buffer->user_mapped = user_mapped ? 1 : 0;
1470         mapped_buffer->own_mem_ref = user_mapped;
1471         INIT_LIST_HEAD(&mapped_buffer->unmap_list);
1472         INIT_LIST_HEAD(&mapped_buffer->va_buffers_list);
1473         kref_init(&mapped_buffer->ref);
1474
1475         err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
1476         if (err) {
1477                 gk20a_err(d, "failed to insert into mapped buffer tree");
1478                 goto clean_up;
1479         }
1480         inserted = true;
1481         if (user_mapped)
1482                 vm->num_user_mapped_buffers++;
1483
1484         gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
1485
1486         if (!va_allocated) {
1487                 struct vm_reserved_va_node *va_node;
1488
1489                 /* find the space reservation */
1490                 va_node = addr_to_reservation(vm, map_offset);
1491                 list_add_tail(&mapped_buffer->va_buffers_list,
1492                               &va_node->va_buffers_list);
1493                 mapped_buffer->va_node = va_node;
1494         }
1495
1496         mutex_unlock(&vm->update_gmmu_lock);
1497
1498         /* Invalidate kernel mappings immediately */
1499         if (vm_aspace_id(vm) == -1)
1500                 gk20a_mm_tlb_invalidate(vm);
1501
1502         return map_offset;
1503
1504 clean_up:
1505         if (inserted) {
1506                 rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1507                 if (user_mapped)
1508                         vm->num_user_mapped_buffers--;
1509         }
1510         kfree(mapped_buffer);
1511         if (va_allocated)
1512                 gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
1513         if (!IS_ERR(bfr.sgt))
1514                 gk20a_mm_unpin(d, dmabuf, bfr.sgt);
1515
1516         mutex_unlock(&vm->update_gmmu_lock);
1517         gk20a_dbg_info("err=%d\n", err);
1518         return 0;
1519 }
1520
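/*
 * Map an sg_table into this vm at a kernel-chosen virtual address using
 * 4K pages, then invalidate the GMMU TLB so the GPU sees the new mapping.
 * Returns the GPU virtual address, or 0 on failure.
 *
 * Illustrative sketch only (error handling and sg_table setup elided):
 *
 *	gpu_va = gk20a_gmmu_map(vm, &sgt, size, 0, gk20a_mem_flag_none);
 *	...
 *	gk20a_gmmu_unmap(vm, gpu_va, size, gk20a_mem_flag_none);
 */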
1521 u64 gk20a_gmmu_map(struct vm_gk20a *vm,
1522                 struct sg_table **sgt,
1523                 u64 size,
1524                 u32 flags,
1525                 int rw_flag)
1526 {
1527         u64 vaddr;
1528
1529         mutex_lock(&vm->update_gmmu_lock);
1530         vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
1531                                 *sgt, /* sg table */
1532                                 0, /* sg offset */
1533                                 size,
1534                                 0, /* page size index = 0 i.e. SZ_4K */
1535                                 0, /* kind */
1536                                 0, /* ctag_offset */
1537                                 flags, rw_flag);
1538         mutex_unlock(&vm->update_gmmu_lock);
1539         if (!vaddr) {
1540                 gk20a_err(dev_from_vm(vm), "failed to allocate va space");
1541                 return 0;
1542         }
1543
1544         /* Invalidate kernel mappings immediately */
1545         gk20a_mm_tlb_invalidate(vm);
1546
1547         return vaddr;
1548 }
1549
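/*
 * Unmap a range previously mapped with gk20a_gmmu_map (4K pages) and
 * release its virtual address range back to the allocator.
 */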
1550 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
1551                 u64 vaddr,
1552                 u64 size,
1553                 int rw_flag)
1554 {
1555         mutex_lock(&vm->update_gmmu_lock);
1556         __locked_gmmu_unmap(vm,
1557                         vaddr,
1558                         size,
1559                         0, /* page size 4K */
1560                         true, /*va_allocated */
1561                         rw_flag);
1562         mutex_unlock(&vm->update_gmmu_lock);
1563 }
1564
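/*
 * Translate a DMA address into a physical address.  When the device is
 * behind an IOMMU the (page-aligned) IOVA is looked up in its mapping;
 * otherwise the DMA address is already physical and is returned as-is.
 */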
1565 phys_addr_t gk20a_get_phys_from_iova(struct device *d,
1566                                 u64 dma_addr)
1567 {
1568         phys_addr_t phys;
1569         u64 iova;
1570
1571         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
1572         if (!mapping)
1573                 return dma_addr;
1574
1575         iova = dma_addr & PAGE_MASK;
1576         phys = iommu_iova_to_phys(mapping->domain, iova);
1577         return phys;
1578 }
1579
1580 /* get an sg_table for an already-allocated DMA buffer */
1581 int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
1582                         void *cpuva, u64 iova,
1583                         size_t size)
1584 {
1585         int err = 0;
1586         *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1587         if (!(*sgt)) {
1588                 dev_err(d, "failed to allocate memory\n");
1589                 err = -ENOMEM;
1590                 goto fail;
1591         }
1592         err = dma_get_sgtable(d, *sgt,
1593                         cpuva, iova,
1594                         size);
1595         if (err) {
1596                 dev_err(d, "failed to create sg table\n");
1597                 goto fail;
1598         }
1599         sg_dma_address((*sgt)->sgl) = iova;
1600
1601         return 0;
1602  fail:
1603         if (*sgt) {
1604                 kfree(*sgt);
1605                 *sgt = NULL;
1606         }
1607         return err;
1608 }
1609
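/*
 * Build a single-entry sg_table describing a physically contiguous run of
 * pages of the given size and record the supplied IOVA as its DMA address.
 */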
1610 int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt,
1611                         struct page **pages, u64 iova,
1612                         size_t size)
1613 {
1614         int err = 0;
1615         *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1616         if (!(*sgt)) {
1617                 dev_err(d, "failed to allocate memory\n");
1618                 err = -ENOMEM;
1619                 goto fail;
1620         }
1621         err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
1622         if (err) {
1623                 dev_err(d, "failed to allocate sg_table\n");
1624                 goto fail;
1625         }
1626         sg_set_page((*sgt)->sgl, *pages, size, 0);
1627         sg_dma_address((*sgt)->sgl) = iova;
1628
1629         return 0;
1630  fail:
1631         if (*sgt) {
1632                 kfree(*sgt);
1633                 *sgt = NULL;
1634         }
1635         return err;
1636 }
1637
1638 void gk20a_free_sgtable(struct sg_table **sgt)
1639 {
1640         sg_free_table(*sgt);
1641         kfree(*sgt);
1642         *sgt = NULL;
1643 }
1644
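/*
 * Return the address to program into GMMU entries for this chunk: the SMMU
 * IOVA tagged with the SMMU translation bit when the buffer is IOMMU-mapped,
 * otherwise the chunk's physical address.  A DMA mapping error yields 0.
 */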
1645 u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
1646 {
1647         u64 result = sg_phys(sgl);
1648 #ifdef CONFIG_TEGRA_IOMMU_SMMU
1649         if (sg_dma_address(sgl) == DMA_ERROR_CODE)
1650                 result = 0;
1651         else if (sg_dma_address(sgl)) {
1652                 result = sg_dma_address(sgl) |
1653                         1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT;
1654         }
1655 #endif
1656         return result;
1657 }
1658
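/*
 * Write the PTEs covering [first_vaddr, last_vaddr].  With a non-NULL sgt
 * the entries are filled from the scatterlist (after skipping buffer_offset
 * bytes); with a NULL sgt the entries are cleared and the per-table
 * reference counts dropped.  Page tables whose refcount reaches zero are
 * freed and their PDE rewritten.  Called with vm->update_gmmu_lock held.
 */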
1659 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
1660                                    enum gmmu_pgsz_gk20a pgsz_idx,
1661                                    struct sg_table *sgt,
1662                                    u64 buffer_offset,
1663                                    u64 first_vaddr, u64 last_vaddr,
1664                                    u8 kind_v, u32 ctag_offset,
1665                                    bool cacheable,
1666                                    int rw_flag)
1667 {
1668         int err;
1669         u32 pde_lo, pde_hi, pde_i;
1670         struct scatterlist *cur_chunk;
1671         unsigned int cur_offset;
1672         u32 pte_w[2] = {0, 0}; /* invalid pte */
1673         u32 ctag = ctag_offset;
1674         u32 ctag_incr;
1675         u32 page_size  = gmmu_page_sizes[pgsz_idx];
1676         u64 addr = 0;
1677         u64 space_to_skip = buffer_offset;
1678         bool set_tlb_dirty = false;
1679
1680         pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
1681                                    &pde_lo, &pde_hi);
1682
1683         gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
1684                    pgsz_idx, pde_lo, pde_hi);
1685
1686         /* ctag_incr is 1 when ctag_offset != 0 and 0 otherwise, so the per-pte
1687          * loop below can advance the comptag line without a branch. Note: this
1688          * only works when the page size used with comptags is 128KB; that is
1689          * checked elsewhere. */
1689         ctag_incr = !!ctag_offset;
1690
1691         cur_offset = 0;
1692         if (sgt) {
1693                 cur_chunk = sgt->sgl;
1694                 /* space_to_skip must be page aligned */
1695                 BUG_ON(space_to_skip & (page_size - 1));
1696
1697                 while (space_to_skip > 0 && cur_chunk) {
1698                         u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
1699                         if (new_addr) {
1700                                 addr = new_addr;
1701                                 addr += cur_offset;
1702                         }
1703                         cur_offset += page_size;
1704                         addr += page_size;
1705                         while (cur_chunk &&
1706                                 cur_offset >= cur_chunk->length) {
1707                                 cur_offset -= cur_chunk->length;
1708                                 cur_chunk = sg_next(cur_chunk);
1709                         }
1710                         space_to_skip -= page_size;
1711                 }
1712         } else {
1713                 cur_chunk = NULL;
1714         }
1715
1716         for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
1717                 u32 pte_lo, pte_hi;
1718                 u32 pte_cur;
1719                 void *pte_kv_cur;
1720
1721                 struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
1722
1723                 set_tlb_dirty = true;
1724
1725                 if (pde_i == pde_lo)
1726                         pte_lo = pte_index_from_vaddr(vm, first_vaddr,
1727                                                       pgsz_idx);
1728                 else
1729                         pte_lo = 0;
1730
1731                 if ((pde_i != pde_hi) && (pde_hi != pde_lo))
1732                         pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1;
1733                 else
1734                         pte_hi = pte_index_from_vaddr(vm, last_vaddr,
1735                                                       pgsz_idx);
1736
1737                 /* get cpu access to the ptes */
1738                 err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur,
1739                                      pte->size);
1740                 if (err) {
1741                         gk20a_err(dev_from_vm(vm),
1742                                    "couldn't map ptes for update as=%d pte_ref_cnt=%d",
1743                                    vm_aspace_id(vm), pte->ref_cnt);
1744                         goto clean_up;
1745                 }
1746
1747                 gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
1748                 for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
1749                         if (likely(sgt)) {
1750                                 u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
1751                                 if (new_addr) {
1752                                         addr = new_addr;
1753                                         addr += cur_offset;
1754                                 }
1755                                 pte_w[0] = gmmu_pte_valid_true_f() |
1756                                         gmmu_pte_address_sys_f(addr
1757                                                 >> gmmu_pte_address_shift_v());
1758                                 pte_w[1] = gmmu_pte_aperture_video_memory_f() |
1759                                         gmmu_pte_kind_f(kind_v) |
1760                                         gmmu_pte_comptagline_f(ctag);
1761
1762                                 if (rw_flag == gk20a_mem_flag_read_only) {
1763                                         pte_w[0] |= gmmu_pte_read_only_true_f();
1764                                         pte_w[1] |=
1765                                                 gmmu_pte_write_disable_true_f();
1766                                 } else if (rw_flag ==
1767                                            gk20a_mem_flag_write_only) {
1768                                         pte_w[1] |=
1769                                                 gmmu_pte_read_disable_true_f();
1770                                 }
1771                                 if (!cacheable)
1772                                         pte_w[1] |= gmmu_pte_vol_true_f();
1773
1774                                 pte->ref_cnt++;
1775                                 gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=0x%x,%08x kind=%d"
1776                                            " ctag=%d vol=%d refs=%d"
1777                                            " [0x%08x,0x%08x]",
1778                                            pte_cur, hi32(addr), lo32(addr),
1779                                            kind_v, ctag, !cacheable,
1780                                            pte->ref_cnt, pte_w[1], pte_w[0]);
1781                                 ctag += ctag_incr;
1782                                 cur_offset += page_size;
1783                                 addr += page_size;
1784                                 while (cur_chunk &&
1785                                         cur_offset >= cur_chunk->length) {
1786                                         cur_offset -= cur_chunk->length;
1787                                         cur_chunk = sg_next(cur_chunk);
1788                                 }
1789
1790                         } else {
1791                                 pte->ref_cnt--;
1792                                 gk20a_dbg(gpu_dbg_pte,
1793                                            "pte_cur=%d ref=%d [0x0,0x0]",
1794                                            pte_cur, pte->ref_cnt);
1795                         }
1796
1797                         gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
1798                         gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
1799                 }
1800
1801                 unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
1802
1803                 if (pte->ref_cnt == 0) {
1804                         void *pte_ref_ptr = pte->ref;
1805
1806                         /* It could make sense to keep one empty page table
1807                          * around for each flavor, in case a new map comes
1808                          * right back and needs to alloc (and fill) it again.
1809                          * But deferred unmapping should already help with
1810                          * pathological unmap/map/unmap/map cases that would
1811                          * otherwise trigger pte free/alloc/free/alloc.
1812                          */
1813                         pte->ref = NULL;
1814
1815                         /* rewrite pde */
1816                         update_gmmu_pde_locked(vm, pde_i);
1817
1818                         __gk20a_mm_tlb_invalidate(vm);
1819                         set_tlb_dirty = false;
1820
1821                         free_gmmu_pages(vm, pte_ref_ptr, pte->sgt,
1822                                 vm->mm->page_table_sizing[pgsz_idx].order,
1823                                 pte->size);
1824
1825                 }
1826
1827         }
1828
1829         smp_mb();
1830         if (set_tlb_dirty) {
1831                 vm->tlb_dirty = true;
1832                 gk20a_dbg_fn("set tlb dirty");
1833         }
1834
1835         return 0;
1836
1837 clean_up:
1838         /* TBD: potentially rewrite the above to pre-map everything it needs,
1839          * as that is the only way it can fail */
1840         return err;
1841
1842 }
1843
1844
1845 /* for gk20a the "video memory" apertures here are misnomers. */
1846 static inline u32 big_valid_pde0_bits(u64 pte_addr)
1847 {
1848         u32 pde0_bits =
1849                 gmmu_pde_aperture_big_video_memory_f() |
1850                 gmmu_pde_address_big_sys_f(
1851                            (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1852         return  pde0_bits;
1853 }
1854 static inline u32 small_valid_pde1_bits(u64 pte_addr)
1855 {
1856         u32 pde1_bits =
1857                 gmmu_pde_aperture_small_video_memory_f() |
1858                 gmmu_pde_vol_small_true_f() | /* tbd: why? */
1859                 gmmu_pde_address_small_sys_f(
1860                            (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1861         return pde1_bits;
1862 }
1863
1864 /* Given the current state of the ptes associated with a pde,
1865    determine the new pde value and write it out.  There is no
1866    check here for whether a change was actually made, so
1867    superfluous updates will cause unnecessary pde
1868    invalidations.
1869 */
1870 static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
1871 {
1872         bool small_valid, big_valid;
1873         u64 pte_addr[2] = {0, 0};
1874         struct page_table_gk20a *small_pte =
1875                 vm->pdes.ptes[gmmu_page_size_small] + i;
1876         struct page_table_gk20a *big_pte =
1877                 vm->pdes.ptes[gmmu_page_size_big] + i;
1878         u32 pde_v[2] = {0, 0};
1879         u32 *pde;
1880
1881         small_valid = small_pte && small_pte->ref;
1882         big_valid   = big_pte && big_pte->ref;
1883
1884         if (small_valid)
1885                 pte_addr[gmmu_page_size_small] =
1886                         gk20a_mm_iova_addr(small_pte->sgt->sgl);
1887         if (big_valid)
1888                 pte_addr[gmmu_page_size_big] =
1889                         gk20a_mm_iova_addr(big_pte->sgt->sgl);
1890
1891         pde_v[0] = gmmu_pde_size_full_f();
1892         pde_v[0] |= big_valid ?
1893                 big_valid_pde0_bits(pte_addr[gmmu_page_size_big])
1894                 :
1895                 (gmmu_pde_aperture_big_invalid_f());
1896
1897         pde_v[1] |= (small_valid ?
1898                      small_valid_pde1_bits(pte_addr[gmmu_page_size_small])
1899                      :
1900                      (gmmu_pde_aperture_small_invalid_f() |
1901                       gmmu_pde_vol_small_false_f())
1902                      )
1903                 |
1904                 (big_valid ? (gmmu_pde_vol_big_true_f()) :
1905                  gmmu_pde_vol_big_false_f());
1906
1907         pde = pde_from_index(vm, i);
1908
1909         gk20a_mem_wr32(pde, 0, pde_v[0]);
1910         gk20a_mem_wr32(pde, 1, pde_v[1]);
1911
1912         smp_mb();
1913
1914         FLUSH_CPU_DCACHE(pde,
1915                          sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
1916                          sizeof(u32)*2);
1917
1918         gk20a_mm_l2_invalidate(vm->mm->g);
1919
1920         gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
1921
1922         vm->tlb_dirty  = true;
1923 }
1924
1925
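/*
 * Back num_pages pages starting at vaddr with the vm's shared zero page
 * (allocated on first use).  This is how sparse allocations are kept
 * "mapped but empty"; on failure any pages already mapped are unmapped.
 */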
1926 static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
1927                                u32 num_pages, u32 pgsz_idx)
1928 {
1929         struct mm_gk20a *mm = vm->mm;
1930         struct gk20a *g = mm->g;
1931         u32 pgsz = gmmu_page_sizes[pgsz_idx];
1932         u32 i;
1933         dma_addr_t iova;
1934
1935         /* allocate the zero page if the vm does not already have one */
1936         if (!vm->zero_page_cpuva) {
1937                 int err = 0;
1938                 vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
1939                                                          mm->big_page_size,
1940                                                          &iova,
1941                                                          GFP_KERNEL);
1942                 if (!vm->zero_page_cpuva) {
1943                         dev_err(&g->dev->dev, "failed to allocate zero page\n");
1944                         return -ENOMEM;
1945                 }
1946
1947                 vm->zero_page_iova = iova;
1948                 err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
1949                                         vm->zero_page_cpuva, vm->zero_page_iova,
1950                                         mm->big_page_size);
1951                 if (err) {
1952                         dma_free_coherent(&g->dev->dev, mm->big_page_size,
1953                                           vm->zero_page_cpuva,
1954                                           vm->zero_page_iova);
1955                         vm->zero_page_iova = 0;
1956                         vm->zero_page_cpuva = NULL;
1957
1958                         dev_err(&g->dev->dev, "failed to create sg table for zero page\n");
1959                         return -ENOMEM;
1960                 }
1961         }
1962
1963         for (i = 0; i < num_pages; i++) {
1964                 u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
1965                         vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0,
1966                         NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
1967                         gk20a_mem_flag_none);
1968
1969                 if (!page_vaddr) {
1970                         gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
1971                         goto err_unmap;
1972                 }
1973                 vaddr += pgsz;
1974         }
1975
1976         return 0;
1977
1978 err_unmap:
1979
1980         WARN_ON(1);
1981         /* something went wrong. unmap pages */
1982         while (i--) {
1983                 vaddr -= pgsz;
1984                 __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
1985                                     gk20a_mem_flag_none);
1986         }
1987
1988         return -EINVAL;
1989 }
1990
1991 /* NOTE! mapped_buffers lock must be held */
1992 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
1993 {
1994         struct vm_gk20a *vm = mapped_buffer->vm;
1995
1996         if (mapped_buffer->va_node &&
1997             mapped_buffer->va_node->sparse) {
1998                 u64 vaddr = mapped_buffer->addr;
1999                 u32 pgsz_idx = mapped_buffer->pgsz_idx;
2000                 u32 num_pages = mapped_buffer->size >>
2001                         gmmu_page_shifts[pgsz_idx];
2002
2003                 /* there is little we can do if this fails... */
2004                 gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
2005
2006         } else
2007                 __locked_gmmu_unmap(vm,
2008                                 mapped_buffer->addr,
2009                                 mapped_buffer->size,
2010                                 mapped_buffer->pgsz_idx,
2011                                 mapped_buffer->va_allocated,
2012                                 gk20a_mem_flag_none);
2013
2014         gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
2015                    vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx],
2016                    hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
2017                    mapped_buffer->own_mem_ref);
2018
2019         gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
2020                        mapped_buffer->sgt);
2021
2022         /* remove from mapped buffer tree and remove list, free */
2023         rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
2024         if (!list_empty(&mapped_buffer->va_buffers_list))
2025                 list_del(&mapped_buffer->va_buffers_list);
2026
2027         /* keep track of mapped buffers */
2028         if (mapped_buffer->user_mapped)
2029                 vm->num_user_mapped_buffers--;
2030
2031         if (mapped_buffer->own_mem_ref)
2032                 dma_buf_put(mapped_buffer->dmabuf);
2033
2034         kfree(mapped_buffer);
2035
2036         return;
2037 }
2038
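/*
 * Unmap by GPU virtual address: look up the tracked buffer at offset and
 * drop one reference; the actual unmap happens in the kref release hook
 * once the last reference is gone.
 */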
2039 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
2040 {
2041         struct device *d = dev_from_vm(vm);
2042         struct mapped_buffer_node *mapped_buffer;
2043
2044         mutex_lock(&vm->update_gmmu_lock);
2045         mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
2046         if (!mapped_buffer) {
2047                 mutex_unlock(&vm->update_gmmu_lock);
2048                 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
2049                 return;
2050         }
2051
2052         kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
2053         mutex_unlock(&vm->update_gmmu_lock);
2054 }
2055
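/*
 * Tear down a vm: unmap every tracked buffer, drop the reserved VA nodes,
 * free any remaining page tables and the page directory, destroy the VA
 * allocators and release the zero page before freeing the vm itself.
 */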
2056 static void gk20a_vm_remove_support(struct vm_gk20a *vm)
2057 {
2058         struct gk20a *g = vm->mm->g;
2059         struct mapped_buffer_node *mapped_buffer;
2060         struct vm_reserved_va_node *va_node, *va_node_tmp;
2061         struct rb_node *node;
2062         int i;
2063
2064         gk20a_dbg_fn("");
2065         mutex_lock(&vm->update_gmmu_lock);
2066
2067         /* TBD: add a flag here for the unmap code to recognize teardown
2068          * and short-circuit any otherwise expensive operations. */
2069
2070         node = rb_first(&vm->mapped_buffers);
2071         while (node) {
2072                 mapped_buffer =
2073                         container_of(node, struct mapped_buffer_node, node);
2074                 gk20a_vm_unmap_locked(mapped_buffer);
2075                 node = rb_first(&vm->mapped_buffers);
2076         }
2077
2078         /* destroy remaining reserved memory areas */
2079         list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
2080                 reserved_va_list) {
2081                 list_del(&va_node->reserved_va_list);
2082                 kfree(va_node);
2083         }
2084
2085         /* unmapping all buffers above may not actually free all of the
2086          * vm's page tables; jettison any that remain here */
2087         for (i = 0; i < vm->pdes.num_pdes; i++) {
2088                 struct page_table_gk20a *pte =
2089                         &vm->pdes.ptes[gmmu_page_size_small][i];
2090                 if (pte->ref) {
2091                         free_gmmu_pages(vm, pte->ref, pte->sgt,
2092                                 vm->mm->page_table_sizing[gmmu_page_size_small].order,
2093                                 pte->size);
2094                         pte->ref = NULL;
2095                 }
2096                 pte = &vm->pdes.ptes[gmmu_page_size_big][i];
2097                 if (pte->ref) {
2098                         free_gmmu_pages(vm, pte->ref, pte->sgt,
2099                                 vm->mm->page_table_sizing[gmmu_page_size_big].order,
2100                                 pte->size);
2101                         pte->ref = NULL;
2102                 }
2103         }
2104
2105         unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
2106         free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, vm->pdes.size);
2107
2108         kfree(vm->pdes.ptes[gmmu_page_size_small]);
2109         kfree(vm->pdes.ptes[gmmu_page_size_big]);
2110         gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
2111         gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
2112
2113         mutex_unlock(&vm->update_gmmu_lock);
2114
2115         /* release zero page if used */
2116         if (vm->zero_page_cpuva)
2117                 dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
2118                                   vm->zero_page_cpuva, vm->zero_page_iova);
2119
2120         /* vm is not used anymore. release it. */
2121         kfree(vm);
2122 }
2123
2124 static void gk20a_vm_remove_support_kref(struct kref *ref)
2125 {
2126         struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
2127         gk20a_vm_remove_support(vm);
2128 }
2129
2130 void gk20a_vm_get(struct vm_gk20a *vm)
2131 {
2132         kref_get(&vm->ref);
2133 }
2134
2135 void gk20a_vm_put(struct vm_gk20a *vm)
2136 {
2137         kref_put(&vm->ref, gk20a_vm_remove_support_kref);
2138 }
2139
2140 /* address space interfaces for the gk20a module */
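/*
 * Create the vm backing a user address-space share: a one-pde hole is left
 * at the bottom, the lower half of the VA range feeds the small-page
 * allocator and the upper half the big-page allocator, and the page
 * directory is allocated and mapped here.
 */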
2141 int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
2142 {
2143         struct gk20a_as *as = as_share->as;
2144         struct gk20a *g = gk20a_from_as(as);
2145         struct mm_gk20a *mm = &g->mm;
2146         struct vm_gk20a *vm;
2147         u64 vma_size;
2148         u32 num_pages, low_hole_pages;
2149         char name[32];
2150         int err;
2151
2152         gk20a_dbg_fn("");
2153
2154         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
2155         if (!vm)
2156                 return -ENOMEM;
2157
2158         as_share->vm = vm;
2159
2160         vm->mm = mm;
2161         vm->as_share = as_share;
2162
2163         vm->big_pages = true;
2164
2165         vm->va_start  = mm->pde_stride;   /* create a one pde hole */
2166         vm->va_limit  = mm->channel.size; /* note this means channel.size is
2167                                              really just the max */
2168         {
2169                 u32 pde_lo, pde_hi;
2170                 pde_range_from_vaddr_range(vm,
2171                                            0, vm->va_limit-1,
2172                                            &pde_lo, &pde_hi);
2173                 vm->pdes.num_pdes = pde_hi + 1;
2174         }
2175
2176         vm->pdes.ptes[gmmu_page_size_small] =
2177                 kzalloc(sizeof(struct page_table_gk20a) *
2178                         vm->pdes.num_pdes, GFP_KERNEL);
2179
2180         vm->pdes.ptes[gmmu_page_size_big] =
2181                 kzalloc(sizeof(struct page_table_gk20a) *
2182                         vm->pdes.num_pdes, GFP_KERNEL);
2183
2184         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2185               vm->pdes.ptes[gmmu_page_size_big]))
2186                 return -ENOMEM;
2187
2188         gk20a_dbg_info("init space for va_limit=0x%llx num_pdes=%d",
2189                    vm->va_limit, vm->pdes.num_pdes);
2190
2191         /* allocate the page table directory */
2192         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2193                                &vm->pdes.sgt, &vm->pdes.size);
2194         if (err)
2195                 return -ENOMEM;
2196
2197         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2198                              vm->pdes.size);
2199         if (err) {
2200                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2201                                         vm->pdes.size);
2202                 return -ENOMEM;
2203         }
2204         gk20a_dbg(gpu_dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx",
2205                         vm->pdes.kv,
2206                         gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2207         /* we could release vm->pdes.kv but it's only one page... */
2208
2209
2210         /* low-half: alloc small pages */
2211         /* high-half: alloc big pages */
2212         vma_size = mm->channel.size >> 1;
2213
2214         snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
2215                  gmmu_page_sizes[gmmu_page_size_small]>>10);
2216         num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
2217
2218         /* num_pages above is without regard to the low-side hole. */
2219         low_hole_pages = (vm->va_start >>
2220                           gmmu_page_shifts[gmmu_page_size_small]);
2221
2222         gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
2223               low_hole_pages,             /* start */
2224               num_pages - low_hole_pages, /* length */
2225               1);                         /* align */
2226
2227         snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
2228                  gmmu_page_sizes[gmmu_page_size_big]>>10);
2229
2230         num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
2231         gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
2232                               num_pages, /* start */
2233                               num_pages, /* length */
2234                               1); /* align */
2235
2236         vm->mapped_buffers = RB_ROOT;
2237
2238         mutex_init(&vm->update_gmmu_lock);
2239         kref_init(&vm->ref);
2240         INIT_LIST_HEAD(&vm->reserved_va_list);
2241
2242         vm->enable_ctag = true;
2243
2244         return 0;
2245 }
2246
2247
2248 int gk20a_vm_release_share(struct gk20a_as_share *as_share)
2249 {
2250         struct vm_gk20a *vm = as_share->vm;
2251
2252         gk20a_dbg_fn("");
2253
2254         vm->as_share = NULL;
2255
2256         /* drop the as_share's reference to the vm */
2257         gk20a_vm_put(vm);
2258
2259         as_share->vm = NULL;
2260
2261         return 0;
2262 }
2263
2264
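/*
 * Reserve a VA range of args->pages pages of args->page_size for later
 * fixed-offset mappings.  Sparse reservations (big pages only) are backed
 * by the zero page immediately; the reservation is tracked on
 * vm->reserved_va_list and its offset returned in args->o_a.offset.
 */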
2265 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2266                          struct nvhost_as_alloc_space_args *args)
2267 {
2268         int err = -ENOMEM;
2269         int pgsz_idx;
2270         u32 start_page_nr;
2271         struct gk20a_allocator *vma;
2272         struct vm_gk20a *vm = as_share->vm;
2273         struct vm_reserved_va_node *va_node;
2274         u64 vaddr_start = 0;
2275
2276         gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
2277                         args->flags, args->page_size, args->pages,
2278                         args->o_a.offset);
2279
2280         /* determine pagesz idx */
2281         for (pgsz_idx = gmmu_page_size_small;
2282              pgsz_idx < gmmu_nr_page_sizes;
2283              pgsz_idx++) {
2284                 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2285                         break;
2286         }
2287
2288         if (pgsz_idx >= gmmu_nr_page_sizes) {
2289                 err = -EINVAL;
2290                 goto clean_up;
2291         }
2292
2293         va_node = kzalloc(sizeof(*va_node), GFP_KERNEL);
2294         if (!va_node) {
2295                 err = -ENOMEM;
2296                 goto clean_up;
2297         }
2298
2299         if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE &&
2300             pgsz_idx != gmmu_page_size_big) {
2301                 err = -ENOSYS;
2302                 kfree(va_node);
2303                 goto clean_up;
2304         }
2305
2306         start_page_nr = 0;
2307         if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
2308                 start_page_nr = (u32)(args->o_a.offset >>
2309                                       gmmu_page_shifts[pgsz_idx]);
2310
2311         vma = &vm->vma[pgsz_idx];
2312         err = vma->alloc(vma, &start_page_nr, args->pages);
2313         if (err) {
2314                 kfree(va_node);
2315                 goto clean_up;
2316         }
2317
2318         vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx];
2319
2320         va_node->vaddr_start = vaddr_start;
2321         va_node->size = (u64)args->page_size * (u64)args->pages;
2322         va_node->pgsz_idx = pgsz_idx;
2323         INIT_LIST_HEAD(&va_node->va_buffers_list);
2324         INIT_LIST_HEAD(&va_node->reserved_va_list);
2325
2326         mutex_lock(&vm->update_gmmu_lock);
2327
2328         /* mark that we need to use sparse mappings here */
2329         if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
2330                 err = gk20a_vm_put_empty(vm, vaddr_start, args->pages,
2331                                          pgsz_idx);
2332                 if (err) {
2333                         mutex_unlock(&vm->update_gmmu_lock);
2334                         vma->free(vma, start_page_nr, args->pages);
2335                         kfree(va_node);
2336                         goto clean_up;
2337                 }
2338
2339                 va_node->sparse = true;
2340         }
2341         list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
2342
2343         mutex_unlock(&vm->update_gmmu_lock);
2344
2345         args->o_a.offset = vaddr_start;
2346
2347 clean_up:
2348         return err;
2349 }
2350
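/*
 * Release a VA reservation made by gk20a_vm_alloc_space.  Buffers mapped
 * inside the range are detached from the reservation (they stay mapped as
 * normal buffers); a sparse reservation also has its zero-page backing
 * unmapped.
 */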
2351 int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2352                         struct nvhost_as_free_space_args *args)
2353 {
2354         int err = -ENOMEM;
2355         int pgsz_idx;
2356         u32 start_page_nr;
2357         struct gk20a_allocator *vma;
2358         struct vm_gk20a *vm = as_share->vm;
2359         struct vm_reserved_va_node *va_node;
2360
2361         gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
2362                         args->pages, args->offset);
2363
2364         /* determine pagesz idx */
2365         for (pgsz_idx = gmmu_page_size_small;
2366              pgsz_idx < gmmu_nr_page_sizes;
2367              pgsz_idx++) {
2368                 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2369                         break;
2370         }
2371
2372         if (pgsz_idx >= gmmu_nr_page_sizes) {
2373                 err = -EINVAL;
2374                 goto clean_up;
2375         }
2376
2377         start_page_nr = (u32)(args->offset >>
2378                               gmmu_page_shifts[pgsz_idx]);
2379
2380         vma = &vm->vma[pgsz_idx];
2381         err = vma->free(vma, start_page_nr, args->pages);
2382
2383         if (err)
2384                 goto clean_up;
2385
2386         mutex_lock(&vm->update_gmmu_lock);
2387         va_node = addr_to_reservation(vm, args->offset);
2388         if (va_node) {
2389                 struct mapped_buffer_node *buffer, *n;
2390
2391                 /* there is no need to unmap the buffers in this va range;
2392                  * just detach them so they become normal mappings.  Use the
2393                  * _safe iterator since entries are deleted as we iterate. */
2394                 list_for_each_entry_safe(buffer, n,
2395                         &va_node->va_buffers_list, va_buffers_list)
2396                         list_del_init(&buffer->va_buffers_list);
2397
2398                 list_del(&va_node->reserved_va_list);
2399
2400                 /* if this was a sparse mapping, free the va */
2401                 if (va_node->sparse)
2402                         __locked_gmmu_unmap(vm,
2403                                 va_node->vaddr_start,
2404                                 va_node->size,
2405                                 va_node->pgsz_idx,
2406                                 false,
2407                                 gk20a_mem_flag_none);
2408                 kfree(va_node);
2409         }
2410         mutex_unlock(&vm->update_gmmu_lock);
2411
2412 clean_up:
2413         return err;
2414 }
2415
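/*
 * Bind a channel to this address space: point the channel at the vm and
 * commit the vm's page directory to the channel's instance block.
 */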
2416 int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
2417                           struct channel_gk20a *ch)
2418 {
2419         int err = 0;
2420         struct vm_gk20a *vm = as_share->vm;
2421
2422         gk20a_dbg_fn("");
2423
2424         ch->vm = vm;
2425         err = channel_gk20a_commit_va(ch);
2426         if (err)
2427                 ch->vm = NULL;
2428
2429         return err;
2430 }
2431
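/*
 * Attach per-device private data to a dmabuf if it does not have any yet.
 * The allocation is double-checked under a local mutex so concurrent
 * callers agree on a single priv instance.
 */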
2432 int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
2433 {
2434         struct gk20a_dmabuf_priv *priv;
2435         static DEFINE_MUTEX(priv_lock);
2436
2437         priv = dma_buf_get_drvdata(dmabuf, dev);
2438         if (likely(priv))
2439                 return 0;
2440
2441         mutex_lock(&priv_lock);
2442         priv = dma_buf_get_drvdata(dmabuf, dev);
2443         if (priv)
2444                 goto priv_exist_or_err;
2445         priv = kzalloc(sizeof(*priv), GFP_KERNEL);
2446         if (!priv) {
2447                 priv = ERR_PTR(-ENOMEM);
2448                 goto priv_exist_or_err;
2449         }
2450         mutex_init(&priv->lock);
2451         dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);
2452 priv_exist_or_err:
2453         mutex_unlock(&priv_lock);
2454         if (IS_ERR(priv))
2455                 return -ENOMEM;
2456
2457         return 0;
2458 }
2459
2460
2461 static int gk20a_dmabuf_get_kind(struct dma_buf *dmabuf)
2462 {
2463         int kind = 0;
2464 #ifdef CONFIG_TEGRA_NVMAP
2465         int err;
2466         u64 nvmap_param;
2467
2468         err = nvmap_get_dmabuf_param(dmabuf, NVMAP_HANDLE_PARAM_KIND,
2469                                      &nvmap_param);
2470         kind = err ? kind : nvmap_param;
2471 #endif
2472         return kind;
2473 }
2474
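/*
 * Map-buffer entry point for the address-space ioctls: take a reference on
 * the dmabuf behind dmabuf_fd, resolve a default kind from the buffer when
 * kind == -1, map it into the vm and return the GPU VA via *offset_align.
 */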
2475 int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
2476                         int dmabuf_fd,
2477                         u64 *offset_align,
2478                         u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
2479                         int kind,
2480                         u64 buffer_offset,
2481                         u64 mapping_size)
2482 {
2483         int err = 0;
2484         struct vm_gk20a *vm = as_share->vm;
2485         struct dma_buf *dmabuf;
2486         u64 ret_va;
2487
2488         gk20a_dbg_fn("");
2489
2490         /* get ref to the mem handle (released on unmap_locked) */
2491         dmabuf = dma_buf_get(dmabuf_fd);
2492         if (IS_ERR(dmabuf))
2493                 return PTR_ERR(dmabuf);
2494
2495         if (!dmabuf)
2496                 return -EINVAL;
2497
2498         err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
2499         if (err) {
2500                 dma_buf_put(dmabuf);
2501                 return err;
2502         }
2503
2504         if (kind == -1)
2505                 kind = gk20a_dmabuf_get_kind(dmabuf);
2506
2507         ret_va = gk20a_vm_map(vm, dmabuf, *offset_align,
2508                         flags, kind, NULL, true,
2509                         gk20a_mem_flag_none,
2510                         buffer_offset,
2511                         mapping_size);
2512
2513         *offset_align = ret_va;
2514         if (!ret_va) {
2515                 dma_buf_put(dmabuf);
2516                 err = -EINVAL;
2517         }
2518
2519         return err;
2520 }
2521
2522 int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
2523 {
2524         struct vm_gk20a *vm = as_share->vm;
2525
2526         gk20a_dbg_fn("");
2527
2528         gk20a_vm_unmap_user(vm, offset);
2529         return 0;
2530 }
2531
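/*
 * Set up the BAR1 vm: size it from the BAR1 aperture, allocate and map its
 * page directory, allocate the BAR1 instance block and program the page
 * directory base and address limit into it.
 */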
2532 int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2533 {
2534         int err;
2535         phys_addr_t inst_pa;
2536         void *inst_ptr;
2537         struct vm_gk20a *vm = &mm->bar1.vm;
2538         struct gk20a *g = gk20a_from_mm(mm);
2539         struct device *d = dev_from_gk20a(g);
2540         struct inst_desc *inst_block = &mm->bar1.inst_block;
2541         u64 pde_addr;
2542         u32 pde_addr_lo;
2543         u32 pde_addr_hi;
2544         dma_addr_t iova;
2545
2546         vm->mm = mm;
2547
2548         mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
2549
2550         gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
2551
2552         vm->va_start = mm->pde_stride * 1;
2553         vm->va_limit = mm->bar1.aperture_size;
2554
2555         {
2556                 u32 pde_lo, pde_hi;
2557                 pde_range_from_vaddr_range(vm,
2558                                            0, vm->va_limit-1,
2559                                            &pde_lo, &pde_hi);
2560                 vm->pdes.num_pdes = pde_hi + 1;
2561         }
2562
2563         /* bar1 is likely only to ever use/need small page sizes. */
2564         /* But just in case, for now... arrange for both. */
2565         vm->pdes.ptes[gmmu_page_size_small] =
2566                 kzalloc(sizeof(struct page_table_gk20a) *
2567                         vm->pdes.num_pdes, GFP_KERNEL);
2568
2569         vm->pdes.ptes[gmmu_page_size_big] =
2570                 kzalloc(sizeof(struct page_table_gk20a) *
2571                         vm->pdes.num_pdes, GFP_KERNEL);
2572
2573         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2574               vm->pdes.ptes[gmmu_page_size_big]))
2575                 return -ENOMEM;
2576
2577         gk20a_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d",
2578                    vm->va_limit, vm->pdes.num_pdes);
2579
2580
2581         /* allocate the page table directory */
2582         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2583                                &vm->pdes.sgt, &vm->pdes.size);
2584         if (err)
2585                 goto clean_up;
2586
2587         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2588                              vm->pdes.size);
2589         if (err) {
2590                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2591                                         vm->pdes.size);
2592                 goto clean_up;
2593         }
2594         gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
2595                         vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2596         /* we could release vm->pdes.kv but it's only one page... */
2597
2598         pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2599         pde_addr_lo = u64_lo32(pde_addr >> 12);
2600         pde_addr_hi = u64_hi32(pde_addr);
2601
2602         gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2603                 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl),
2604                 pde_addr_lo, pde_addr_hi);
2605
2606         /* allocate instance mem for bar1 */
2607         inst_block->size = ram_in_alloc_size_v();
2608         inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2609                                 &iova, GFP_KERNEL);
2610         if (!inst_block->cpuva) {
2611                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
2612                 err = -ENOMEM;
2613                 goto clean_up;
2614         }
2615
2616         inst_block->iova = iova;
2617         inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2618         if (!inst_block->cpu_pa) {
2619                 gk20a_err(d, "%s: failed to get phys address\n", __func__);
2620                 err = -ENOMEM;
2621                 goto clean_up;
2622         }
2623
2624         inst_pa = inst_block->cpu_pa;
2625         inst_ptr = inst_block->cpuva;
2626
2627         gk20a_dbg_info("bar1 inst block physical phys = 0x%llx, kv = 0x%p",
2628                 (u64)inst_pa, inst_ptr);
2629
2630         memset(inst_ptr, 0, ram_fc_size_val_v());
2631
2632         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2633                 ram_in_page_dir_base_target_vid_mem_f() |
2634                 ram_in_page_dir_base_vol_true_f() |
2635                 ram_in_page_dir_base_lo_f(pde_addr_lo));
2636
2637         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2638                 ram_in_page_dir_base_hi_f(pde_addr_hi));
2639
2640         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2641                  u64_lo32(vm->va_limit) | 0xFFF);
2642
2643         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2644                 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2645
2646         gk20a_dbg_info("bar1 inst block ptr: %08llx",  (u64)inst_pa);
2647         gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1",
2648                               1,/*start*/
2649                               (vm->va_limit >> 12) - 1 /* length*/,
2650                               1); /* align */
2651         /* initialize just in case we try to use it anyway */
2652         gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused",
2653                               0x0badc0de, /* start */
2654                               1, /* length */
2655                               1); /* align */
2656
2657         vm->mapped_buffers = RB_ROOT;
2658
2659         mutex_init(&vm->update_gmmu_lock);
2660         kref_init(&vm->ref);
2661         INIT_LIST_HEAD(&vm->reserved_va_list);
2662
2663         return 0;
2664
2665 clean_up:
2666         /* free, etc */
2667         if (inst_block->cpuva)
2668                 dma_free_coherent(d, inst_block->size,
2669                         inst_block->cpuva, inst_block->iova);
2670         inst_block->cpuva = NULL;
2671         inst_block->iova = 0;
2672         return err;
2673 }
2674
2675 /* pmu vm, share channel_vm interfaces */
2676 int gk20a_init_pmu_vm(struct mm_gk20a *mm)
2677 {
2678         int err;
2679         phys_addr_t inst_pa;
2680         void *inst_ptr;
2681         struct vm_gk20a *vm = &mm->pmu.vm;
2682         struct gk20a *g = gk20a_from_mm(mm);
2683         struct device *d = dev_from_gk20a(g);
2684         struct inst_desc *inst_block = &mm->pmu.inst_block;
2685         u64 pde_addr;
2686         u32 pde_addr_lo;
2687         u32 pde_addr_hi;
2688         dma_addr_t iova;
2689
2690         vm->mm = mm;
2691
2692         mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
2693
2694         gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
2695
2696         vm->va_start  = GK20A_PMU_VA_START;
2697         vm->va_limit  = vm->va_start + mm->pmu.aperture_size;
2698
2699         {
2700                 u32 pde_lo, pde_hi;
2701                 pde_range_from_vaddr_range(vm,
2702                                            0, vm->va_limit-1,
2703                                            &pde_lo, &pde_hi);
2704                 vm->pdes.num_pdes = pde_hi + 1;
2705         }
2706
2707         /* The pmu is likely only to ever use/need small page sizes. */
2708         /* But just in case, for now... arrange for both. */
2709         vm->pdes.ptes[gmmu_page_size_small] =
2710                 kzalloc(sizeof(struct page_table_gk20a) *
2711                         vm->pdes.num_pdes, GFP_KERNEL);
2712
2713         vm->pdes.ptes[gmmu_page_size_big] =
2714                 kzalloc(sizeof(struct page_table_gk20a) *
2715                         vm->pdes.num_pdes, GFP_KERNEL);
2716
2717         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2718               vm->pdes.ptes[gmmu_page_size_big]))
2719                 return -ENOMEM;
2720
2721         gk20a_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d",
2722                    vm->va_limit, vm->pdes.num_pdes);
2723
2724         /* allocate the page table directory */
2725         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2726                                &vm->pdes.sgt, &vm->pdes.size);
2727         if (err)
2728                 goto clean_up;
2729
2730         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2731                              vm->pdes.size);
2732         if (err) {
2733                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2734                                         vm->pdes.size);
2735                 goto clean_up;
2736         }
2737         gk20a_dbg_info("pmu pdes phys @ 0x%llx",
2738                         (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2739         /* we could release vm->pdes.kv but it's only one page... */
2740
2741         pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2742         pde_addr_lo = u64_lo32(pde_addr >> 12);
2743         pde_addr_hi = u64_hi32(pde_addr);
2744
2745         gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2746                         (u64)pde_addr, pde_addr_lo, pde_addr_hi);
2747
2748         /* allocate instance mem for pmu */
2749         inst_block->size = GK20A_PMU_INST_SIZE;
2750         inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2751                                 &iova, GFP_KERNEL);
2752         if (!inst_block->cpuva) {
2753                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
2754                 err = -ENOMEM;
2755                 goto clean_up;
2756         }
2757
2758         inst_block->iova = iova;
2759         inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2760         if (!inst_block->cpu_pa) {
2761                 gk20a_err(d, "%s: failed to get phys address\n", __func__);
2762                 err = -ENOMEM;
2763                 goto clean_up;
2764         }
2765
2766         inst_pa = inst_block->cpu_pa;
2767         inst_ptr = inst_block->cpuva;
2768
2769         gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);
2770
2771         memset(inst_ptr, 0, GK20A_PMU_INST_SIZE);
2772
2773         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2774                 ram_in_page_dir_base_target_vid_mem_f() |
2775                 ram_in_page_dir_base_vol_true_f() |
2776                 ram_in_page_dir_base_lo_f(pde_addr_lo));
2777
2778         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2779                 ram_in_page_dir_base_hi_f(pde_addr_hi));
2780
2781         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2782                  u64_lo32(vm->va_limit) | 0xFFF);
2783
2784         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2785                 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2786
2787         gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu",
2788                               (vm->va_start >> 12), /* start */
2789                               (vm->va_limit - vm->va_start) >> 12, /*length*/
2790                               1); /* align */
2791         /* initialize just in case we try to use it anyway */
2792         gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused",
2793                               0x0badc0de, /* start */
2794                               1, /* length */
2795                               1); /* align */
2796
2797
2798         vm->mapped_buffers = RB_ROOT;
2799
2800         mutex_init(&vm->update_gmmu_lock);
2801         kref_init(&vm->ref);
2802         INIT_LIST_HEAD(&vm->reserved_va_list);
2803
2804         return 0;
2805
2806 clean_up:
2807         /* free, etc */
2808         if (inst_block->cpuva)
2809                 dma_free_coherent(d, inst_block->size,
2810                         inst_block->cpuva, inst_block->iova);
2811         inst_block->cpuva = NULL;
2812         inst_block->iova = 0;
2813         return err;
2814 }
2815
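/*
 * Issue an FB flush so that prior writes reach the L2, then poll for
 * completion.  Returns -EBUSY if the flush is still pending after the
 * retry budget expires (non-silicon platforms wait indefinitely).
 */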
2816 int gk20a_mm_fb_flush(struct gk20a *g)
2817 {
2818         struct mm_gk20a *mm = &g->mm;
2819         u32 data;
2820         s32 retry = 100;
2821         int ret = 0;
2822
2823         gk20a_dbg_fn("");
2824
2825         mutex_lock(&mm->l2_op_lock);
2826
2827         /* Make sure all previous writes are committed to the L2. There's no
2828            guarantee that writes are to DRAM. This will be a sysmembar internal
2829            to the L2. */
2830         gk20a_writel(g, flush_fb_flush_r(),
2831                 flush_fb_flush_pending_busy_f());
2832
2833         do {
2834                 data = gk20a_readl(g, flush_fb_flush_r());
2835
2836                 if (flush_fb_flush_outstanding_v(data) ==
2837                         flush_fb_flush_outstanding_true_v() ||
2838                     flush_fb_flush_pending_v(data) ==
2839                         flush_fb_flush_pending_busy_v()) {
2840                                 gk20a_dbg_info("fb_flush 0x%x", data);
2841                                 retry--;
2842                                 usleep_range(20, 40);
2843                 } else
2844                         break;
2845         } while (retry >= 0 || !tegra_platform_is_silicon());
2846
2847         if (retry < 0) {
2848                 gk20a_warn(dev_from_gk20a(g),
2849                         "fb_flush too many retries");
2850                 ret = -EBUSY;
2851         }
2852
2853         mutex_unlock(&mm->l2_op_lock);
2854
2855         return ret;
2856 }
2857
2858 static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
2859 {
2860         u32 data;
2861         s32 retry = 200;
2862
2863         /* Invalidate any clean lines from the L2 so subsequent reads go to
2864            DRAM. Dirty lines are not affected by this operation. */
2865         gk20a_writel(g, flush_l2_system_invalidate_r(),
2866                 flush_l2_system_invalidate_pending_busy_f());
2867
2868         do {
2869                 data = gk20a_readl(g, flush_l2_system_invalidate_r());
2870
2871                 if (flush_l2_system_invalidate_outstanding_v(data) ==
2872                         flush_l2_system_invalidate_outstanding_true_v() ||
2873                     flush_l2_system_invalidate_pending_v(data) ==
2874                         flush_l2_system_invalidate_pending_busy_v()) {
2875                                 gk20a_dbg_info("l2_system_invalidate 0x%x",
2876                                                 data);
2877                                 retry--;
2878                                 usleep_range(20, 40);
2879                 } else
2880                         break;
2881         } while (retry >= 0 || !tegra_platform_is_silicon());
2882
2883         if (retry < 0)
2884                 gk20a_warn(dev_from_gk20a(g),
2885                         "l2_system_invalidate too many retries");
2886 }
2887
2888 void gk20a_mm_l2_invalidate(struct gk20a *g)
2889 {
2890         struct mm_gk20a *mm = &g->mm;
2891         mutex_lock(&mm->l2_op_lock);
2892         gk20a_mm_l2_invalidate_locked(g);
2893         mutex_unlock(&mm->l2_op_lock);
2894 }
2895
void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
{
        struct mm_gk20a *mm = &g->mm;
        u32 data;
        s32 retry = 200;

        gk20a_dbg_fn("");

        mutex_lock(&mm->l2_op_lock);

        /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
           as clean, so subsequent reads might hit in the L2. */
        gk20a_writel(g, flush_l2_flush_dirty_r(),
                flush_l2_flush_dirty_pending_busy_f());

        do {
                data = gk20a_readl(g, flush_l2_flush_dirty_r());

                if (flush_l2_flush_dirty_outstanding_v(data) ==
                        flush_l2_flush_dirty_outstanding_true_v() ||
                    flush_l2_flush_dirty_pending_v(data) ==
                        flush_l2_flush_dirty_pending_busy_v()) {
                                gk20a_dbg_info("l2_flush_dirty 0x%x", data);
                                retry--;
                                usleep_range(20, 40);
                } else
                        break;
        } while (retry >= 0 || !tegra_platform_is_silicon());

        if (retry < 0)
                gk20a_warn(dev_from_gk20a(g),
                        "l2_flush_dirty too many retries");

        if (invalidate)
                gk20a_mm_l2_invalidate_locked(g);

        mutex_unlock(&mm->l2_op_lock);
}

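/*
 * Look up the mapped buffer that contains @gpu_va and return its dma_buf
 * together with the offset of @gpu_va within that mapping. Returns
 * -EINVAL if no mapping covers the address.
 */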
int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
                         struct dma_buf **dmabuf,
                         u64 *offset)
{
        struct mapped_buffer_node *mapped_buffer;

        gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);

        mutex_lock(&vm->update_gmmu_lock);

        mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers,
                                                        gpu_va);
        if (!mapped_buffer) {
                mutex_unlock(&vm->update_gmmu_lock);
                return -EINVAL;
        }

        *dmabuf = mapped_buffer->dmabuf;
        *offset = gpu_va - mapped_buffer->addr;

        mutex_unlock(&vm->update_gmmu_lock);

        return 0;
}

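/*
 * Invalidate the GMMU TLB for this VM regardless of the tlb_dirty flag
 * (no-op if the GPU is powered off): wait for space in the MMU PRI fifo,
 * point the invalidate at the VM's PDB, trigger an all-VA invalidate and
 * wait for the fifo to drain. A single static mutex serializes
 * invalidates across all VMs.
 */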
void __gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
{
        struct gk20a *g = gk20a_from_vm(vm);
        u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12);
        u32 data;
        s32 retry = 200;
        static DEFINE_MUTEX(tlb_lock);

        gk20a_dbg_fn("");

        if (!g->power_on)
                return;

        mutex_lock(&tlb_lock);
        do {
                data = gk20a_readl(g, fb_mmu_ctrl_r());
                if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
                        break;
                usleep_range(20, 40);
                retry--;
        } while (retry >= 0 || !tegra_platform_is_silicon());

        if (retry < 0) {
                gk20a_warn(dev_from_gk20a(g),
                        "wait mmu fifo space too many retries");
                goto out;
        }

        gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
                fb_mmu_invalidate_pdb_addr_f(addr_lo) |
                fb_mmu_invalidate_pdb_aperture_vid_mem_f());

        gk20a_writel(g, fb_mmu_invalidate_r(),
                fb_mmu_invalidate_all_va_true_f() |
                fb_mmu_invalidate_trigger_true_f());

        do {
                data = gk20a_readl(g, fb_mmu_ctrl_r());
                if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
                        fb_mmu_ctrl_pri_fifo_empty_false_f())
                        break;
                retry--;
                usleep_range(20, 40);
        } while (retry >= 0 || !tegra_platform_is_silicon());

        if (retry < 0)
                gk20a_warn(dev_from_gk20a(g),
                        "mmu invalidate too many retries");

out:
        mutex_unlock(&tlb_lock);
}

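/*
 * Conditional TLB invalidate: the HW invalidate is only issued when the
 * VM's page tables have actually changed (tlb_dirty) and the GPU is
 * powered on.
 */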
void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
{
        struct gk20a *g = gk20a_from_vm(vm);

        gk20a_dbg_fn("");

        /* Page tables are considered SW state and are preserved across
           prepare_poweroff. When gk20a deinit releases those page tables,
           common code in the vm unmap path calls TLB invalidate, which
           touches HW. Use the power_on flag to skip the TLB invalidation
           while GPU power is turned off. */

        if (!g->power_on)
                return;

        /* No need to invalidate if tlb is clean */
        mutex_lock(&vm->update_gmmu_lock);
        if (!vm->tlb_dirty) {
                mutex_unlock(&vm->update_gmmu_lock);
                return;
        }
        vm->tlb_dirty = false;
        mutex_unlock(&vm->update_gmmu_lock);

        __gk20a_mm_tlb_invalidate(vm);
}

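/*
 * MM suspend hook: flush the L2 through the per-chip ELPG flush operation
 * before power is removed. Page tables are SW state and are preserved
 * across the power transition (see gk20a_mm_tlb_invalidate above).
 */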
int gk20a_mm_suspend(struct gk20a *g)
{
        gk20a_dbg_fn("");

        g->ops.ltc.elpg_flush(g);

        gk20a_dbg_fn("done");
        return 0;
}

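/*
 * LTC interrupt service routine: log the pending ltc0/ltss interrupt
 * status and write the same value back to the interrupt register to
 * acknowledge it.
 */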
void gk20a_mm_ltc_isr(struct gk20a *g)
{
        u32 intr;

        intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r());
        gk20a_err(dev_from_gk20a(g), "ltc: %08x\n", intr);
        gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
}

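/*
 * Return true when MMU debug mode is currently enabled in
 * fb_mmu_debug_ctrl.
 */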
bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
{
        u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());

        return fb_mmu_debug_ctrl_debug_v(debug_ctrl) ==
                fb_mmu_debug_ctrl_debug_enabled_v();
}