gpu: nvgpu: remove unused vpr refetch functions
linux-3.10.git: drivers/gpu/nvgpu/gk20a/mm_gk20a.c
1 /*
2  * drivers/gpu/nvgpu/gk20a/mm_gk20a.c
3  *
4  * GK20A memory management
5  *
6  * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify it
9  * under the terms and conditions of the GNU General Public License,
10  * version 2, as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15  * more details.
16  *
17  * You should have received a copy of the GNU General Public License along with
18  * this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21
22 #include <linux/delay.h>
23 #include <linux/highmem.h>
24 #include <linux/log2.h>
25 #include <linux/nvhost.h>
26 #include <linux/pm_runtime.h>
27 #include <linux/scatterlist.h>
28 #include <linux/nvmap.h>
29 #include <linux/tegra-soc.h>
30 #include <linux/vmalloc.h>
31 #include <linux/dma-buf.h>
32 #include <asm/cacheflush.h>
33
34 #include "gk20a.h"
35 #include "mm_gk20a.h"
36 #include "hw_gmmu_gk20a.h"
37 #include "hw_fb_gk20a.h"
38 #include "hw_bus_gk20a.h"
39 #include "hw_ram_gk20a.h"
40 #include "hw_mc_gk20a.h"
41 #include "hw_flush_gk20a.h"
42 #include "hw_ltc_gk20a.h"
43
44 #include "kind_gk20a.h"
45
46 #ifdef CONFIG_ARM64
47 #define outer_flush_range(a, b)
48 #define __cpuc_flush_dcache_area __flush_dcache_area
49 #endif
50
51 /*
52  * GPU mapping life cycle
53  * ======================
54  *
55  * Kernel mappings
56  * ---------------
57  *
58  * Kernel mappings are created through vm.map(..., false):
59  *
60  *  - Mappings to the same allocations are reused and refcounted.
61  *  - This path does not support deferred unmapping (i.e. kernel must wait for
62  *    all hw operations on the buffer to complete before unmapping).
63  *  - References to dmabuf are owned and managed by the (kernel) clients of
64  *    the gk20a_vm layer.
65  *
66  *
67  * User space mappings
68  * -------------------
69  *
70  * User space mappings are created through as.map_buffer -> vm.map(..., true):
71  *
72  *  - Mappings to the same allocations are reused and refcounted.
73  *  - This path supports deferred unmapping (i.e. we delay the actual unmapping
74  *    until all hw operations have completed).
75  *  - References to dmabuf are owned and managed by the vm_gk20a
76  *    layer itself. vm.map acquires these refs, and sets
77  *    mapped_buffer->own_mem_ref to record that we must release the refs when we
78  *    actually unmap.
79  *
80  */
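/*
 * Illustrative sketch (editorial addition, not part of the driver): roughly
 * what the kernel-mapping path described above looks like at a call site.
 * The dma_buf pointer and the kind/rw_flag values are placeholders.
 */
#if 0	/* example only, never compiled */
static u64 example_kernel_map(struct vm_gk20a *vm, struct dma_buf *buf)
{
	/* kernel path: vm.map(..., false). The caller keeps its own dmabuf
	 * reference and must wait for all hw work on the buffer to finish
	 * before unmapping; user-space mappings instead go through
	 * as.map_buffer -> vm.map(..., true) and may be unmapped deferred. */
	return gk20a_vm_map(vm, buf, 0 /* offset_align */, 0 /* flags */,
			    0 /* kind: placeholder */, NULL /* sgt out */,
			    false /* user_mapped */, 0 /* rw_flag: placeholder */);
}
#endif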
81
82 static inline int vm_aspace_id(struct vm_gk20a *vm)
83 {
84         /* -1 is bar1 or pmu, etc. */
85         return vm->as_share ? vm->as_share->id : -1;
86 }
87 static inline u32 hi32(u64 f)
88 {
89         return (u32)(f >> 32);
90 }
91 static inline u32 lo32(u64 f)
92 {
93         return (u32)(f & 0xffffffff);
94 }
95
96 #define FLUSH_CPU_DCACHE(va, pa, size)  \
97         do {    \
98                 __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \
99                 outer_flush_range(pa, pa + (size_t)(size));             \
100         } while (0)
101
102 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
103 static struct mapped_buffer_node *find_mapped_buffer_locked(
104                                         struct rb_root *root, u64 addr);
105 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
106                                 struct rb_root *root, struct dma_buf *dmabuf,
107                                 u32 kind);
108 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
109                                    enum gmmu_pgsz_gk20a pgsz_idx,
110                                    struct sg_table *sgt,
111                                    u64 first_vaddr, u64 last_vaddr,
112                                    u8 kind_v, u32 ctag_offset, bool cacheable,
113                                    int rw_flag);
114 static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
115 static void gk20a_vm_remove_support(struct vm_gk20a *vm);
116
117
118 /* note: keep the page sizes sorted lowest to highest here */
119 static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
120 static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
121 static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
122                                                                 0x1ffffLL };
123 static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
124
125 struct gk20a_comptags {
126         u32 offset;
127         u32 lines;
128 };
129
130 struct gk20a_dmabuf_priv {
131         struct mutex lock;
132
133         struct gk20a_allocator *comptag_allocator;
134         struct gk20a_comptags comptags;
135
136         struct dma_buf_attachment *attach;
137         struct sg_table *sgt;
138
139         int pin_count;
140 };
141
142 static void gk20a_mm_delete_priv(void *_priv)
143 {
144         struct gk20a_dmabuf_priv *priv = _priv;
145         if (!priv)
146                 return;
147
148         if (priv->comptags.lines) {
149                 BUG_ON(!priv->comptag_allocator);
150                 priv->comptag_allocator->free(priv->comptag_allocator,
151                                               priv->comptags.offset,
152                                               priv->comptags.lines);
153         }
154
155         kfree(priv);
156 }
157
158 struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf)
159 {
160         struct gk20a_dmabuf_priv *priv;
161
162         priv = dma_buf_get_drvdata(dmabuf, dev);
163         if (WARN_ON(!priv))
164                 return ERR_PTR(-EINVAL);
165
166         mutex_lock(&priv->lock);
167
168         if (priv->pin_count == 0) {
169                 priv->attach = dma_buf_attach(dmabuf, dev);
170                 if (IS_ERR(priv->attach)) {
171                         mutex_unlock(&priv->lock);
172                         return ERR_CAST(priv->attach);
173                 }
174
175                 priv->sgt = dma_buf_map_attachment(priv->attach,
176                                                    DMA_BIDIRECTIONAL);
177                 if (IS_ERR(priv->sgt)) {
178                         dma_buf_detach(dmabuf, priv->attach);
179                         mutex_unlock(&priv->lock);
180                         return priv->sgt;
181                 }
182         }
183
184         priv->pin_count++;
185         mutex_unlock(&priv->lock);
186         return priv->sgt;
187 }
188
189 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
190                     struct sg_table *sgt)
191 {
192         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
193         dma_addr_t dma_addr;
194
195         if (IS_ERR(priv) || !priv)
196                 return;
197
198         mutex_lock(&priv->lock);
199         WARN_ON(priv->sgt != sgt);
200         priv->pin_count--;
201         WARN_ON(priv->pin_count < 0);
202         dma_addr = sg_dma_address(priv->sgt->sgl);
203         if (priv->pin_count == 0) {
204                 dma_buf_unmap_attachment(priv->attach, priv->sgt,
205                                          DMA_BIDIRECTIONAL);
206                 dma_buf_detach(dmabuf, priv->attach);
207         }
208         mutex_unlock(&priv->lock);
209 }
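/*
 * Illustrative usage of the pin/unpin helpers above (editorial sketch;
 * "dev" and "buf" are placeholders). The attachment is refcounted, so only
 * the first pin pays for dma_buf_attach()/dma_buf_map_attachment() and only
 * the last unpin tears it down.
 */
#if 0	/* example only, never compiled */
	struct sg_table *sgt = gk20a_mm_pin(dev, buf);

	if (!IS_ERR(sgt)) {
		/* ... program the hw with sg_dma_address(sgt->sgl) ... */
		gk20a_mm_unpin(dev, buf, sgt);
	}
#endif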
210
211
212 static void gk20a_get_comptags(struct device *dev,
213                                struct dma_buf *dmabuf,
214                                struct gk20a_comptags *comptags)
215 {
216         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
217
218         if (!comptags)
219                 return;
220
221         if (!priv) {
222                 comptags->lines = 0;
223                 comptags->offset = 0;
224                 return;
225         }
226
227         *comptags = priv->comptags;
228 }
229
230 static int gk20a_alloc_comptags(struct device *dev,
231                                 struct dma_buf *dmabuf,
232                                 struct gk20a_allocator *allocator,
233                                 int lines)
234 {
235         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
236         u32 offset = 0;
237         int err;
238
239         if (!priv)
240                 return -ENOSYS;
241
242         if (!lines)
243                 return -EINVAL;
244
245         /* store the allocator so we can use it when we free the ctags */
246         priv->comptag_allocator = allocator;
247         err = allocator->alloc(allocator, &offset, lines);
248         if (!err) {
249                 priv->comptags.lines = lines;
250                 priv->comptags.offset = offset;
251         }
252         return err;
253 }
254
255
256
257
258 static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
259 {
260         gk20a_dbg_fn("");
261         if (g->ops.fb.reset)
262                 g->ops.fb.reset(g);
263
264         if (g->ops.fb.init_fs_state)
265                 g->ops.fb.init_fs_state(g);
266
267         return 0;
268 }
269
270 void gk20a_remove_mm_support(struct mm_gk20a *mm)
271 {
272         struct gk20a *g = mm->g;
273         struct device *d = dev_from_gk20a(g);
274         struct vm_gk20a *vm = &mm->bar1.vm;
275         struct inst_desc *inst_block = &mm->bar1.inst_block;
276
277         gk20a_dbg_fn("");
278
279         if (inst_block->cpuva)
280                 dma_free_coherent(d, inst_block->size,
281                         inst_block->cpuva, inst_block->iova);
282         inst_block->cpuva = NULL;
283         inst_block->iova = 0;
284
285         gk20a_vm_remove_support(vm);
286 }
287
288 int gk20a_init_mm_setup_sw(struct gk20a *g)
289 {
290         struct mm_gk20a *mm = &g->mm;
291         int i;
292
293         gk20a_dbg_fn("");
294
295         if (mm->sw_ready) {
296                 gk20a_dbg_fn("skip init");
297                 return 0;
298         }
299
300         mm->g = g;
301         mutex_init(&mm->l2_op_lock);
302         mm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
303         mm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
304         mm->pde_stride    = mm->big_page_size << 10;
305         mm->pde_stride_shift = ilog2(mm->pde_stride);
306         BUG_ON(mm->pde_stride_shift > 31); /* we have assumptions about this */
307
308         for (i = 0; i < ARRAY_SIZE(gmmu_page_sizes); i++) {
309
310                 u32 num_ptes, pte_space, num_pages;
311
312                 /* assuming "full" page tables */
313                 num_ptes = mm->pde_stride / gmmu_page_sizes[i];
314
315                 pte_space = num_ptes * gmmu_pte__size_v();
316                 /* allocate whole pages */
317                 pte_space = roundup(pte_space, PAGE_SIZE);
318
319                 num_pages = pte_space / PAGE_SIZE;
320                 /* make sure "order" is viable */
321                 BUG_ON(!is_power_of_2(num_pages));
322
323                 mm->page_table_sizing[i].num_ptes = num_ptes;
324                 mm->page_table_sizing[i].order = ilog2(num_pages);
325         }
326
327         /*TBD: make channel vm size configurable */
328         mm->channel.size = 1ULL << NV_GMMU_VA_RANGE;
329
330         gk20a_dbg_info("channel vm size: %dMB", (int)(mm->channel.size >> 20));
331
332         gk20a_dbg_info("small page-size (%dKB) pte array: %dKB",
333                         gmmu_page_sizes[gmmu_page_size_small] >> 10,
334                         (mm->page_table_sizing[gmmu_page_size_small].num_ptes *
335                          gmmu_pte__size_v()) >> 10);
336
337         gk20a_dbg_info("big page-size (%dKB) pte array: %dKB",
338                         gmmu_page_sizes[gmmu_page_size_big] >> 10,
339                         (mm->page_table_sizing[gmmu_page_size_big].num_ptes *
340                          gmmu_pte__size_v()) >> 10);
341
342
343         gk20a_init_bar1_vm(mm);
344
345         mm->remove_support = gk20a_remove_mm_support;
346         mm->sw_ready = true;
347
348         gk20a_dbg_fn("done");
349         return 0;
350 }
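/*
 * Worked numbers for the page-table sizing loop above (editorial note,
 * assuming the 8-byte PTE that gmmu_pte__size_v() reports and 4KB CPU
 * pages):
 *
 *   big_page_size    = 128KB  ->  pde_stride = 128KB << 10 = 128MB
 *   pde_stride_shift = ilog2(128MB) = 27
 *
 *   4KB   gmmu pages: num_ptes = 128MB / 4KB   = 32768
 *                     pte_space = 32768 * 8B = 256KB -> 64 pages -> order 6
 *   128KB gmmu pages: num_ptes = 128MB / 128KB = 1024
 *                     pte_space = 1024 * 8B  = 8KB   -> 2 pages  -> order 1
 */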
351
352 /* make sure gk20a_init_mm_support is called before */
353 static int gk20a_init_mm_setup_hw(struct gk20a *g)
354 {
355         struct mm_gk20a *mm = &g->mm;
356         struct inst_desc *inst_block = &mm->bar1.inst_block;
357         phys_addr_t inst_pa = inst_block->cpu_pa;
358
359         gk20a_dbg_fn("");
360
361         /* set large page size in fb
362          * note this is very early on, can we defer it ? */
363         {
364                 u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
365
366                 if (gmmu_page_sizes[gmmu_page_size_big] == SZ_128K)
367                         fb_mmu_ctrl = (fb_mmu_ctrl &
368                                        ~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
369                                 fb_mmu_ctrl_vm_pg_size_128kb_f();
370                 else
371                         BUG_ON(1); /* no support/testing for larger ones yet */
372
373                 gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
374         }
375
376         inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
377         gk20a_dbg_info("bar1 inst block ptr: 0x%08x",  (u32)inst_pa);
378
379         gk20a_writel(g, bus_bar1_block_r(),
380                      bus_bar1_block_target_vid_mem_f() |
381                      bus_bar1_block_mode_virtual_f() |
382                      bus_bar1_block_ptr_f(inst_pa));
383         if (gk20a_mm_fb_flush(g) || gk20a_mm_fb_flush(g))
384                 return -EBUSY;
385
386         gk20a_dbg_fn("done");
387         return 0;
388 }
389
390 int gk20a_init_mm_support(struct gk20a *g)
391 {
392         int err;
393
394         err = gk20a_init_mm_reset_enable_hw(g);
395         if (err)
396                 return err;
397
398         err = gk20a_init_mm_setup_sw(g);
399         if (err)
400                 return err;
401
402         err = gk20a_init_mm_setup_hw(g);
403         if (err)
404                 return err;
405
406         return err;
407 }
408
409 #ifdef CONFIG_GK20A_PHYS_PAGE_TABLES
410 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
411                             void **handle,
412                             struct sg_table **sgt,
413                             size_t *size)
414 {
415         u32 num_pages = 1 << order;
416         u32 len = num_pages * PAGE_SIZE;
417         int err;
418         struct page *pages;
419
420         gk20a_dbg_fn("");
421
422         pages = alloc_pages(GFP_KERNEL, order);
423         if (!pages) {
424                 gk20a_dbg(gpu_dbg_pte, "alloc_pages failed\n");
425                 goto err_out;
426         }
427         *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
428         if (!(*sgt)) {
429                 gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table");
430                 goto err_alloced;
431         }
432         err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
433         if (err) {
434                 gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed\n");
435                 goto err_sg_table;
436         }
437         sg_set_page((*sgt)->sgl, pages, len, 0);
438         *handle = page_address(pages);
439         memset(*handle, 0, len);
440         *size = len;
441         FLUSH_CPU_DCACHE(*handle, sg_phys((*sgt)->sgl), len);
442
443         return 0;
444
445 err_sg_table:
446         kfree(*sgt);
447 err_alloced:
448         __free_pages(pages, order);
449 err_out:
450         return -ENOMEM;
451 }
452
453 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
454                             struct sg_table *sgt, u32 order,
455                             size_t size)
456 {
457         gk20a_dbg_fn("");
458         BUG_ON(sgt == NULL);
459         free_pages((unsigned long)handle, order);
460         sg_free_table(sgt);
461         kfree(sgt);
462 }
463
464 static int map_gmmu_pages(void *handle, struct sg_table *sgt,
465                           void **va, size_t size)
466 {
467         FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
468         *va = handle;
469         return 0;
470 }
471
472 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
473 {
474         FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
475 }
476 #else
477
478 static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
479                             void **handle,
480                             struct sg_table **sgt,
481                             size_t *size)
482 {
483         struct device *d = dev_from_vm(vm);
484         u32 num_pages = 1 << order;
485         u32 len = num_pages * PAGE_SIZE;
486         dma_addr_t iova;
487         DEFINE_DMA_ATTRS(attrs);
488         struct page **pages;
489         void *cpuva;
490         int err = 0;
491
492         gk20a_dbg_fn("");
493
494         *size = len;
495
496         if (IS_ENABLED(CONFIG_ARM64)) {
497                 cpuva = dma_zalloc_coherent(d, len, &iova, GFP_KERNEL);
498                 if (!cpuva) {
499                         gk20a_err(d, "memory allocation failed\n");
500                         goto err_out;
501                 }
502
503                 err = gk20a_get_sgtable(d, sgt, cpuva, iova, len);
504                 if (err) {
505                         gk20a_err(d, "sgt allocation failed\n");
506                         goto err_free;
507                 }
508
509                 *handle = cpuva;
510         } else {
511                 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
512                 pages = dma_alloc_attrs(d, len, &iova, GFP_KERNEL, &attrs);
513                 if (!pages) {
514                         gk20a_err(d, "memory allocation failed\n");
515                         goto err_out;
516                 }
517
518                 err = gk20a_get_sgtable_from_pages(d, sgt, pages,
519                                         iova, len);
520                 if (err) {
521                         gk20a_err(d, "sgt allocation failed\n");
522                         goto err_free;
523                 }
524
525                 *handle = (void *)pages;
526         }
527
528         return 0;
529
530 err_free:
531         if (IS_ENABLED(CONFIG_ARM64)) {
532                 dma_free_coherent(d, len, cpuva, iova);
533                 cpuva = NULL;
534         } else {
535                 dma_free_attrs(d, len, pages, iova, &attrs);
536                 pages = NULL;
537         }
538         iova = 0;
539 err_out:
540         return -ENOMEM;
541 }
542
543 static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
544                             struct sg_table *sgt, u32 order,
545                             size_t size)
546 {
547         struct device *d = dev_from_vm(vm);
548         u64 iova;
549         DEFINE_DMA_ATTRS(attrs);
550         struct page **pages;
551
552         gk20a_dbg_fn("");
553         BUG_ON(sgt == NULL);
554
555         iova = sg_dma_address(sgt->sgl);
556
557         gk20a_free_sgtable(&sgt);
558
559         if (IS_ENABLED(CONFIG_ARM64)) {
560                 dma_free_coherent(d, size, handle, iova);
561         } else {
562                 pages = (struct page **)handle;
563                 dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
564                 dma_free_attrs(d, size, pages, iova, &attrs);
565                 pages = NULL;
566         }
567
568         handle = NULL;
569         iova = 0;
570 }
571
572 static int map_gmmu_pages(void *handle, struct sg_table *sgt,
573                           void **kva, size_t size)
574 {
575         int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
576         struct page **pages;
577         gk20a_dbg_fn("");
578
579         if (IS_ENABLED(CONFIG_ARM64)) {
580                 *kva = handle;
581         } else {
582                 pages = (struct page **)handle;
583                 *kva = vmap(pages, count, 0, pgprot_dmacoherent(PAGE_KERNEL));
584                 if (!(*kva))
585                         return -ENOMEM;
586         }
587
588         return 0;
589 }
590
591 static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
592 {
593         gk20a_dbg_fn("");
594
595         if (!IS_ENABLED(CONFIG_ARM64))
596                 vunmap(va);
597         va = NULL;
598 }
599 #endif
600
601 /* allocate a phys contig region big enough for a full
602  * sized gmmu page table for the given gmmu_page_size.
603  * the whole range is zeroed so it's "invalid"/will fault
604  */
605
606 static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
607                                         enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
608                                         struct page_table_gk20a *pte)
609 {
610         int err;
611         u32 pte_order;
612         void *handle = NULL;
613         struct sg_table *sgt;
614         size_t size;
615
616         gk20a_dbg_fn("");
617
618         /* allocate enough pages for the table */
619         pte_order = vm->mm->page_table_sizing[gmmu_pgsz_idx].order;
620
621         err = alloc_gmmu_pages(vm, pte_order, &handle, &sgt, &size);
622         if (err)
623                 return err;
624
625         gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
626                         pte, gk20a_mm_iova_addr(sgt->sgl), pte_order);
627
628         pte->ref = handle;
629         pte->sgt = sgt;
630         pte->size = size;
631
632         return 0;
633 }
634
635 /* given address range (inclusive) determine the pdes crossed */
636 static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm,
637                                               u64 addr_lo, u64 addr_hi,
638                                               u32 *pde_lo, u32 *pde_hi)
639 {
640         *pde_lo = (u32)(addr_lo >> vm->mm->pde_stride_shift);
641         *pde_hi = (u32)(addr_hi >> vm->mm->pde_stride_shift);
642         gk20a_dbg(gpu_dbg_pte, "addr_lo=0x%llx addr_hi=0x%llx pde_ss=%d",
643                    addr_lo, addr_hi, vm->mm->pde_stride_shift);
644         gk20a_dbg(gpu_dbg_pte, "pde_lo=%d pde_hi=%d",
645                    *pde_lo, *pde_hi);
646 }
647
648 static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
649 {
650         return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v());
651 }
652
653 static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm,
654                                        u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
655 {
656         u32 ret;
657         /* mask off pde part */
658         addr = addr & ((((u64)1) << vm->mm->pde_stride_shift) - ((u64)1));
659         /* shift over to get pte index. note assumption that pte index
660          * doesn't leak over into the high 32b */
661         ret = (u32)(addr >> gmmu_page_shifts[pgsz_idx]);
662
663         gk20a_dbg(gpu_dbg_pte, "addr=0x%llx pte_i=0x%x", addr, ret);
664         return ret;
665 }
666
667 static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page,
668                                                     u32 *pte_offset)
669 {
670         /* ptes are 8B regardless of pagesize */
671         /* pte space pages are 4KB. so 512 ptes per 4KB page*/
672         *pte_page = i >> 9;
673
674         /* this offset is a pte offset, not a byte offset */
675         *pte_offset = i & ((1<<9)-1);
676
677         gk20a_dbg(gpu_dbg_pte, "i=0x%x pte_page=0x%x pte_offset=0x%x",
678                    i, *pte_page, *pte_offset);
679 }
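/*
 * Worked example for the index helpers above (editorial note, assuming
 * pde_stride_shift == 27 and the small 4KB page size):
 *
 *   vaddr      = 0x1_2345_6000
 *   pde index  = vaddr >> 27                  = 0x24
 *   pte index  = (vaddr & (128MB - 1)) >> 12  = 0x3456
 *   pte_page   = 0x3456 >> 9                  = 0x1a
 *   pte_offset = 0x3456 & 0x1ff               = 0x56
 */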
680
681
682 /*
683  * given a pde index/page table number, make sure it has
684  * backing store; if not, allocate it and
685  * record it in the appropriate pde
686  */
687 static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
688                                 u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
689 {
690         int err;
691         struct page_table_gk20a *pte =
692                 vm->pdes.ptes[gmmu_pgsz_idx] + i;
693
694         gk20a_dbg_fn("");
695
696         /* if it's already in place it's valid */
697         if (pte->ref)
698                 return 0;
699
700         gk20a_dbg(gpu_dbg_pte, "alloc %dKB ptes for pde %d",
701                    gmmu_page_sizes[gmmu_pgsz_idx]/1024, i);
702
703         err = zalloc_gmmu_page_table_gk20a(vm, gmmu_pgsz_idx, pte);
704         if (err)
705                 return err;
706
707         /* rewrite pde */
708         update_gmmu_pde_locked(vm, i);
709
710         return 0;
711 }
712
713 static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm,
714                                                        u64 addr)
715 {
716         struct vm_reserved_va_node *va_node;
717         list_for_each_entry(va_node, &vm->reserved_va_list, reserved_va_list)
718                 if (addr >= va_node->vaddr_start &&
719                     addr < (u64)va_node->vaddr_start + (u64)va_node->size)
720                         return va_node;
721
722         return NULL;
723 }
724
725 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
726                          struct mapped_buffer_node ***mapped_buffers,
727                          int *num_buffers)
728 {
729         struct mapped_buffer_node *mapped_buffer;
730         struct mapped_buffer_node **buffer_list;
731         struct rb_node *node;
732         int i = 0;
733
734         mutex_lock(&vm->update_gmmu_lock);
735
736         buffer_list = kzalloc(sizeof(*buffer_list) *
737                               vm->num_user_mapped_buffers, GFP_KERNEL);
738         if (!buffer_list) {
739                 mutex_unlock(&vm->update_gmmu_lock);
740                 return -ENOMEM;
741         }
742
743         node = rb_first(&vm->mapped_buffers);
744         while (node) {
745                 mapped_buffer =
746                         container_of(node, struct mapped_buffer_node, node);
747                 if (mapped_buffer->user_mapped) {
748                         buffer_list[i] = mapped_buffer;
749                         kref_get(&mapped_buffer->ref);
750                         i++;
751                 }
752                 node = rb_next(&mapped_buffer->node);
753         }
754
755         BUG_ON(i != vm->num_user_mapped_buffers);
756
757         *num_buffers = vm->num_user_mapped_buffers;
758         *mapped_buffers = buffer_list;
759
760         mutex_unlock(&vm->update_gmmu_lock);
761
762         return 0;
763 }
764
765 static void gk20a_vm_unmap_locked_kref(struct kref *ref)
766 {
767         struct mapped_buffer_node *mapped_buffer =
768                 container_of(ref, struct mapped_buffer_node, ref);
769         gk20a_vm_unmap_locked(mapped_buffer);
770 }
771
772 void gk20a_vm_put_buffers(struct vm_gk20a *vm,
773                                  struct mapped_buffer_node **mapped_buffers,
774                                  int num_buffers)
775 {
776         int i;
777
778         mutex_lock(&vm->update_gmmu_lock);
779
780         for (i = 0; i < num_buffers; ++i)
781                 kref_put(&mapped_buffers[i]->ref,
782                          gk20a_vm_unmap_locked_kref);
783
784         mutex_unlock(&vm->update_gmmu_lock);
785
786         kfree(mapped_buffers);
787 }
788
789 static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
790 {
791         struct device *d = dev_from_vm(vm);
792         int retries;
793         struct mapped_buffer_node *mapped_buffer;
794
795         mutex_lock(&vm->update_gmmu_lock);
796
797         mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
798         if (!mapped_buffer) {
799                 mutex_unlock(&vm->update_gmmu_lock);
800                 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
801                 return;
802         }
803
804         if (mapped_buffer->flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
805                 mutex_unlock(&vm->update_gmmu_lock);
806
807                 retries = 1000;
808                 while (retries) {
809                         if (atomic_read(&mapped_buffer->ref.refcount) == 1)
810                                 break;
811                         retries--;
812                         udelay(50);
813                 }
814                 if (!retries)
815                         gk20a_err(d, "sync-unmap failed on 0x%llx",
816                                                                 offset);
817                 mutex_lock(&vm->update_gmmu_lock);
818         }
819
820         mapped_buffer->user_mapped--;
821         if (mapped_buffer->user_mapped == 0)
822                 vm->num_user_mapped_buffers--;
823         kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
824
825         mutex_unlock(&vm->update_gmmu_lock);
826 }
827
828 static u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
829                              u64 size,
830                              enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
831
832 {
833         struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
834         int err;
835         u64 offset;
836         u32 start_page_nr = 0, num_pages;
837         u64 gmmu_page_size = gmmu_page_sizes[gmmu_pgsz_idx];
838
839         if (gmmu_pgsz_idx >= ARRAY_SIZE(gmmu_page_sizes)) {
840                 dev_warn(dev_from_vm(vm),
841                          "invalid page size requested in gk20a vm alloc");
842                 return 0; /* callers treat 0 as alloc failure */
843         }
844
845         if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
846                 dev_warn(dev_from_vm(vm),
847                          "unsupported page size requested");
848                 return 0; /* callers treat 0 as alloc failure */
849
850         }
851
852         /* be certain we round up to gmmu_page_size if needed */
853         /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
854         size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
855
856         gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
857                         gmmu_page_sizes[gmmu_pgsz_idx]>>10);
858
859         /* The vma allocator represents page accounting. */
860         num_pages = size >> gmmu_page_shifts[gmmu_pgsz_idx];
861
862         err = vma->alloc(vma, &start_page_nr, num_pages);
863
864         if (err) {
865                 gk20a_err(dev_from_vm(vm),
866                            "%s oom: sz=0x%llx", vma->name, size);
867                 return 0;
868         }
869
870         offset = (u64)start_page_nr << gmmu_page_shifts[gmmu_pgsz_idx];
871         gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
872
873         return offset;
874 }
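/*
 * Quick example for the rounding above (editorial note): a request of
 * size 0x21001 with 4KB pages is rounded up to 0x22000, so num_pages is
 * 0x22 and the returned offset is start_page_nr << 12.
 */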
875
876 static int gk20a_vm_free_va(struct vm_gk20a *vm,
877                              u64 offset, u64 size,
878                              enum gmmu_pgsz_gk20a pgsz_idx)
879 {
880         struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
881         u32 page_size = gmmu_page_sizes[pgsz_idx];
882         u32 page_shift = gmmu_page_shifts[pgsz_idx];
883         u32 start_page_nr, num_pages;
884         int err;
885
886         gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
887                         vma->name, offset, size);
888
889         start_page_nr = (u32)(offset >> page_shift);
890         num_pages = (u32)((size + page_size - 1) >> page_shift);
891
892         err = vma->free(vma, start_page_nr, num_pages);
893         if (err) {
894                 gk20a_err(dev_from_vm(vm),
895                            "not found: offset=0x%llx, sz=0x%llx",
896                            offset, size);
897         }
898
899         return err;
900 }
901
902 static int insert_mapped_buffer(struct rb_root *root,
903                                 struct mapped_buffer_node *mapped_buffer)
904 {
905         struct rb_node **new_node = &(root->rb_node), *parent = NULL;
906
907         /* Figure out where to put new node */
908         while (*new_node) {
909                 struct mapped_buffer_node *cmp_with =
910                         container_of(*new_node, struct mapped_buffer_node,
911                                      node);
912
913                 parent = *new_node;
914
915                 if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */
916                         new_node = &((*new_node)->rb_left);
917                 else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */
918                         new_node = &((*new_node)->rb_right);
919                 else
920                         return -EINVAL; /* no fair dup'ing */
921         }
922
923         /* Add new node and rebalance tree. */
924         rb_link_node(&mapped_buffer->node, parent, new_node);
925         rb_insert_color(&mapped_buffer->node, root);
926
927         return 0;
928 }
929
930 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
931                                 struct rb_root *root, struct dma_buf *dmabuf,
932                                 u32 kind)
933 {
934         struct rb_node *node = rb_first(root);
935         while (node) {
936                 struct mapped_buffer_node *mapped_buffer =
937                         container_of(node, struct mapped_buffer_node, node);
938                 if (mapped_buffer->dmabuf == dmabuf &&
939                     kind == mapped_buffer->kind)
940                         return mapped_buffer;
941                 node = rb_next(&mapped_buffer->node);
942         }
943         return NULL;
944 }
945
946 static struct mapped_buffer_node *find_mapped_buffer_locked(
947                                         struct rb_root *root, u64 addr)
948 {
949
950         struct rb_node *node = root->rb_node;
951         while (node) {
952                 struct mapped_buffer_node *mapped_buffer =
953                         container_of(node, struct mapped_buffer_node, node);
954                 if (mapped_buffer->addr > addr) /* u64 cmp */
955                         node = node->rb_left;
956                 else if (mapped_buffer->addr != addr) /* u64 cmp */
957                         node = node->rb_right;
958                 else
959                         return mapped_buffer;
960         }
961         return NULL;
962 }
963
964 static struct mapped_buffer_node *find_mapped_buffer_range_locked(
965                                         struct rb_root *root, u64 addr)
966 {
967         struct rb_node *node = root->rb_node;
968         while (node) {
969                 struct mapped_buffer_node *m =
970                         container_of(node, struct mapped_buffer_node, node);
971                 if (m->addr <= addr && m->addr + m->size > addr)
972                         return m;
973                 else if (m->addr > addr) /* u64 cmp */
974                         node = node->rb_left;
975                 else
976                         node = node->rb_right;
977         }
978         return NULL;
979 }
980
981 #define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
982
983 struct buffer_attrs {
984         struct sg_table *sgt;
985         u64 size;
986         u64 align;
987         u32 ctag_offset;
988         u32 ctag_lines;
989         int pgsz_idx;
990         u8 kind_v;
991         u8 uc_kind_v;
992 };
993
994 static void gmmu_select_page_size(struct buffer_attrs *bfr)
995 {
996         int i;
997         /*  choose the biggest first (top->bottom) */
998         for (i = (gmmu_nr_page_sizes-1); i >= 0; i--)
999                 if (!(gmmu_page_offset_masks[i] & bfr->align)) {
1000                         /* would like to add this too but nvmap returns the
1001                          * original requested size not the allocated size.
1002                          * (!(gmmu_page_offset_masks[i] & bfr->size)) */
1003                         bfr->pgsz_idx = i;
1004                         break;
1005                 }
1006 }
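/*
 * Editorial note: with the tables above, a buffer whose base address has
 * none of the 0x1ffff offset bits set (128KB aligned) selects the big page
 * size, while a buffer that is only 4KB aligned falls through to the small
 * page size.
 */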
1007
1008 static int setup_buffer_kind_and_compression(struct device *d,
1009                                              u32 flags,
1010                                              struct buffer_attrs *bfr,
1011                                              enum gmmu_pgsz_gk20a pgsz_idx)
1012 {
1013         bool kind_compressible;
1014
1015         if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
1016                 bfr->kind_v = gmmu_pte_kind_pitch_v();
1017
1018         if (unlikely(!gk20a_kind_is_supported(bfr->kind_v))) {
1019                 gk20a_err(d, "kind 0x%x not supported", bfr->kind_v);
1020                 return -EINVAL;
1021         }
1022
1023         bfr->uc_kind_v = gmmu_pte_kind_invalid_v();
1024         /* find a suitable uncompressed kind if it becomes necessary later */
1025         kind_compressible = gk20a_kind_is_compressible(bfr->kind_v);
1026         if (kind_compressible) {
1027                 bfr->uc_kind_v = gk20a_get_uncompressed_kind(bfr->kind_v);
1028                 if (unlikely(bfr->uc_kind_v == gmmu_pte_kind_invalid_v())) {
1029                         /* shouldn't happen, but it is worth cross-checking */
1030                         gk20a_err(d, "comptag kind 0x%x can't be"
1031                                    " downgraded to uncompressed kind",
1032                                    bfr->kind_v);
1033                         return -EINVAL;
1034                 }
1035         }
1036         /* comptags only supported for suitable kinds, 128KB pagesize */
1037         if (unlikely(kind_compressible &&
1038                      (gmmu_page_sizes[pgsz_idx] != 128*1024))) {
1039                 /*
1040                 gk20a_warn(d, "comptags specified"
1041                 " but pagesize being used doesn't support it");*/
1042                 /* it is safe to fall back to uncompressed as
1043                    functionality is not harmed */
1044                 bfr->kind_v = bfr->uc_kind_v;
1045                 kind_compressible = false;
1046         }
1047         if (kind_compressible)
1048                 bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
1049                         COMP_TAG_LINE_SIZE_SHIFT;
1050         else
1051                 bfr->ctag_lines = 0;
1052
1053         return 0;
1054 }
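/*
 * Editorial note (hedged): each comptag line covers one compression-tag
 * granule of the buffer, so ctag_lines is simply the buffer size divided by
 * COMP_TAG_LINE_SIZE, rounded up. For example, if the line size were 128KB
 * (one big page), a 1MB buffer would need 8 lines; the exact constant lives
 * in the headers, not in this file.
 */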
1055
1056 static int validate_fixed_buffer(struct vm_gk20a *vm,
1057                                  struct buffer_attrs *bfr,
1058                                  u64 map_offset)
1059 {
1060         struct device *dev = dev_from_vm(vm);
1061         struct vm_reserved_va_node *va_node;
1062         struct mapped_buffer_node *buffer;
1063
1064         if (map_offset & gmmu_page_offset_masks[bfr->pgsz_idx]) {
1065                 gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
1066                            map_offset);
1067                 return -EINVAL;
1068         }
1069
1070         /* find the space reservation */
1071         va_node = addr_to_reservation(vm, map_offset);
1072         if (!va_node) {
1073                 gk20a_warn(dev, "fixed offset mapping without space allocation");
1074                 return -EINVAL;
1075         }
1076
1077         /* check that this mapping does not collide with existing
1078          * mappings by checking the overlapping area between the current
1079          * buffer and all other mapped buffers */
1080
1081         list_for_each_entry(buffer,
1082                 &va_node->va_buffers_list, va_buffers_list) {
1083                 s64 begin = max(buffer->addr, map_offset);
1084                 s64 end = min(buffer->addr +
1085                         buffer->size, map_offset + bfr->size);
1086                 if (end - begin > 0) {
1087                         gk20a_warn(dev, "overlapping buffer map requested");
1088                         return -EINVAL;
1089                 }
1090         }
1091
1092         return 0;
1093 }
1094
1095 static u64 __locked_gmmu_map(struct vm_gk20a *vm,
1096                                 u64 map_offset,
1097                                 struct sg_table *sgt,
1098                                 u64 size,
1099                                 int pgsz_idx,
1100                                 u8 kind_v,
1101                                 u32 ctag_offset,
1102                                 u32 flags,
1103                                 int rw_flag)
1104 {
1105         int err = 0, i = 0;
1106         bool allocated = false;
1107         u32 pde_lo, pde_hi;
1108         struct device *d = dev_from_vm(vm);
1109
1110         /* Allocate (or validate when map_offset != 0) the virtual address. */
1111         if (!map_offset) {
1112                 map_offset = gk20a_vm_alloc_va(vm, size,
1113                                           pgsz_idx);
1114                 if (!map_offset) {
1115                         gk20a_err(d, "failed to allocate va space");
1116                         err = -ENOMEM;
1117                         goto fail_alloc;
1118                 }
1119                 allocated = true;
1120         }
1121
1122         pde_range_from_vaddr_range(vm,
1123                                    map_offset,
1124                                    map_offset + size - 1,
1125                                    &pde_lo, &pde_hi);
1126
1127         /* mark the addr range valid (but with 0 phys addr, which will fault) */
1128         for (i = pde_lo; i <= pde_hi; i++) {
1129                 err = validate_gmmu_page_table_gk20a_locked(vm, i,
1130                                                             pgsz_idx);
1131                 if (err) {
1132                         gk20a_err(d, "failed to validate page table %d: %d",
1133                                                            i, err);
1134                         goto fail_validate;
1135                 }
1136         }
1137
1138         err = update_gmmu_ptes_locked(vm, pgsz_idx,
1139                                       sgt,
1140                                       map_offset, map_offset + size - 1,
1141                                       kind_v,
1142                                       ctag_offset,
1143                                       flags &
1144                                       NVHOST_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
1145                                       rw_flag);
1146         if (err) {
1147                 gk20a_err(d, "failed to update ptes on map");
1148                 goto fail_validate;
1149         }
1150
1151         return map_offset;
1152 fail_validate:
1153         if (allocated)
1154                 gk20a_vm_free_va(vm, map_offset, size, pgsz_idx);
1155 fail_alloc:
1156         gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
1157         return 0;
1158 }
1159
1160 static void __locked_gmmu_unmap(struct vm_gk20a *vm,
1161                                 u64 vaddr,
1162                                 u64 size,
1163                                 int pgsz_idx,
1164                                 bool va_allocated,
1165                                 int rw_flag)
1166 {
1167         int err = 0;
1168         struct gk20a *g = gk20a_from_vm(vm);
1169
1170         if (va_allocated) {
1171                 err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
1172                 if (err) {
1173                         dev_err(dev_from_vm(vm),
1174                                 "failed to free va");
1175                         return;
1176                 }
1177         }
1178
1179         /* unmap here needs to know the page size we assigned at mapping */
1180         err = update_gmmu_ptes_locked(vm,
1181                                 pgsz_idx,
1182                                 0, /* n/a for unmap */
1183                                 vaddr,
1184                                 vaddr + size - 1,
1185                                 0, 0, false /* n/a for unmap */,
1186                                 rw_flag);
1187         if (err)
1188                 dev_err(dev_from_vm(vm),
1189                         "failed to update gmmu ptes on unmap");
1190
1191         /* detect which if any pdes/ptes can now be released */
1192
1193         /* flush l2 so any dirty lines are written out *now*.
1194          *  also as we could potentially be switching this buffer
1195          * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
1196          * some point in the future we need to invalidate l2.  e.g. switching
1197          * from a render buffer unmap (here) to later using the same memory
1198          * for gmmu ptes.  note the positioning of this relative to any smmu
1199          * unmapping (below). */
1200
1201         gk20a_mm_l2_flush(g, true);
1202 }
1203
1204 static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
1205                                          struct dma_buf *dmabuf,
1206                                          u64 offset_align,
1207                                          u32 flags,
1208                                          int kind,
1209                                          struct sg_table **sgt,
1210                                          bool user_mapped,
1211                                          int rw_flag)
1212 {
1213         struct mapped_buffer_node *mapped_buffer = 0;
1214
1215         mapped_buffer =
1216                 find_mapped_buffer_reverse_locked(&vm->mapped_buffers,
1217                                                   dmabuf, kind);
1218         if (!mapped_buffer)
1219                 return 0;
1220
1221         if (mapped_buffer->flags != flags)
1222                 return 0;
1223
1224         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET &&
1225             mapped_buffer->addr != offset_align)
1226                 return 0;
1227
1228         BUG_ON(mapped_buffer->vm != vm);
1229
1230         /* mark the buffer as used */
1231         if (user_mapped) {
1232                 if (mapped_buffer->user_mapped == 0)
1233                         vm->num_user_mapped_buffers++;
1234                 mapped_buffer->user_mapped++;
1235
1236                 /* If the mapping comes from user space, we own
1237                  * the handle ref. Since we reuse an
1238                  * existing mapping here, we need to give back those
1239                  * refs once in order not to leak.
1240                  */
1241                 if (mapped_buffer->own_mem_ref)
1242                         dma_buf_put(mapped_buffer->dmabuf);
1243                 else
1244                         mapped_buffer->own_mem_ref = true;
1245         }
1246         kref_get(&mapped_buffer->ref);
1247
1248         gk20a_dbg(gpu_dbg_map,
1249                    "reusing as=%d pgsz=%d flags=0x%x ctags=%d "
1250                    "start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x "
1251                    "own_mem_ref=%d user_mapped=%d",
1252                    vm_aspace_id(vm), mapped_buffer->pgsz_idx,
1253                    mapped_buffer->flags,
1254                    mapped_buffer->ctag_lines,
1255                    mapped_buffer->ctag_offset,
1256                    hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1257                    hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1258                    lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
1259                    hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1260                    lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
1261                    mapped_buffer->own_mem_ref, user_mapped);
1262
1263         if (sgt)
1264                 *sgt = mapped_buffer->sgt;
1265         return mapped_buffer->addr;
1266 }
1267
1268 u64 gk20a_vm_map(struct vm_gk20a *vm,
1269                         struct dma_buf *dmabuf,
1270                         u64 offset_align,
1271                         u32 flags /*NVHOST_AS_MAP_BUFFER_FLAGS_*/,
1272                         int kind,
1273                         struct sg_table **sgt,
1274                         bool user_mapped,
1275                         int rw_flag)
1276 {
1277         struct gk20a *g = gk20a_from_vm(vm);
1278         struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
1279         struct device *d = dev_from_vm(vm);
1280         struct mapped_buffer_node *mapped_buffer = 0;
1281         bool inserted = false, va_allocated = false;
1282         u32 gmmu_page_size = 0;
1283         u64 map_offset = 0;
1284         int err = 0;
1285         struct buffer_attrs bfr = {0};
1286         struct gk20a_comptags comptags;
1287         u64 buf_addr;
1288
1289         mutex_lock(&vm->update_gmmu_lock);
1290
1291         /* check if this buffer is already mapped */
1292         map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
1293                                                    flags, kind, sgt,
1294                                                    user_mapped, rw_flag);
1295         if (map_offset) {
1296                 mutex_unlock(&vm->update_gmmu_lock);
1297                 return map_offset;
1298         }
1299
1300         /* pin buffer to get phys/iovmm addr */
1301         bfr.sgt = gk20a_mm_pin(d, dmabuf);
1302         if (IS_ERR(bfr.sgt)) {
1303                 /* Falling back to physical is actually possible
1304                  * here in many cases if we use 4K phys pages in the
1305                  * gmmu.  However we have some regions which require
1306                  * contig regions to work properly (either phys-contig
1307                  * or contig through smmu io_vaspace).  Until we can
1308                  * track the difference between those two cases we have
1309                  * to fail the mapping when we run out of SMMU space.
1310                  */
1311                 gk20a_warn(d, "oom allocating tracking buffer");
1312                 goto clean_up;
1313         }
1314
1315         if (sgt)
1316                 *sgt = bfr.sgt;
1317
1318         bfr.kind_v = kind;
1319         bfr.size = dmabuf->size;
1320         buf_addr = (u64)sg_dma_address(bfr.sgt->sgl);
1321         if (unlikely(!buf_addr))
1322                 buf_addr = (u64)sg_phys(bfr.sgt->sgl);
1323         bfr.align = 1 << __ffs(buf_addr);
1324         bfr.pgsz_idx = -1;
1325
1326         /* If FIX_OFFSET is set, pgsz is determined. Otherwise, select
1327          * page size according to memory alignment */
1328         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
1329                 bfr.pgsz_idx = NV_GMMU_VA_IS_UPPER(offset_align) ?
1330                                 gmmu_page_size_big : gmmu_page_size_small;
1331         } else {
1332                 if (vm->big_pages)
1333                         gmmu_select_page_size(&bfr);
1334                 else
1335                         bfr.pgsz_idx = gmmu_page_size_small;
1336         }
1337
1338         /* validate/adjust bfr attributes */
1339         if (unlikely(bfr.pgsz_idx == -1)) {
1340                 gk20a_err(d, "unsupported page size detected");
1341                 goto clean_up;
1342         }
1343
1344         if (unlikely(bfr.pgsz_idx < gmmu_page_size_small ||
1345                      bfr.pgsz_idx > gmmu_page_size_big)) {
1346                 BUG_ON(1);
1347                 err = -EINVAL;
1348                 goto clean_up;
1349         }
1350         gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
1351
1352         /* Check if we should use a fixed offset for mapping this buffer */
1353         if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)  {
1354                 err = validate_fixed_buffer(vm, &bfr, offset_align);
1355                 if (err)
1356                         goto clean_up;
1357
1358                 map_offset = offset_align;
1359                 va_allocated = false;
1360         } else
1361                 va_allocated = true;
1362
1363         if (sgt)
1364                 *sgt = bfr.sgt;
1365
1366         err = setup_buffer_kind_and_compression(d, flags, &bfr, bfr.pgsz_idx);
1367         if (unlikely(err)) {
1368                 gk20a_err(d, "failure setting up kind and compression");
1369                 goto clean_up;
1370         }
1371
1372         /* bar1 and pmu vm don't need ctag */
1373         if (!vm->enable_ctag)
1374                 bfr.ctag_lines = 0;
1375
1376         gk20a_get_comptags(d, dmabuf, &comptags);
1377
1378         if (bfr.ctag_lines && !comptags.lines) {
1379                 /* allocate compression resources if needed */
1380                 err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator,
1381                                            bfr.ctag_lines);
1382                 if (err) {
1383                         /* ok to fall back here if we ran out */
1384                         /* TBD: we can partially alloc ctags as well... */
1385                         bfr.ctag_lines = bfr.ctag_offset = 0;
1386                         bfr.kind_v = bfr.uc_kind_v;
1387                 } else {
1388                         gk20a_get_comptags(d, dmabuf, &comptags);
1389
1390                         /* init/clear the ctag buffer */
1391                         g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
1392                                           comptags.offset,
1393                                           comptags.offset + comptags.lines - 1);
1394                 }
1395         }
1396
1397         /* store the comptag info */
1398         bfr.ctag_offset = comptags.offset;
1399
1400         /* update gmmu ptes */
1401         map_offset = __locked_gmmu_map(vm, map_offset,
1402                                         bfr.sgt,
1403                                         bfr.size,
1404                                         bfr.pgsz_idx,
1405                                         bfr.kind_v,
1406                                         bfr.ctag_offset,
1407                                         flags, rw_flag);
1408         if (!map_offset)
1409                 goto clean_up;
1410
1411         gk20a_dbg(gpu_dbg_map,
1412            "as=%d pgsz=%d "
1413            "kind=0x%x kind_uc=0x%x flags=0x%x "
1414            "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
1415            vm_aspace_id(vm), gmmu_page_size,
1416            bfr.kind_v, bfr.uc_kind_v, flags,
1417            bfr.ctag_lines, bfr.ctag_offset,
1418            hi32(map_offset), lo32(map_offset),
1419            hi32((u64)sg_dma_address(bfr.sgt->sgl)),
1420            lo32((u64)sg_dma_address(bfr.sgt->sgl)),
1421            hi32((u64)sg_phys(bfr.sgt->sgl)),
1422            lo32((u64)sg_phys(bfr.sgt->sgl)));
1423
1424 #if defined(NVHOST_DEBUG)
1425         {
1426                 int i;
1427                 struct scatterlist *sg = NULL;
1428                 gk20a_dbg(gpu_dbg_pte, "for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i)");
1429                 for_each_sg(bfr.sgt->sgl, sg, bfr.sgt->nents, i ) {
1430                         u64 da = sg_dma_address(sg);
1431                         u64 pa = sg_phys(sg);
1432                         u64 len = sg->length;
1433                         gk20a_dbg(gpu_dbg_pte, "i=%d pa=0x%x,%08x da=0x%x,%08x len=0x%x,%08x",
1434                                    i, hi32(pa), lo32(pa), hi32(da), lo32(da),
1435                                    hi32(len), lo32(len));
1436                 }
1437         }
1438 #endif
1439
1440         /* keep track of the buffer for unmapping */
1441         /* TBD: check for multiple mapping of same buffer */
1442         mapped_buffer = kzalloc(sizeof(*mapped_buffer), GFP_KERNEL);
1443         if (!mapped_buffer) {
1444                 gk20a_warn(d, "oom allocating tracking buffer");
1445                 goto clean_up;
1446         }
1447         mapped_buffer->dmabuf      = dmabuf;
1448         mapped_buffer->sgt         = bfr.sgt;
1449         mapped_buffer->addr        = map_offset;
1450         mapped_buffer->size        = bfr.size;
1451         mapped_buffer->pgsz_idx    = bfr.pgsz_idx;
1452         mapped_buffer->ctag_offset = bfr.ctag_offset;
1453         mapped_buffer->ctag_lines  = bfr.ctag_lines;
1454         mapped_buffer->vm          = vm;
1455         mapped_buffer->flags       = flags;
1456         mapped_buffer->kind        = kind;
1457         mapped_buffer->va_allocated = va_allocated;
1458         mapped_buffer->user_mapped = user_mapped ? 1 : 0;
1459         mapped_buffer->own_mem_ref = user_mapped;
1460         INIT_LIST_HEAD(&mapped_buffer->unmap_list);
1461         INIT_LIST_HEAD(&mapped_buffer->va_buffers_list);
1462         kref_init(&mapped_buffer->ref);
1463
1464         err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
1465         if (err) {
1466                 gk20a_err(d, "failed to insert into mapped buffer tree");
1467                 goto clean_up;
1468         }
1469         inserted = true;
1470         if (user_mapped)
1471                 vm->num_user_mapped_buffers++;
1472
1473         gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
1474
1475         if (!va_allocated) {
1476                 struct vm_reserved_va_node *va_node;
1477
1478                 /* find the space reservation */
1479                 va_node = addr_to_reservation(vm, map_offset);
1480                 list_add_tail(&mapped_buffer->va_buffers_list,
1481                               &va_node->va_buffers_list);
1482                 mapped_buffer->va_node = va_node;
1483         }
1484
1485         mutex_unlock(&vm->update_gmmu_lock);
1486
1487         /* Invalidate kernel mappings immediately */
1488         if (vm_aspace_id(vm) == -1)
1489                 gk20a_mm_tlb_invalidate(vm);
1490
1491         return map_offset;
1492
1493 clean_up:
1494         if (inserted) {
1495                 rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1496                 if (user_mapped)
1497                         vm->num_user_mapped_buffers--;
1498         }
1499         kfree(mapped_buffer);
1500         if (va_allocated)
1501                 gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
1502         if (!IS_ERR(bfr.sgt))
1503                 gk20a_mm_unpin(d, dmabuf, bfr.sgt);
1504
1505         mutex_unlock(&vm->update_gmmu_lock);
1506         gk20a_dbg_info("err=%d\n", err);
1507         return 0;
1508 }
1509
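/*
 * Kernel-internal mapping helper: takes update_gmmu_lock, maps the given
 * sg_table at a driver-chosen VA (4KB pages, no kind/comptags) and
 * invalidates the TLB immediately.  A minimal usage sketch, assuming the
 * caller already holds a pinned sg_table (names are illustrative only):
 *
 *	u64 gpu_va = gk20a_gmmu_map(vm, &sgt, size, 0, gk20a_mem_flag_none);
 *	if (!gpu_va)
 *		return -ENOMEM;
 *	...
 *	gk20a_gmmu_unmap(vm, gpu_va, size, gk20a_mem_flag_none);
 */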
1510 u64 gk20a_gmmu_map(struct vm_gk20a *vm,
1511                 struct sg_table **sgt,
1512                 u64 size,
1513                 u32 flags,
1514                 int rw_flag)
1515 {
1516         u64 vaddr;
1517
1518         mutex_lock(&vm->update_gmmu_lock);
1519         vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
1520                                 *sgt, /* sg table */
1521                                 size,
1522                                 0, /* page size index = 0 i.e. SZ_4K */
1523                                 0, /* kind */
1524                                 0, /* ctag_offset */
1525                                 flags, rw_flag);
1526         mutex_unlock(&vm->update_gmmu_lock);
1527         if (!vaddr) {
1528                 gk20a_err(dev_from_vm(vm), "failed to allocate va space");
1529                 return 0;
1530         }
1531
1532         /* Invalidate kernel mappings immediately */
1533         gk20a_mm_tlb_invalidate(vm);
1534
1535         return vaddr;
1536 }
1537
1538 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
1539                 u64 vaddr,
1540                 u64 size,
1541                 int rw_flag)
1542 {
1543         mutex_lock(&vm->update_gmmu_lock);
1544         __locked_gmmu_unmap(vm,
1545                         vaddr,
1546                         size,
1547                         0, /* page size 4K */
1548                         true, /*va_allocated */
1549                         rw_flag);
1550         mutex_unlock(&vm->update_gmmu_lock);
1551 }
1552
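/*
 * Translate an IOVA handed back by the DMA API into a physical address.
 * If the device is not behind an IOMMU (no dma_iommu_mapping), the address
 * is already physical and is returned unchanged.  Note that the offset
 * within the page is dropped by the PAGE_MASK alignment below.
 */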
1553 phys_addr_t gk20a_get_phys_from_iova(struct device *d,
1554                                 u64 dma_addr)
1555 {
1556         phys_addr_t phys;
1557         u64 iova;
1558
1559         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
1560         if (!mapping)
1561                 return dma_addr;
1562
1563         iova = dma_addr & PAGE_MASK;
1564         phys = iommu_iova_to_phys(mapping->domain, iova);
1565         return phys;
1566 }
1567
1568 /* get sg_table from already allocated buffer */
1569 int gk20a_get_sgtable(struct device *d, struct sg_table **sgt,
1570                         void *cpuva, u64 iova,
1571                         size_t size)
1572 {
1573         int err = 0;
1574         *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1575         if (!(*sgt)) {
1576                 dev_err(d, "failed to allocate memory\n");
1577                 err = -ENOMEM;
1578                 goto fail;
1579         }
1580         err = dma_get_sgtable(d, *sgt,
1581                         cpuva, iova,
1582                         size);
1583         if (err) {
1584                 dev_err(d, "failed to create sg table\n");
1585                 goto fail;
1586         }
1587         sg_dma_address((*sgt)->sgl) = iova;
1588
1589         return 0;
1590  fail:
1591         if (*sgt) {
1592                 kfree(*sgt);
1593                 *sgt = NULL;
1594         }
1595         return err;
1596 }
1597
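/*
 * Build a single-entry sg_table covering a physically contiguous run of
 * pages; only the first page pointer is consumed and 'size' must describe
 * one contiguous chunk.  The caller-supplied iova is recorded as the DMA
 * address of that single entry.
 */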
1598 int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt,
1599                         struct page **pages, u64 iova,
1600                         size_t size)
1601 {
1602         int err = 0;
1603         *sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
1604         if (!(*sgt)) {
1605                 dev_err(d, "failed to allocate memory\n");
1606                 err = -ENOMEM;
1607                 goto fail;
1608         }
1609         err = sg_alloc_table(*sgt, 1, GFP_KERNEL);
1610         if (err) {
1611                 dev_err(d, "failed to allocate sg_table\n");
1612                 goto fail;
1613         }
1614         sg_set_page((*sgt)->sgl, *pages, size, 0);
1615         sg_dma_address((*sgt)->sgl) = iova;
1616
1617         return 0;
1618  fail:
1619         if (*sgt) {
1620                 kfree(*sgt);
1621                 *sgt = NULL;
1622         }
1623         return err;
1624 }
1625
1626 void gk20a_free_sgtable(struct sg_table **sgt)
1627 {
1628         sg_free_table(*sgt);
1629         kfree(*sgt);
1630         *sgt = NULL;
1631 }
1632
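/*
 * Return the address to program into GMMU PTEs/PDEs for a scatterlist
 * entry.  With the Tegra SMMU enabled the DMA address is used and the SMMU
 * translation bit is set so the GPU's accesses go through the SMMU;
 * otherwise the raw physical address is used.  A DMA_ERROR_CODE mapping
 * yields 0.
 */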
1633 u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
1634 {
1635         u64 result = sg_phys(sgl);
1636 #ifdef CONFIG_TEGRA_IOMMU_SMMU
1637         if (sg_dma_address(sgl) == DMA_ERROR_CODE)
1638                 result = 0;
1639         else if (sg_dma_address(sgl)) {
1640                 result = sg_dma_address(sgl) |
1641                         1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT;
1642         }
1643 #endif
1644         return result;
1645 }
1646
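/*
 * Write (or clear) the PTEs covering [first_vaddr, last_vaddr].  When 'sgt'
 * is non-NULL, valid PTEs are built by walking the scatterlist; when it is
 * NULL the range is unmapped and the per-page-table ref counts are dropped,
 * freeing page tables that become empty.  Callers must hold
 * vm->update_gmmu_lock (hence the _locked suffix).
 */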
1647 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
1648                                    enum gmmu_pgsz_gk20a pgsz_idx,
1649                                    struct sg_table *sgt,
1650                                    u64 first_vaddr, u64 last_vaddr,
1651                                    u8 kind_v, u32 ctag_offset,
1652                                    bool cacheable,
1653                                    int rw_flag)
1654 {
1655         int err;
1656         u32 pde_lo, pde_hi, pde_i;
1657         struct scatterlist *cur_chunk;
1658         unsigned int cur_offset;
1659         u32 pte_w[2] = {0, 0}; /* invalid pte */
1660         u32 ctag = ctag_offset;
1661         u32 ctag_incr;
1662         u32 page_size  = gmmu_page_sizes[pgsz_idx];
1663         u64 addr = 0;
1664
1665         pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
1666                                    &pde_lo, &pde_hi);
1667
1668         gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
1669                    pgsz_idx, pde_lo, pde_hi);
1670
1671         /* If ctag_offset != 0, add 1 per pte; otherwise add 0.  The idea is to
1672          * avoid a branch below (per-pte). Note: this only works when the page
1673          * size (with comptags active) is 128KB. We have checks elsewhere for that. */
1674         ctag_incr = !!ctag_offset;
1675
1676         if (sgt)
1677                 cur_chunk = sgt->sgl;
1678         else
1679                 cur_chunk = NULL;
1680
1681         cur_offset = 0;
1682
1683         for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
1684                 u32 pte_lo, pte_hi;
1685                 u32 pte_cur;
1686                 void *pte_kv_cur;
1687
1688                 struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;
1689
1690                 if (pde_i == pde_lo)
1691                         pte_lo = pte_index_from_vaddr(vm, first_vaddr,
1692                                                       pgsz_idx);
1693                 else
1694                         pte_lo = 0;
1695
1696                 if ((pde_i != pde_hi) && (pde_hi != pde_lo))
1697                         pte_hi = vm->mm->page_table_sizing[pgsz_idx].num_ptes-1;
1698                 else
1699                         pte_hi = pte_index_from_vaddr(vm, last_vaddr,
1700                                                       pgsz_idx);
1701
1702                 /* get cpu access to the ptes */
1703                 err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur,
1704                                      pte->size);
1705                 if (err) {
1706                         gk20a_err(dev_from_vm(vm),
1707                                    "couldn't map ptes for update as=%d pte_ref_cnt=%d",
1708                                    vm_aspace_id(vm), pte->ref_cnt);
1709                         goto clean_up;
1710                 }
1711
1712                 gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
1713                 for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
1714
1715                         if (likely(sgt)) {
1716                                 u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
1717                                 if (new_addr) {
1718                                         addr = new_addr;
1719                                         addr += cur_offset;
1720                                 }
1721
1722                                 pte_w[0] = gmmu_pte_valid_true_f() |
1723                                         gmmu_pte_address_sys_f(addr
1724                                                 >> gmmu_pte_address_shift_v());
1725                                 pte_w[1] = gmmu_pte_aperture_video_memory_f() |
1726                                         gmmu_pte_kind_f(kind_v) |
1727                                         gmmu_pte_comptagline_f(ctag);
1728
1729                                 if (rw_flag == gk20a_mem_flag_read_only) {
1730                                         pte_w[0] |= gmmu_pte_read_only_true_f();
1731                                         pte_w[1] |=
1732                                                 gmmu_pte_write_disable_true_f();
1733                                 } else if (rw_flag ==
1734                                            gk20a_mem_flag_write_only) {
1735                                         pte_w[1] |=
1736                                                 gmmu_pte_read_disable_true_f();
1737                                 }
1738
1739                                 if (!cacheable)
1740                                         pte_w[1] |= gmmu_pte_vol_true_f();
1741
1742                                 pte->ref_cnt++;
1743
1744                                 gk20a_dbg(gpu_dbg_pte,
1745                                            "pte_cur=%d addr=0x%x,%08x kind=%d"
1746                                            " ctag=%d vol=%d refs=%d"
1747                                            " [0x%08x,0x%08x]",
1748                                            pte_cur, hi32(addr), lo32(addr),
1749                                            kind_v, ctag, !cacheable,
1750                                            pte->ref_cnt, pte_w[1], pte_w[0]);
1751
1752                                 ctag += ctag_incr;
1753                                 cur_offset += page_size;
1754                                 addr += page_size;
1755                                 while (cur_chunk &&
1756                                         cur_offset >= cur_chunk->length) {
1757                                         cur_offset -= cur_chunk->length;
1758                                         cur_chunk = sg_next(cur_chunk);
1759                                 }
1760
1761                         } else {
1762                                 pte->ref_cnt--;
1763                                 gk20a_dbg(gpu_dbg_pte,
1764                                            "pte_cur=%d ref=%d [0x0,0x0]",
1765                                            pte_cur, pte->ref_cnt);
1766                         }
1767
1768                         gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
1769                         gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
1770                 }
1771
1772                 unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
1773
1774                 if (pte->ref_cnt == 0) {
1775                         /* It can make sense to keep around one page table for
1776                          * each flavor (empty)... in case a new map is coming
1777                          * right back to alloc (and fill it in) again.
1778                          * But: deferring unmapping should help with pathological
1779                          * unmap/map/unmap/map cases where we'd trigger pte
1780                          * free/alloc/free/alloc.
1781                          */
1782                         free_gmmu_pages(vm, pte->ref, pte->sgt,
1783                                 vm->mm->page_table_sizing[pgsz_idx].order,
1784                                 pte->size);
1785                         pte->ref = NULL;
1786
1787                         /* rewrite pde */
1788                         update_gmmu_pde_locked(vm, pde_i);
1789                 }
1790
1791         }
1792
1793         smp_mb();
1794         vm->tlb_dirty = true;
1795         gk20a_dbg_fn("set tlb dirty");
1796
1797         return 0;
1798
1799 clean_up:
1800         /*TBD: potentially rewrite above to pre-map everything it needs to
1801          * as that's the only way it can fail */
1802         return err;
1803
1804 }
1805
1806
1807 /* for gk20a the "video memory" apertures here are misnomers. */
1808 static inline u32 big_valid_pde0_bits(u64 pte_addr)
1809 {
1810         u32 pde0_bits =
1811                 gmmu_pde_aperture_big_video_memory_f() |
1812                 gmmu_pde_address_big_sys_f(
1813                            (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1814         return  pde0_bits;
1815 }
1816 static inline u32 small_valid_pde1_bits(u64 pte_addr)
1817 {
1818         u32 pde1_bits =
1819                 gmmu_pde_aperture_small_video_memory_f() |
1820                 gmmu_pde_vol_small_true_f() | /* tbd: why? */
1821                 gmmu_pde_address_small_sys_f(
1822                            (u32)(pte_addr >> gmmu_pde_address_shift_v()));
1823         return pde1_bits;
1824 }
1825
1826 /* Given the current state of the ptes associated with a pde,
1827    determine its value and write it out.  There's no checking
1828    here for whether or not a change was actually
1829    made, so superfluous updates will cause unnecessary
1830    pde invalidations.
1831 */
1832 static void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
1833 {
1834         bool small_valid, big_valid;
1835         u64 pte_addr[2] = {0, 0};
1836         struct page_table_gk20a *small_pte =
1837                 vm->pdes.ptes[gmmu_page_size_small] + i;
1838         struct page_table_gk20a *big_pte =
1839                 vm->pdes.ptes[gmmu_page_size_big] + i;
1840         u32 pde_v[2] = {0, 0};
1841         u32 *pde;
1842
1843         small_valid = small_pte && small_pte->ref;
1844         big_valid   = big_pte && big_pte->ref;
1845
1846         if (small_valid)
1847                 pte_addr[gmmu_page_size_small] =
1848                         gk20a_mm_iova_addr(small_pte->sgt->sgl);
1849         if (big_valid)
1850                 pte_addr[gmmu_page_size_big] =
1851                         gk20a_mm_iova_addr(big_pte->sgt->sgl);
1852
1853         pde_v[0] = gmmu_pde_size_full_f();
1854         pde_v[0] |= big_valid ?
1855                 big_valid_pde0_bits(pte_addr[gmmu_page_size_big])
1856                 :
1857                 (gmmu_pde_aperture_big_invalid_f());
1858
1859         pde_v[1] |= (small_valid ?
1860                      small_valid_pde1_bits(pte_addr[gmmu_page_size_small])
1861                      :
1862                      (gmmu_pde_aperture_small_invalid_f() |
1863                       gmmu_pde_vol_small_false_f())
1864                      )
1865                 |
1866                 (big_valid ? (gmmu_pde_vol_big_true_f()) :
1867                  gmmu_pde_vol_big_false_f());
1868
1869         pde = pde_from_index(vm, i);
1870
1871         gk20a_mem_wr32(pde, 0, pde_v[0]);
1872         gk20a_mem_wr32(pde, 1, pde_v[1]);
1873
1874         smp_mb();
1875
1876         FLUSH_CPU_DCACHE(pde,
1877                          sg_phys(vm->pdes.sgt->sgl) + (i*gmmu_pde__size_v()),
1878                          sizeof(u32)*2);
1879
1880         gk20a_mm_l2_invalidate(vm->mm->g);
1881
1882         gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
1883
1884         vm->tlb_dirty  = true;
1885 }
1886
1887
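/*
 * Back a VA range with the shared "zero page" so that a sparse allocation
 * reads back as zero rather than faulting.  The zero page (one big page
 * worth of memory) is allocated lazily on first use, and every page in the
 * range is mapped to it with a fixed-offset mapping.
 */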
1888 static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
1889                                u32 num_pages, u32 pgsz_idx)
1890 {
1891         struct mm_gk20a *mm = vm->mm;
1892         struct gk20a *g = mm->g;
1893         u32 pgsz = gmmu_page_sizes[pgsz_idx];
1894         u32 i;
1895         dma_addr_t iova;
1896
1897         /* allocate the zero page if the va does not already have one */
1898         if (!vm->zero_page_cpuva) {
1899                 int err = 0;
1900                 vm->zero_page_cpuva = dma_alloc_coherent(&g->dev->dev,
1901                                                          mm->big_page_size,
1902                                                          &iova,
1903                                                          GFP_KERNEL);
1904                 if (!vm->zero_page_cpuva) {
1905                         dev_err(&g->dev->dev, "failed to allocate zero page\n");
1906                         return -ENOMEM;
1907                 }
1908
1909                 vm->zero_page_iova = iova;
1910                 err = gk20a_get_sgtable(&g->dev->dev, &vm->zero_page_sgt,
1911                                         vm->zero_page_cpuva, vm->zero_page_iova,
1912                                         mm->big_page_size);
1913                 if (err) {
1914                         dma_free_coherent(&g->dev->dev, mm->big_page_size,
1915                                           vm->zero_page_cpuva,
1916                                           vm->zero_page_iova);
1917                         vm->zero_page_iova = 0;
1918                         vm->zero_page_cpuva = NULL;
1919
1920                         dev_err(&g->dev->dev, "failed to create sg table for zero page\n");
1921                         return -ENOMEM;
1922                 }
1923         }
1924
1925         for (i = 0; i < num_pages; i++) {
1926                 u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
1927                         vm->zero_page_sgt, pgsz, pgsz_idx, 0, 0,
1928                         NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
1929                         gk20a_mem_flag_none);
1930
1931                 if (!page_vaddr) {
1932                         gk20a_err(dev_from_vm(vm), "failed to remap clean buffers!");
1933                         goto err_unmap;
1934                 }
1935                 vaddr += pgsz;
1936         }
1937
1938         gk20a_mm_l2_flush(mm->g, true);
1939
1940         return 0;
1941
1942 err_unmap:
1943
1944         WARN_ON(1);
1945         /* something went wrong. unmap pages */
1946         while (i--) {
1947                 vaddr -= pgsz;
1948                 __locked_gmmu_unmap(vm, vaddr, pgsz, pgsz_idx, 0,
1949                                     gk20a_mem_flag_none);
1950         }
1951
1952         return -EINVAL;
1953 }
1954
1955 /* NOTE! mapped_buffers lock must be held */
1956 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
1957 {
1958         struct vm_gk20a *vm = mapped_buffer->vm;
1959
1960         if (mapped_buffer->va_node &&
1961             mapped_buffer->va_node->sparse) {
1962                 u64 vaddr = mapped_buffer->addr;
1963                 u32 pgsz_idx = mapped_buffer->pgsz_idx;
1964                 u32 num_pages = mapped_buffer->size >>
1965                         gmmu_page_shifts[pgsz_idx];
1966
1967                 /* there is little we can do if this fails... */
1968                 gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
1969
1970         } else
1971                 __locked_gmmu_unmap(vm,
1972                                 mapped_buffer->addr,
1973                                 mapped_buffer->size,
1974                                 mapped_buffer->pgsz_idx,
1975                                 mapped_buffer->va_allocated,
1976                                 gk20a_mem_flag_none);
1977
1978         gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
1979                    vm_aspace_id(vm), gmmu_page_sizes[mapped_buffer->pgsz_idx],
1980                    hi32(mapped_buffer->addr), lo32(mapped_buffer->addr),
1981                    mapped_buffer->own_mem_ref);
1982
1983         gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
1984                        mapped_buffer->sgt);
1985
1986         /* remove from mapped buffer tree and remove list, free */
1987         rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
1988         if (!list_empty(&mapped_buffer->va_buffers_list))
1989                 list_del(&mapped_buffer->va_buffers_list);
1990
1991         /* keep track of mapped buffers */
1992         if (mapped_buffer->user_mapped)
1993                 vm->num_user_mapped_buffers--;
1994
1995         if (mapped_buffer->own_mem_ref)
1996                 dma_buf_put(mapped_buffer->dmabuf);
1997
1998         kfree(mapped_buffer);
1999
2001 }
2002
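/*
 * Drop a mapping by GPU VA.  The actual teardown happens in
 * gk20a_vm_unmap_locked() via the mapped_buffer kref, so the unmap can be
 * deferred while other references to the mapping are still outstanding.
 */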
2003 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
2004 {
2005         struct device *d = dev_from_vm(vm);
2006         struct mapped_buffer_node *mapped_buffer;
2007
2008         mutex_lock(&vm->update_gmmu_lock);
2009         mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
2010         if (!mapped_buffer) {
2011                 mutex_unlock(&vm->update_gmmu_lock);
2012                 gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
2013                 return;
2014         }
2015         kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
2016         mutex_unlock(&vm->update_gmmu_lock);
2017 }
2018
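/*
 * Tear down a VM: force-unmap every remaining buffer, drop any reserved VA
 * nodes, free all page tables and the page directory, destroy the VA
 * allocators and finally release the zero page and the vm itself.
 */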
2019 static void gk20a_vm_remove_support(struct vm_gk20a *vm)
2020 {
2021         struct gk20a *g = vm->mm->g;
2022         struct mapped_buffer_node *mapped_buffer;
2023         struct vm_reserved_va_node *va_node, *va_node_tmp;
2024         struct rb_node *node;
2025         int i;
2026
2027         gk20a_dbg_fn("");
2028         mutex_lock(&vm->update_gmmu_lock);
2029
2030         /* TBD: add a flag here for the unmap code to recognize teardown
2031          * and short-circuit any otherwise expensive operations. */
2032
2033         node = rb_first(&vm->mapped_buffers);
2034         while (node) {
2035                 mapped_buffer =
2036                         container_of(node, struct mapped_buffer_node, node);
2037                 gk20a_vm_unmap_locked(mapped_buffer);
2038                 node = rb_first(&vm->mapped_buffers);
2039         }
2040
2041         /* destroy remaining reserved memory areas */
2042         list_for_each_entry_safe(va_node, va_node_tmp, &vm->reserved_va_list,
2043                 reserved_va_list) {
2044                 list_del(&va_node->reserved_va_list);
2045                 kfree(va_node);
2046         }
2047
2048         /* unmapping all buffers above may not actually free
2049          * all vm ptes.  jettison them here for certain... */
2050         for (i = 0; i < vm->pdes.num_pdes; i++) {
2051                 struct page_table_gk20a *pte =
2052                         &vm->pdes.ptes[gmmu_page_size_small][i];
2053                 if (pte->ref) {
2054                         free_gmmu_pages(vm, pte->ref, pte->sgt,
2055                                 vm->mm->page_table_sizing[gmmu_page_size_small].order,
2056                                 pte->size);
2057                         pte->ref = NULL;
2058                 }
2059                 pte = &vm->pdes.ptes[gmmu_page_size_big][i];
2060                 if (pte->ref) {
2061                         free_gmmu_pages(vm, pte->ref, pte->sgt,
2062                                 vm->mm->page_table_sizing[gmmu_page_size_big].order,
2063                                 pte->size);
2064                         pte->ref = NULL;
2065                 }
2066         }
2067
2068         unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
2069         free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0, vm->pdes.size);
2070
2071         kfree(vm->pdes.ptes[gmmu_page_size_small]);
2072         kfree(vm->pdes.ptes[gmmu_page_size_big]);
2073         gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
2074         gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
2075
2076         mutex_unlock(&vm->update_gmmu_lock);
2077
2078         /* release zero page if used */
2079         if (vm->zero_page_cpuva)
2080                 dma_free_coherent(&g->dev->dev, vm->mm->big_page_size,
2081                                   vm->zero_page_cpuva, vm->zero_page_iova);
2082
2083         /* vm is not used anymore. release it. */
2084         kfree(vm);
2085 }
2086
2087 static void gk20a_vm_remove_support_kref(struct kref *ref)
2088 {
2089         struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
2090         gk20a_vm_remove_support(vm);
2091 }
2092
2093 void gk20a_vm_get(struct vm_gk20a *vm)
2094 {
2095         kref_get(&vm->ref);
2096 }
2097
2098 void gk20a_vm_put(struct vm_gk20a *vm)
2099 {
2100         kref_put(&vm->ref, gk20a_vm_remove_support_kref);
2101 }
2102
2103 /* address space interfaces for the gk20a module */
2104 int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
2105 {
2106         struct gk20a_as *as = as_share->as;
2107         struct gk20a *g = gk20a_from_as(as);
2108         struct mm_gk20a *mm = &g->mm;
2109         struct vm_gk20a *vm;
2110         u64 vma_size;
2111         u32 num_pages, low_hole_pages;
2112         char name[32];
2113         int err;
2114
2115         gk20a_dbg_fn("");
2116
2117         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
2118         if (!vm)
2119                 return -ENOMEM;
2120
2121         as_share->vm = vm;
2122
2123         vm->mm = mm;
2124         vm->as_share = as_share;
2125
2126         vm->big_pages = true;
2127
2128         vm->va_start  = mm->pde_stride;   /* create a one pde hole */
2129         vm->va_limit  = mm->channel.size; /* note this means channel.size is
2130                                              really just the max */
2131         {
2132                 u32 pde_lo, pde_hi;
2133                 pde_range_from_vaddr_range(vm,
2134                                            0, vm->va_limit-1,
2135                                            &pde_lo, &pde_hi);
2136                 vm->pdes.num_pdes = pde_hi + 1;
2137         }
2138
2139         vm->pdes.ptes[gmmu_page_size_small] =
2140                 kzalloc(sizeof(struct page_table_gk20a) *
2141                         vm->pdes.num_pdes, GFP_KERNEL);
2142
2143         vm->pdes.ptes[gmmu_page_size_big] =
2144                 kzalloc(sizeof(struct page_table_gk20a) *
2145                         vm->pdes.num_pdes, GFP_KERNEL);
2146
2147         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2148               vm->pdes.ptes[gmmu_page_size_big]))
2149                 return -ENOMEM;
2150
2151         gk20a_dbg_info("init space for va_limit=0x%llx num_pdes=%d",
2152                    vm->va_limit, vm->pdes.num_pdes);
2153
2154         /* allocate the page table directory */
2155         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2156                                &vm->pdes.sgt, &vm->pdes.size);
2157         if (err)
2158                 return -ENOMEM;
2159
2160         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2161                              vm->pdes.size);
2162         if (err) {
2163                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2164                                         vm->pdes.size);
2165                 return -ENOMEM;
2166         }
2167         gk20a_dbg(gpu_dbg_pte, "pdes.kv = 0x%p, pdes.phys = 0x%llx",
2168                         vm->pdes.kv,
2169                         gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2170         /* we could release vm->pdes.kv but it's only one page... */
2171
2172
2173         /* low-half: alloc small pages */
2174         /* high-half: alloc big pages */
2175         vma_size = mm->channel.size >> 1;
2176
2177         snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
2178                  gmmu_page_sizes[gmmu_page_size_small]>>10);
2179         num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_small]);
2180
2181         /* num_pages above is without regard to the low-side hole. */
2182         low_hole_pages = (vm->va_start >>
2183                           gmmu_page_shifts[gmmu_page_size_small]);
2184
2185         gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
2186               low_hole_pages,             /* start */
2187               num_pages - low_hole_pages, /* length */
2188               1);                         /* align */
2189
2190         snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
2191                  gmmu_page_sizes[gmmu_page_size_big]>>10);
2192
2193         num_pages = (u32)(vma_size >> gmmu_page_shifts[gmmu_page_size_big]);
2194         gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
2195                               num_pages, /* start */
2196                               num_pages, /* length */
2197                               1); /* align */
2198
2199         vm->mapped_buffers = RB_ROOT;
2200
2201         mutex_init(&vm->update_gmmu_lock);
2202         kref_init(&vm->ref);
2203         INIT_LIST_HEAD(&vm->reserved_va_list);
2204
2205         vm->enable_ctag = true;
2206
2207         return 0;
2208 }
2209
2210
2211 int gk20a_vm_release_share(struct gk20a_as_share *as_share)
2212 {
2213         struct vm_gk20a *vm = as_share->vm;
2214
2215         gk20a_dbg_fn("");
2216
2217         vm->as_share = NULL;
2218
2219         /* put as reference to vm */
2220         gk20a_vm_put(vm);
2221
2222         as_share->vm = NULL;
2223
2224         return 0;
2225 }
2226
2227
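/*
 * Reserve a VA range on behalf of user space (AS ioctl).  The range is
 * tracked with a vm_reserved_va_node; with the SPARSE flag (big pages only)
 * the whole range is immediately backed by zero-page mappings via
 * gk20a_vm_put_empty().  The resulting GPU VA is returned in o_a.offset.
 */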
2228 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
2229                          struct nvhost_as_alloc_space_args *args)
2230 {
2231         int err = -ENOMEM;
2232         int pgsz_idx;
2233         u32 start_page_nr;
2234         struct gk20a_allocator *vma;
2235         struct vm_gk20a *vm = as_share->vm;
2236         struct vm_reserved_va_node *va_node;
2237         u64 vaddr_start = 0;
2238
2239         gk20a_dbg_fn("flags=0x%x pgsz=0x%x nr_pages=0x%x o/a=0x%llx",
2240                         args->flags, args->page_size, args->pages,
2241                         args->o_a.offset);
2242
2243         /* determine pagesz idx */
2244         for (pgsz_idx = gmmu_page_size_small;
2245              pgsz_idx < gmmu_nr_page_sizes;
2246              pgsz_idx++) {
2247                 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2248                         break;
2249         }
2250
2251         if (pgsz_idx >= gmmu_nr_page_sizes) {
2252                 err = -EINVAL;
2253                 goto clean_up;
2254         }
2255
2256         va_node = kzalloc(sizeof(*va_node), GFP_KERNEL);
2257         if (!va_node) {
2258                 err = -ENOMEM;
2259                 goto clean_up;
2260         }
2261
2262         if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE &&
2263             pgsz_idx != gmmu_page_size_big) {
2264                 err = -ENOSYS;
2265                 kfree(va_node);
2266                 goto clean_up;
2267         }
2268
2269         start_page_nr = 0;
2270         if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
2271                 start_page_nr = (u32)(args->o_a.offset >>
2272                                       gmmu_page_shifts[pgsz_idx]);
2273
2274         vma = &vm->vma[pgsz_idx];
2275         err = vma->alloc(vma, &start_page_nr, args->pages);
2276         if (err) {
2277                 kfree(va_node);
2278                 goto clean_up;
2279         }
2280
2281         vaddr_start = (u64)start_page_nr << gmmu_page_shifts[pgsz_idx];
2282
2283         va_node->vaddr_start = vaddr_start;
2284         va_node->size = (u64)args->page_size * (u64)args->pages;
2285         va_node->pgsz_idx = pgsz_idx;
2286         INIT_LIST_HEAD(&va_node->va_buffers_list);
2287         INIT_LIST_HEAD(&va_node->reserved_va_list);
2288
2289         mutex_lock(&vm->update_gmmu_lock);
2290
2291         /* mark that we need to use sparse mappings here */
2292         if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
2293                 err = gk20a_vm_put_empty(vm, vaddr_start, args->pages,
2294                                          pgsz_idx);
2295                 if (err) {
2296                         mutex_unlock(&vm->update_gmmu_lock);
2297                         vma->free(vma, start_page_nr, args->pages);
2298                         kfree(va_node);
2299                         goto clean_up;
2300                 }
2301
2302                 va_node->sparse = true;
2303         }
2304
2305         list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
2306
2307         mutex_unlock(&vm->update_gmmu_lock);
2308
2309         args->o_a.offset = vaddr_start;
2310
2311 clean_up:
2312         return err;
2313 }
2314
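/*
 * Release a VA reservation made by gk20a_vm_alloc_space().  Buffers that
 * were mapped inside the reservation are detached from it (they become
 * ordinary mappings); a sparse reservation additionally has its zero-page
 * backing unmapped before the node is freed.
 */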
2315 int gk20a_vm_free_space(struct gk20a_as_share *as_share,
2316                         struct nvhost_as_free_space_args *args)
2317 {
2318         int err = -ENOMEM;
2319         int pgsz_idx;
2320         u32 start_page_nr;
2321         struct gk20a_allocator *vma;
2322         struct vm_gk20a *vm = as_share->vm;
2323         struct vm_reserved_va_node *va_node;
2324
2325         gk20a_dbg_fn("pgsz=0x%x nr_pages=0x%x o/a=0x%llx", args->page_size,
2326                         args->pages, args->offset);
2327
2328         /* determine pagesz idx */
2329         for (pgsz_idx = gmmu_page_size_small;
2330              pgsz_idx < gmmu_nr_page_sizes;
2331              pgsz_idx++) {
2332                 if (gmmu_page_sizes[pgsz_idx] == args->page_size)
2333                         break;
2334         }
2335
2336         if (pgsz_idx >= gmmu_nr_page_sizes) {
2337                 err = -EINVAL;
2338                 goto clean_up;
2339         }
2340
2341         start_page_nr = (u32)(args->offset >>
2342                               gmmu_page_shifts[pgsz_idx]);
2343
2344         vma = &vm->vma[pgsz_idx];
2345         err = vma->free(vma, start_page_nr, args->pages);
2346
2347         if (err)
2348                 goto clean_up;
2349
2350         mutex_lock(&vm->update_gmmu_lock);
2351         va_node = addr_to_reservation(vm, args->offset);
2352         if (va_node) {
2353                 struct mapped_buffer_node *buffer, *n;
2354
2355                 /* no need to deallocate the buffers in the reservation;
2356                  * just convert them into normal buffers */
2357
2358                 list_for_each_entry_safe(buffer, n,
2359                         &va_node->va_buffers_list, va_buffers_list)
2360                         list_del_init(&buffer->va_buffers_list);
2361
2362                 list_del(&va_node->reserved_va_list);
2363
2364                 /* if this was a sparse mapping, free the va */
2365                 if (va_node->sparse)
2366                         __locked_gmmu_unmap(vm,
2367                                 va_node->vaddr_start,
2368                                 va_node->size,
2369                                 va_node->pgsz_idx,
2370                                 false,
2371                                 gk20a_mem_flag_none);
2372                 kfree(va_node);
2373         }
2374         mutex_unlock(&vm->update_gmmu_lock);
2375
2376 clean_up:
2377         return err;
2378 }
2379
2380 int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
2381                           struct channel_gk20a *ch)
2382 {
2383         int err = 0;
2384         struct vm_gk20a *vm = as_share->vm;
2385
2386         gk20a_dbg_fn("");
2387
2388         ch->vm = vm;
2389         err = channel_gk20a_commit_va(ch);
2390         if (err)
2391                 ch->vm = NULL;
2392
2393         return err;
2394 }
2395
2396 int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
2397 {
2398         struct gk20a_dmabuf_priv *priv;
2399         static DEFINE_MUTEX(priv_lock);
2400
2401         priv = dma_buf_get_drvdata(dmabuf, dev);
2402         if (likely(priv))
2403                 return 0;
2404
2405         mutex_lock(&priv_lock);
2406         priv = dma_buf_get_drvdata(dmabuf, dev);
2407         if (priv)
2408                 goto priv_exist_or_err;
2409         priv = kzalloc(sizeof(*priv), GFP_KERNEL);
2410         if (!priv) {
2411                 priv = ERR_PTR(-ENOMEM);
2412                 goto priv_exist_or_err;
2413         }
2414         mutex_init(&priv->lock);
2415         dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);
2416 priv_exist_or_err:
2417         mutex_unlock(&priv_lock);
2418         if (IS_ERR(priv))
2419                 return -ENOMEM;
2420
2421         return 0;
2422 }
2423
2424
2425 static int gk20a_dmabuf_get_kind(struct dma_buf *dmabuf)
2426 {
2427         int kind = 0;
2428 #ifdef CONFIG_TEGRA_NVMAP
2429         int err;
2430         u64 nvmap_param;
2431
2432         err = nvmap_get_dmabuf_param(dmabuf, NVMAP_HANDLE_PARAM_KIND,
2433                                      &nvmap_param);
2434         kind = err ? kind : nvmap_param;
2435 #endif
2436         return kind;
2437 }
2438
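/*
 * AS ioctl entry point for mapping a dmabuf into a user VM.  A sketch of
 * how the AS layer might call it (the args fields shown here are
 * illustrative, not the exact ioctl structure):
 *
 *	u64 offset = args->o_a.offset;
 *	err = gk20a_vm_map_buffer(as_share, args->dmabuf_fd, &offset,
 *				  args->flags, args->kind);
 *	if (!err)
 *		args->o_a.offset = offset;
 *
 * A kind of -1 means "use the kind stored with the dmabuf/nvmap handle".
 */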
2439 int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
2440                         int dmabuf_fd,
2441                         u64 *offset_align,
2442                         u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
2443                         int kind)
2444 {
2445         int err = 0;
2446         struct vm_gk20a *vm = as_share->vm;
2447         struct dma_buf *dmabuf;
2448         u64 ret_va;
2449
2450         gk20a_dbg_fn("");
2451
2452         /* get ref to the mem handle (released on unmap_locked) */
2453         dmabuf = dma_buf_get(dmabuf_fd);
2454         if (IS_ERR(dmabuf))
2455                 return PTR_ERR(dmabuf);
2456
2457         err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev_from_vm(vm));
2458         if (err) {
2459                 dma_buf_put(dmabuf);
2460                 return err;
2461         }
2462
2463         if (kind == -1)
2464                 kind = gk20a_dmabuf_get_kind(dmabuf);
2465
2466         ret_va = gk20a_vm_map(vm, dmabuf, *offset_align,
2467                         flags, kind, NULL, true,
2468                         gk20a_mem_flag_none);
2469         *offset_align = ret_va;
2470         if (!ret_va) {
2471                 dma_buf_put(dmabuf);
2472                 err = -EINVAL;
2473         }
2474
2475         return err;
2476 }
2477
2478 int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
2479 {
2480         struct vm_gk20a *vm = as_share->vm;
2481
2482         gk20a_dbg_fn("");
2483
2484         gk20a_vm_unmap_user(vm, offset);
2485         return 0;
2486 }
2487
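/*
 * Set up the BAR1 VM and its instance block.  The instance block tells hw
 * where the page directory lives: the PDB address is programmed as a lo
 * word (pde_addr >> 12) and a hi word, and written together with the VA
 * limit into the ram_in_* fields below.
 */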
2488 int gk20a_init_bar1_vm(struct mm_gk20a *mm)
2489 {
2490         int err;
2491         phys_addr_t inst_pa;
2492         void *inst_ptr;
2493         struct vm_gk20a *vm = &mm->bar1.vm;
2494         struct gk20a *g = gk20a_from_mm(mm);
2495         struct device *d = dev_from_gk20a(g);
2496         struct inst_desc *inst_block = &mm->bar1.inst_block;
2497         u64 pde_addr;
2498         u32 pde_addr_lo;
2499         u32 pde_addr_hi;
2500         dma_addr_t iova;
2501
2502         vm->mm = mm;
2503
2504         mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
2505
2506         gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
2507
2508         vm->va_start = mm->pde_stride * 1;
2509         vm->va_limit = mm->bar1.aperture_size;
2510
2511         {
2512                 u32 pde_lo, pde_hi;
2513                 pde_range_from_vaddr_range(vm,
2514                                            0, vm->va_limit-1,
2515                                            &pde_lo, &pde_hi);
2516                 vm->pdes.num_pdes = pde_hi + 1;
2517         }
2518
2519         /* bar1 is likely only to ever use/need small page sizes. */
2520         /* But just in case, for now... arrange for both.*/
2521         vm->pdes.ptes[gmmu_page_size_small] =
2522                 kzalloc(sizeof(struct page_table_gk20a) *
2523                         vm->pdes.num_pdes, GFP_KERNEL);
2524
2525         vm->pdes.ptes[gmmu_page_size_big] =
2526                 kzalloc(sizeof(struct page_table_gk20a) *
2527                         vm->pdes.num_pdes, GFP_KERNEL);
2528
2529         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2530               vm->pdes.ptes[gmmu_page_size_big]))
2531                 return -ENOMEM;
2532
2533         gk20a_dbg_info("init space for bar1 va_limit=0x%llx num_pdes=%d",
2534                    vm->va_limit, vm->pdes.num_pdes);
2535
2536
2537         /* allocate the page table directory */
2538         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2539                                &vm->pdes.sgt, &vm->pdes.size);
2540         if (err)
2541                 goto clean_up;
2542
2543         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2544                              vm->pdes.size);
2545         if (err) {
2546                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2547                                         vm->pdes.size);
2548                 goto clean_up;
2549         }
2550         gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
2551                         vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2552         /* we could release vm->pdes.kv but it's only one page... */
2553
2554         pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2555         pde_addr_lo = u64_lo32(pde_addr >> 12);
2556         pde_addr_hi = u64_hi32(pde_addr);
2557
2558         gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2559                 (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl),
2560                 pde_addr_lo, pde_addr_hi);
2561
2562         /* allocate instance mem for bar1 */
2563         inst_block->size = ram_in_alloc_size_v();
2564         inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2565                                 &iova, GFP_KERNEL);
2566         if (!inst_block->cpuva) {
2567                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
2568                 err = -ENOMEM;
2569                 goto clean_up;
2570         }
2571
2572         inst_block->iova = iova;
2573         inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2574         if (!inst_block->cpu_pa) {
2575                 gk20a_err(d, "%s: failed to get phys address\n", __func__);
2576                 err = -ENOMEM;
2577                 goto clean_up;
2578         }
2579
2580         inst_pa = inst_block->cpu_pa;
2581         inst_ptr = inst_block->cpuva;
2582
2583         gk20a_dbg_info("bar1 inst block physical addr = 0x%llx, kv = 0x%p",
2584                 (u64)inst_pa, inst_ptr);
2585
2586         memset(inst_ptr, 0, ram_fc_size_val_v());
2587
2588         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2589                 ram_in_page_dir_base_target_vid_mem_f() |
2590                 ram_in_page_dir_base_vol_true_f() |
2591                 ram_in_page_dir_base_lo_f(pde_addr_lo));
2592
2593         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2594                 ram_in_page_dir_base_hi_f(pde_addr_hi));
2595
2596         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2597                  u64_lo32(vm->va_limit) | 0xFFF);
2598
2599         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2600                 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2601
2602         gk20a_dbg_info("bar1 inst block ptr: %08llx",  (u64)inst_pa);
2603         gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_bar1",
2604                               1,/*start*/
2605                               (vm->va_limit >> 12) - 1 /* length*/,
2606                               1); /* align */
2607         /* initialize just in case we try to use it anyway */
2608         gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_bar1-unused",
2609                               0x0badc0de, /* start */
2610                               1, /* length */
2611                               1); /* align */
2612
2613         vm->mapped_buffers = RB_ROOT;
2614
2615         mutex_init(&vm->update_gmmu_lock);
2616         kref_init(&vm->ref);
2617         INIT_LIST_HEAD(&vm->reserved_va_list);
2618
2619         return 0;
2620
2621 clean_up:
2622         /* free, etc */
2623         if (inst_block->cpuva)
2624                 dma_free_coherent(d, inst_block->size,
2625                         inst_block->cpuva, inst_block->iova);
2626         inst_block->cpuva = NULL;
2627         inst_block->iova = 0;
2628         return err;
2629 }
2630
2631 /* pmu vm, share channel_vm interfaces */
2632 int gk20a_init_pmu_vm(struct mm_gk20a *mm)
2633 {
2634         int err;
2635         phys_addr_t inst_pa;
2636         void *inst_ptr;
2637         struct vm_gk20a *vm = &mm->pmu.vm;
2638         struct gk20a *g = gk20a_from_mm(mm);
2639         struct device *d = dev_from_gk20a(g);
2640         struct inst_desc *inst_block = &mm->pmu.inst_block;
2641         u64 pde_addr;
2642         u32 pde_addr_lo;
2643         u32 pde_addr_hi;
2644         dma_addr_t iova;
2645
2646         vm->mm = mm;
2647
2648         mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
2649
2650         gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
2651
2652         vm->va_start  = GK20A_PMU_VA_START;
2653         vm->va_limit  = vm->va_start + mm->pmu.aperture_size;
2654
2655         {
2656                 u32 pde_lo, pde_hi;
2657                 pde_range_from_vaddr_range(vm,
2658                                            0, vm->va_limit-1,
2659                                            &pde_lo, &pde_hi);
2660                 vm->pdes.num_pdes = pde_hi + 1;
2661         }
2662
2663         /* The pmu is likely only to ever use/need small page sizes. */
2664         /* But just in case, for now... arrange for both.*/
2665         vm->pdes.ptes[gmmu_page_size_small] =
2666                 kzalloc(sizeof(struct page_table_gk20a) *
2667                         vm->pdes.num_pdes, GFP_KERNEL);
2668
2669         vm->pdes.ptes[gmmu_page_size_big] =
2670                 kzalloc(sizeof(struct page_table_gk20a) *
2671                         vm->pdes.num_pdes, GFP_KERNEL);
2672
2673         if (!(vm->pdes.ptes[gmmu_page_size_small] &&
2674               vm->pdes.ptes[gmmu_page_size_big]))
2675                 return -ENOMEM;
2676
2677         gk20a_dbg_info("init space for pmu va_limit=0x%llx num_pdes=%d",
2678                    vm->va_limit, vm->pdes.num_pdes);
2679
2680         /* allocate the page table directory */
2681         err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
2682                                &vm->pdes.sgt, &vm->pdes.size);
2683         if (err)
2684                 goto clean_up;
2685
2686         err = map_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, &vm->pdes.kv,
2687                              vm->pdes.size);
2688         if (err) {
2689                 free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
2690                                         vm->pdes.size);
2691                 goto clean_up;
2692         }
2693         gk20a_dbg_info("pmu pdes phys @ 0x%llx",
2694                         (u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
2695         /* we could release vm->pdes.kv but it's only one page... */
2696
2697         pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
2698         pde_addr_lo = u64_lo32(pde_addr >> 12);
2699         pde_addr_hi = u64_hi32(pde_addr);
2700
2701         gk20a_dbg_info("pde pa=0x%llx pde_addr_lo=0x%x pde_addr_hi=0x%x",
2702                         (u64)pde_addr, pde_addr_lo, pde_addr_hi);
2703
2704         /* allocate instance mem for pmu */
2705         inst_block->size = GK20A_PMU_INST_SIZE;
2706         inst_block->cpuva = dma_alloc_coherent(d, inst_block->size,
2707                                 &iova, GFP_KERNEL);
2708         if (!inst_block->cpuva) {
2709                 gk20a_err(d, "%s: memory allocation failed\n", __func__);
2710                 err = -ENOMEM;
2711                 goto clean_up;
2712         }
2713
2714         inst_block->iova = iova;
2715         inst_block->cpu_pa = gk20a_get_phys_from_iova(d, inst_block->iova);
2716         if (!inst_block->cpu_pa) {
2717                 gk20a_err(d, "%s: failed to get phys address\n", __func__);
2718                 err = -ENOMEM;
2719                 goto clean_up;
2720         }
2721
2722         inst_pa = inst_block->cpu_pa;
2723         inst_ptr = inst_block->cpuva;
2724
2725         gk20a_dbg_info("pmu inst block physical addr: 0x%llx", (u64)inst_pa);
2726
2727         memset(inst_ptr, 0, GK20A_PMU_INST_SIZE);
2728
2729         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
2730                 ram_in_page_dir_base_target_vid_mem_f() |
2731                 ram_in_page_dir_base_vol_true_f() |
2732                 ram_in_page_dir_base_lo_f(pde_addr_lo));
2733
2734         gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
2735                 ram_in_page_dir_base_hi_f(pde_addr_hi));
2736
2737         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
2738                  u64_lo32(vm->va_limit) | 0xFFF);
2739
2740         gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
2741                 ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
2742
2743         gk20a_allocator_init(&vm->vma[gmmu_page_size_small], "gk20a_pmu",
2744                               (vm->va_start >> 12), /* start */
2745                               (vm->va_limit - vm->va_start) >> 12, /*length*/
2746                               1); /* align */
2747         /* initialize just in case we try to use it anyway */
2748         gk20a_allocator_init(&vm->vma[gmmu_page_size_big], "gk20a_pmu-unused",
2749                               0x0badc0de, /* start */
2750                               1, /* length */
2751                               1); /* align */
2752
2753
2754         vm->mapped_buffers = RB_ROOT;
2755
2756         mutex_init(&vm->update_gmmu_lock);
2757         kref_init(&vm->ref);
2758         INIT_LIST_HEAD(&vm->reserved_va_list);
2759
2760         return 0;
2761
2762 clean_up:
2763         /* free, etc */
2764         if (inst_block->cpuva)
2765                 dma_free_coherent(d, inst_block->size,
2766                         inst_block->cpuva, inst_block->iova);
2767         inst_block->cpuva = NULL;
2768         inst_block->iova = 0;
2769         return err;
2770 }
2771
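/*
 * Flush path: an ELPG flush followed by an FB flush request, then poll the
 * flush status register until the operation is no longer outstanding or
 * pending (bounded retries on silicon, unbounded on simulation platforms).
 * The L2 flush/invalidate helpers below use the same poll pattern.
 */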
2772 int gk20a_mm_fb_flush(struct gk20a *g)
2773 {
2774         struct mm_gk20a *mm = &g->mm;
2775         u32 data;
2776         s32 retry = 100;
2777         int ret = 0;
2778
2779         gk20a_dbg_fn("");
2780
2781         mutex_lock(&mm->l2_op_lock);
2782
2783         g->ops.ltc.elpg_flush(g);
2784
2785         /* Make sure all previous writes are committed to the L2. There's no
2786            guarantee that writes are to DRAM. This will be a sysmembar internal
2787            to the L2. */
2788         gk20a_writel(g, flush_fb_flush_r(),
2789                 flush_fb_flush_pending_busy_f());
2790
2791         do {
2792                 data = gk20a_readl(g, flush_fb_flush_r());
2793
2794                 if (flush_fb_flush_outstanding_v(data) ==
2795                         flush_fb_flush_outstanding_true_v() ||
2796                     flush_fb_flush_pending_v(data) ==
2797                         flush_fb_flush_pending_busy_v()) {
2798                                 gk20a_dbg_info("fb_flush 0x%x", data);
2799                                 retry--;
2800                                 usleep_range(20, 40);
2801                 } else
2802                         break;
2803         } while (retry >= 0 || !tegra_platform_is_silicon());
2804
2805         if (retry < 0) {
2806                 gk20a_warn(dev_from_gk20a(g),
2807                         "fb_flush too many retries");
2808                 ret = -EBUSY;
2809         }
2810
2811         mutex_unlock(&mm->l2_op_lock);
2812
2813         return ret;
2814 }
2815
2816 static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
2817 {
2818         u32 data;
2819         s32 retry = 200;
2820
2821         /* Invalidate any clean lines from the L2 so subsequent reads go to
2822            DRAM. Dirty lines are not affected by this operation. */
2823         gk20a_writel(g, flush_l2_system_invalidate_r(),
2824                 flush_l2_system_invalidate_pending_busy_f());
2825
2826         do {
2827                 data = gk20a_readl(g, flush_l2_system_invalidate_r());
2828
2829                 if (flush_l2_system_invalidate_outstanding_v(data) ==
2830                         flush_l2_system_invalidate_outstanding_true_v() ||
2831                     flush_l2_system_invalidate_pending_v(data) ==
2832                         flush_l2_system_invalidate_pending_busy_v()) {
2833                                 gk20a_dbg_info("l2_system_invalidate 0x%x",
2834                                                 data);
2835                                 retry--;
2836                                 usleep_range(20, 40);
2837                 } else
2838                         break;
2839         } while (retry >= 0 || !tegra_platform_is_silicon());
2840
2841         if (retry < 0)
2842                 gk20a_warn(dev_from_gk20a(g),
2843                         "l2_system_invalidate too many retries");
2844 }
2845
2846 void gk20a_mm_l2_invalidate(struct gk20a *g)
2847 {
2848         struct mm_gk20a *mm = &g->mm;
2849         mutex_lock(&mm->l2_op_lock);
2850         gk20a_mm_l2_invalidate_locked(g);
2851         mutex_unlock(&mm->l2_op_lock);
2852 }
2853
2854 void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
2855 {
2856         struct mm_gk20a *mm = &g->mm;
2857         u32 data;
2858         s32 retry = 200;
2859
2860         gk20a_dbg_fn("");
2861
2862         mutex_lock(&mm->l2_op_lock);
2863
2864         /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
2865            as clean, so subsequent reads might hit in the L2. */
2866         gk20a_writel(g, flush_l2_flush_dirty_r(),
2867                 flush_l2_flush_dirty_pending_busy_f());
2868
2869         do {
2870                 data = gk20a_readl(g, flush_l2_flush_dirty_r());
2871
2872                 if (flush_l2_flush_dirty_outstanding_v(data) ==
2873                         flush_l2_flush_dirty_outstanding_true_v() ||
2874                     flush_l2_flush_dirty_pending_v(data) ==
2875                         flush_l2_flush_dirty_pending_busy_v()) {
2876                                 gk20a_dbg_info("l2_flush_dirty 0x%x", data);
2877                                 retry--;
2878                                 usleep_range(20, 40);
2879                 } else
2880                         break;
2881         } while (retry >= 0 || !tegra_platform_is_silicon());
2882
2883         if (retry < 0)
2884                 gk20a_warn(dev_from_gk20a(g),
2885                         "l2_flush_dirty too many retries");
2886
2887         if (invalidate)
2888                 gk20a_mm_l2_invalidate_locked(g);
2889
2890         mutex_unlock(&mm->l2_op_lock);
2891 }
2892
2893
2894 int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
2895                          struct dma_buf **dmabuf,
2896                          u64 *offset)
2897 {
2898         struct mapped_buffer_node *mapped_buffer;
2899
2900         gk20a_dbg_fn("gpu_va=0x%llx", gpu_va);
2901
2902         mutex_lock(&vm->update_gmmu_lock);
2903
2904         mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers,
2905                                                         gpu_va);
2906         if (!mapped_buffer) {
2907                 mutex_unlock(&vm->update_gmmu_lock);
2908                 return -EINVAL;
2909         }
2910
2911         *dmabuf = mapped_buffer->dmabuf;
2912         *offset = gpu_va - mapped_buffer->addr;
2913
2914         mutex_unlock(&vm->update_gmmu_lock);
2915
2916         return 0;
2917 }
2918
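/*
 * TLB invalidate sequence: skip if power is off or the tlb isn't dirty,
 * then wait for space in the MMU pri fifo, write the PDB address, trigger
 * an invalidate of all VAs, and finally wait for the fifo to drain before
 * clearing vm->tlb_dirty.
 */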
2919 void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
2920 {
2921         struct gk20a *g = gk20a_from_vm(vm);
2922         u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12);
2923         u32 data;
2924         s32 retry = 200;
2925         static DEFINE_MUTEX(tlb_lock);
2926
2927         gk20a_dbg_fn("");
2928
2929         /* Page tables are considered sw state and are preserved across
2930            prepare_poweroff. When gk20a deinit releases those page tables,
2931            common code in the vm unmap path calls tlb invalidate, which
2932            touches hw. Use the power_on flag to skip tlb invalidation when
2933            gpu power is turned off. */
2934
2935         if (!g->power_on)
2936                 return;
2937
2938         /* No need to invalidate if tlb is clean */
2939         mutex_lock(&vm->update_gmmu_lock);
2940         if (!vm->tlb_dirty) {
2941                 mutex_unlock(&vm->update_gmmu_lock);
2942                 return;
2943         }
2944
2945         mutex_lock(&tlb_lock);
2946         do {
2947                 data = gk20a_readl(g, fb_mmu_ctrl_r());
2948                 if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
2949                         break;
2950                 usleep_range(20, 40);
2951                 retry--;
2952         } while (retry >= 0 || !tegra_platform_is_silicon());
2953
2954         if (retry < 0) {
2955                 gk20a_warn(dev_from_gk20a(g),
2956                         "wait mmu fifo space too many retries");
2957                 goto out;
2958         }
2959
2960         gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
2961                 fb_mmu_invalidate_pdb_addr_f(addr_lo) |
2962                 fb_mmu_invalidate_pdb_aperture_vid_mem_f());
2963
2964         gk20a_writel(g, fb_mmu_invalidate_r(),
2965                 fb_mmu_invalidate_all_va_true_f() |
2966                 fb_mmu_invalidate_trigger_true_f());
2967
2968         do {
2969                 data = gk20a_readl(g, fb_mmu_ctrl_r());
2970                 if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
2971                         fb_mmu_ctrl_pri_fifo_empty_false_f())
2972                         break;
2973                 retry--;
2974                 usleep_range(20, 40);
2975         } while (retry >= 0 || !tegra_platform_is_silicon());
2976
2977         if (retry < 0)
2978                 gk20a_warn(dev_from_gk20a(g),
2979                         "mmu invalidate too many retries");
2980
2981 out:
2982         mutex_unlock(&tlb_lock);
2983         vm->tlb_dirty = false;
2984         mutex_unlock(&vm->update_gmmu_lock);
2985 }
2986
2987 int gk20a_mm_suspend(struct gk20a *g)
2988 {
2989         gk20a_dbg_fn("");
2990
2991         gk20a_mm_fb_flush(g);
2992         gk20a_mm_l2_flush(g, true);
2993
2994         gk20a_dbg_fn("done");
2995         return 0;
2996 }
2997
2998 void gk20a_mm_ltc_isr(struct gk20a *g)
2999 {
3000         u32 intr;
3001
3002         intr = gk20a_readl(g, ltc_ltc0_ltss_intr_r());
3003         gk20a_err(dev_from_gk20a(g), "ltc: %08x\n", intr);
3004         gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
3005 }
3006
3007 bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
3008 {
3009         u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
3010         return fb_mmu_debug_ctrl_debug_v(debug_ctrl) ==
3011                 fb_mmu_debug_ctrl_debug_enabled_v();
3012 }