/*
 * drivers/video/tegra/nvmap/nvmap.c
 *
 * Memory manager for Tegra GPU
 *
 * Copyright (c) 2009-2011, NVIDIA Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/err.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/rbtree.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/slab.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include <mach/iovmm.h>
#include <mach/nvmap.h>

#include "nvmap.h"
#include "nvmap_mru.h"

/* private nvmap_handle flag for pinning duplicate detection */
#define NVMAP_HANDLE_VISITED (0x1ul << 31)

/* map the backing pages for a heap_pgalloc handle into its IOVMM area */
static void map_iovmm_area(struct nvmap_handle *h)
{
	tegra_iovmm_addr_t va;
	unsigned long i;

	BUG_ON(!h->heap_pgalloc || !h->pgalloc.area);
	BUG_ON(h->size & ~PAGE_MASK);
	WARN_ON(!h->pgalloc.dirty);

	for (va = h->pgalloc.area->iovm_start, i = 0;
	     va < (h->pgalloc.area->iovm_start + h->size);
	     i++, va += PAGE_SIZE) {
		BUG_ON(!pfn_valid(page_to_pfn(h->pgalloc.pages[i])));
		tegra_iovmm_vm_insert_pfn(h->pgalloc.area, va,
					  page_to_pfn(h->pgalloc.pages[i]));
	}
	h->pgalloc.dirty = false;
}

/* must be called inside nvmap_pin_lock, to ensure that an entire stream
 * of pins will complete without racing with a second stream. handle should
 * have nvmap_handle_get (or nvmap_validate_get) called before calling
 * this function. */
static int pin_locked(struct nvmap_client *client, struct nvmap_handle *h)
{
	struct tegra_iovmm_area *area;

	BUG_ON(!h->alloc);

	if (atomic_inc_return(&h->pin) == 1) {
		if (h->heap_pgalloc && !h->pgalloc.contig) {
			area = nvmap_handle_iovmm(client, h);
			if (!area) {
				/* no race here, inside the pin mutex */
				atomic_dec(&h->pin);
				return -ENOMEM;
			}
			if (area != h->pgalloc.area)
				h->pgalloc.dirty = true;
			h->pgalloc.area = area;
		}
	}
	return 0;
}

static int wait_pin_locked(struct nvmap_client *client, struct nvmap_handle *h)
{
	int ret;

	ret = pin_locked(client, h);
	if (ret)
		ret = wait_event_interruptible(client->share->pin_wait,
					       !pin_locked(client, h));
	return ret ? -EINTR : 0;
}

/* doesn't need to be called inside nvmap_pin_lock, since this will only
 * expand the available VM area */
static int handle_unpin(struct nvmap_client *client, struct nvmap_handle *h)
{
	int ret = 0;

	nvmap_mru_lock(client->share);

	if (atomic_read(&h->pin) == 0) {
		nvmap_err(client, "%s unpinning unpinned handle %p\n",
			  current->group_leader->comm, h);
		nvmap_mru_unlock(client->share);
		return 0;
	}

	BUG_ON(!h->alloc);

	if (!atomic_dec_return(&h->pin)) {
		if (h->heap_pgalloc && h->pgalloc.area) {
			/* if a secure handle is clean (i.e., mapped into
			 * IOVMM), it needs to be zapped on unpin. */
			if (h->secure && !h->pgalloc.dirty) {
				tegra_iovmm_zap_vm(h->pgalloc.area);
				h->pgalloc.dirty = true;
			}
			nvmap_mru_insert_locked(client->share, h);
			ret = 1;
		}
	}

	nvmap_mru_unlock(client->share);
	nvmap_handle_put(h);
	return ret;
}

static int handle_unpin_noref(struct nvmap_client *client, unsigned long id)
{
	struct nvmap_handle *h;
	int w;

	h = nvmap_validate_get(client, id);
	if (unlikely(!h)) {
		nvmap_err(client, "%s attempting to unpin invalid handle %p\n",
			  current->group_leader->comm, (void *)id);
		return 0;
	}

	nvmap_err(client, "%s unpinning unreferenced handle %p\n",
		  current->group_leader->comm, h);

	w = handle_unpin(client, h);
	nvmap_handle_put(h);
	return w;
}

void nvmap_unpin_ids(struct nvmap_client *client,
		     unsigned int nr, const unsigned long *ids)
{
	unsigned int i;
	int do_wake = 0;

	for (i = 0; i < nr; i++) {
		struct nvmap_handle_ref *ref;

		nvmap_ref_lock(client);
		ref = _nvmap_validate_id_locked(client, ids[i]);
		if (ref) {
			struct nvmap_handle *h = ref->handle;
			int e = atomic_add_unless(&ref->pin, -1, 0);

			nvmap_ref_unlock(client);

			if (!e)
				nvmap_err(client, "%s unpinning unpinned "
					  "handle %08lx\n",
					  current->group_leader->comm, ids[i]);
			else
				do_wake |= handle_unpin(client, h);
		} else {
			nvmap_ref_unlock(client);
			if (client->super)
				do_wake |= handle_unpin_noref(client, ids[i]);
			else
				nvmap_err(client, "%s unpinning invalid "
					  "handle %08lx\n",
					  current->group_leader->comm, ids[i]);
		}
	}

	if (do_wake)
		wake_up(&client->share->pin_wait);
}

/* pins a list of handle_ref objects; same conditions apply as to
 * _nvmap_handle_pin, but also bumps the pin count of each handle_ref. */
int nvmap_pin_ids(struct nvmap_client *client,
		  unsigned int nr, const unsigned long *ids)
{
	int ret = 0;
	unsigned int i;
	unsigned int cnt = 0;
	struct nvmap_handle **h = (struct nvmap_handle **)ids;
	struct nvmap_handle_ref *ref;

	/* to optimize for the common case (client provided valid handle
	 * references and the pin succeeds), increment the handle_ref pin
	 * count during validation. in error cases, the tree will need to
	 * be re-walked, since the handle_ref is discarded so that an
	 * allocation isn't required. if a handle_ref is not found,
	 * locally validate that the caller has permission to pin the handle;
	 * handle_refs are not created in this case, so it is possible that
	 * if the caller crashes after pinning a global handle, the handle
	 * will be permanently leaked. */
	nvmap_ref_lock(client);
	for (i = 0; i < nr && !ret; i++) {
		ref = _nvmap_validate_id_locked(client, ids[i]);
		if (ref) {
			atomic_inc(&ref->pin);
			nvmap_handle_get(h[i]);
		} else {
			struct nvmap_handle *verify;

			nvmap_ref_unlock(client);
			verify = nvmap_validate_get(client, ids[i]);
			if (verify)
				nvmap_warn(client, "%s pinning unreferenced "
					   "handle %p\n",
					   current->group_leader->comm, h[i]);
			else
				ret = -EPERM;
			nvmap_ref_lock(client);
		}
	}
	nvmap_ref_unlock(client);

	nr = i;

	if (ret)
		goto out;

	ret = mutex_lock_interruptible(&client->share->pin_lock);
	if (WARN_ON(ret))
		goto out;

	for (cnt = 0; cnt < nr && !ret; cnt++)
		ret = wait_pin_locked(client, h[cnt]);
	mutex_unlock(&client->share->pin_lock);

	if (ret) {
		int do_wake = 0;

		for (i = 0; i < cnt; i++)
			do_wake |= handle_unpin(client, h[i]);
		if (do_wake)
			wake_up(&client->share->pin_wait);
		ret = -EINTR;
	} else {
		for (i = 0; i < nr; i++) {
			if (h[i]->heap_pgalloc && h[i]->pgalloc.dirty)
				map_iovmm_area(h[i]);
		}
	}

out:
	if (ret) {
		nvmap_ref_lock(client);
		for (i = 0; i < nr; i++) {
			ref = _nvmap_validate_id_locked(client, ids[i]);
			if (!ref) {
				nvmap_warn(client, "%s freed handle %p "
					   "during pinning\n",
					   current->group_leader->comm,
					   (void *)ids[i]);
				continue;
			}
			atomic_dec(&ref->pin);
		}
		nvmap_ref_unlock(client);

		for (i = cnt; i < nr; i++)
			nvmap_handle_put(h[i]);
	}

	return ret;
}
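
/* Usage sketch (illustrative only, not part of the driver): a kernel-side
 * caller that pins a set of handle ids for the duration of one job pairs
 * nvmap_pin_ids() with nvmap_unpin_ids(). The id array contents and the
 * job-completion step below are hypothetical placeholders.
 *
 *	unsigned long ids[2] = { cmdbuf_id, surface_id };
 *	int err;
 *
 *	err = nvmap_pin_ids(client, ARRAY_SIZE(ids), ids);
 *	if (err)
 *		return err;
 *	...submit the job and wait for the hardware to finish...
 *	nvmap_unpin_ids(client, ARRAY_SIZE(ids), ids);
 */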

static phys_addr_t handle_phys(struct nvmap_handle *h)
{
	phys_addr_t addr;

	if (h->heap_pgalloc && h->pgalloc.contig) {
		addr = page_to_phys(h->pgalloc.pages[0]);
	} else if (h->heap_pgalloc) {
		BUG_ON(!h->pgalloc.area);
		addr = h->pgalloc.area->iovm_start;
	} else {
		addr = h->carveout->base;
	}

	return addr;
}

/* stores the physical address (+offset) of each handle relocation entry
 * into its output location. see nvmap_pin_array for more details.
 *
 * each entry in arr (i.e., each relocation request) specifies two handles:
 * the handle to pin (pin), and the handle where the address of pin should be
 * written (patch). in pseudocode, this loop basically looks like:
 *
 * for (i = 0; i < nr; i++) {
 *	(pin, pin_offset, patch, patch_offset) = arr[i];
 *	patch[patch_offset] = address_of(pin) + pin_offset;
 * }
 */
static int nvmap_reloc_pin_array(struct nvmap_client *client,
				 const struct nvmap_pinarray_elem *arr,
				 int nr, struct nvmap_handle *gather)
{
	struct nvmap_handle *last_patch = NULL;
	unsigned int last_pfn = 0;
	pte_t **pte;
	void *addr;
	int i;

	pte = nvmap_alloc_pte(client->dev, &addr);
	if (IS_ERR(pte))
		return PTR_ERR(pte);

	for (i = 0; i < nr; i++) {
		struct nvmap_handle *patch;
		struct nvmap_handle *pin;
		phys_addr_t reloc_addr;
		phys_addr_t phys;
		unsigned int pfn;

		/* all of the handles are validated and get'ted prior to
		 * calling this function, so casting is safe here */
		pin = (struct nvmap_handle *)arr[i].pin_mem;

		if (arr[i].patch_mem == (unsigned long)last_patch) {
			patch = last_patch;
		} else if (arr[i].patch_mem == (unsigned long)gather) {
			patch = gather;
		} else {
			if (last_patch)
				nvmap_handle_put(last_patch);

			patch = nvmap_get_handle_id(client, arr[i].patch_mem);
			if (!patch) {
				nvmap_free_pte(client->dev, pte);
				return -EPERM;
			}
			last_patch = patch;
		}

		if (patch->heap_pgalloc) {
			unsigned int page = arr[i].patch_offset >> PAGE_SHIFT;
			phys = page_to_phys(patch->pgalloc.pages[page]);
			phys += (arr[i].patch_offset & ~PAGE_MASK);
		} else {
			phys = patch->carveout->base + arr[i].patch_offset;
		}

		pfn = __phys_to_pfn(phys);
		if (pfn != last_pfn) {
			pgprot_t prot = nvmap_pgprot(patch, pgprot_kernel);
			phys_addr_t kaddr = (phys_addr_t)addr;
			set_pte_at(&init_mm, kaddr, *pte, pfn_pte(pfn, prot));
			flush_tlb_kernel_page(kaddr);
			last_pfn = pfn;
		}

		reloc_addr = handle_phys(pin) + arr[i].pin_offset;
		__raw_writel(reloc_addr, addr + (phys & ~PAGE_MASK));
	}

	nvmap_free_pte(client->dev, pte);

	if (last_patch)
		nvmap_handle_put(last_patch);

	wmb();

	return 0;
}

static int nvmap_validate_get_pin_array(struct nvmap_client *client,
					const struct nvmap_pinarray_elem *arr,
					int nr, struct nvmap_handle **h)
{
	int i;
	int ret = 0;
	int count = 0;

	nvmap_ref_lock(client);

	for (i = 0; i < nr; i++) {
		struct nvmap_handle_ref *ref;

		if (need_resched()) {
			nvmap_ref_unlock(client);
			schedule();
			nvmap_ref_lock(client);
		}

		ref = _nvmap_validate_id_locked(client, arr[i].pin_mem);

		if (!ref)
			nvmap_warn(client, "failed to validate id\n");
		else if (!ref->handle)
			nvmap_warn(client, "id had no associated handle\n");
		else if (!ref->handle->alloc)
			nvmap_warn(client, "handle had no allocation\n");

		if (!ref || !ref->handle || !ref->handle->alloc) {
			ret = -EPERM;
			break;
		}

		/* a handle may be referenced multiple times in arr, but
		 * it will only be pinned once; this ensures that the
		 * minimum number of sync-queue slots in the host driver
		 * are dedicated to storing unpin lists, which allows
		 * for greater parallelism between the CPU and graphics
		 * processor */
		if (ref->handle->flags & NVMAP_HANDLE_VISITED)
			continue;

		ref->handle->flags |= NVMAP_HANDLE_VISITED;

		h[count] = nvmap_handle_get(ref->handle);
		count++;
	}

	nvmap_ref_unlock(client);

	if (ret) {
		for (i = 0; i < count; i++) {
			h[i]->flags &= ~NVMAP_HANDLE_VISITED;
			nvmap_handle_put(h[i]);
		}
		return ret;
	}

	return count;
}

/* a typical mechanism host1x clients use to drive the Tegra graphics
 * processor is to build a command buffer which contains relocatable
 * memory handle commands, and rely on the kernel to convert these in-place
 * to addresses which are understood by the GPU hardware.
 *
 * this is implemented by having clients provide a sideband array
 * of relocatable handles (+ offsets) and the location in the command
 * buffer handle to patch with the GPU address when the client submits
 * its command buffer to the host1x driver.
 *
 * the host driver also uses this relocation mechanism internally to
 * relocate the client's (unpinned) command buffers into host-addressable
 * memory.
 *
 * @client: nvmap_client which should be used for validation; should be
 *          owned by the process which is submitting command buffers
 * @gather: special handle for relocated command buffer outputs used
 *          internally by the host driver. if this handle is encountered
 *          as an output handle in the relocation array, it is assumed
 *          to be a known-good output and is not validated.
 * @arr: array of ((relocatable handle, offset), (output handle, offset))
 * @nr: number of entries in arr
 * @unique_arr: list of nvmap_handle objects which were pinned by
 *              nvmap_pin_array. must be unpinned by the caller after the
 *              command buffers referenced in gather have completed.
 */
int nvmap_pin_array(struct nvmap_client *client, struct nvmap_handle *gather,
		    const struct nvmap_pinarray_elem *arr, int nr,
		    struct nvmap_handle **unique_arr)
{
	int count = 0;
	int pinned = 0;
	int ret = 0;
	int i;

	if (mutex_lock_interruptible(&client->share->pin_lock)) {
		nvmap_warn(client, "%s interrupted when acquiring pin lock\n",
			   current->group_leader->comm);
		return -EINTR;
	}

	count = nvmap_validate_get_pin_array(client, arr, nr, unique_arr);
	if (count < 0) {
		mutex_unlock(&client->share->pin_lock);
		nvmap_warn(client, "failed to validate pin array\n");
		return count;
	}

	for (i = 0; i < count; i++)
		unique_arr[i]->flags &= ~NVMAP_HANDLE_VISITED;

	for (pinned = 0; pinned < count && !ret; pinned++)
		ret = wait_pin_locked(client, unique_arr[pinned]);

	mutex_unlock(&client->share->pin_lock);

	if (!ret)
		ret = nvmap_reloc_pin_array(client, arr, nr, gather);

	if (WARN_ON(ret)) {
		int do_wake = 0;

		for (i = pinned; i < count; i++)
			nvmap_handle_put(unique_arr[i]);

		for (i = 0; i < pinned; i++)
			do_wake |= handle_unpin(client, unique_arr[i]);

		if (do_wake)
			wake_up(&client->share->pin_wait);

		return ret;
	} else {
		for (i = 0; i < count; i++) {
			if (unique_arr[i]->heap_pgalloc &&
			    unique_arr[i]->pgalloc.dirty)
				map_iovmm_area(unique_arr[i]);
		}

		return count;
	}
}
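
/* Usage sketch (illustrative only, not part of the driver): a hypothetical
 * host1x submit path would pin the relocation array once per submission and
 * unpin the returned unique handles only after the gather has been executed
 * by the hardware. "relocs", "num_relocs", "gather_handle" and MAX_HANDLES
 * are placeholder names, not symbols defined by nvmap.
 *
 *	struct nvmap_handle *unique[MAX_HANDLES];
 *	int count;
 *
 *	count = nvmap_pin_array(client, gather_handle, relocs,
 *				num_relocs, unique);
 *	if (count < 0)
 *		return count;
 *	...submit the gather to host1x and wait for it to complete...
 *	nvmap_unpin_handles(client, unique, count);
 */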

phys_addr_t nvmap_pin(struct nvmap_client *client,
		      struct nvmap_handle_ref *ref)
{
	struct nvmap_handle *h;
	phys_addr_t phys;
	int ret = 0;

	h = nvmap_handle_get(ref->handle);
	if (WARN_ON(!h))
		return -EINVAL;

	atomic_inc(&ref->pin);

	if (WARN_ON(mutex_lock_interruptible(&client->share->pin_lock))) {
		ret = -EINTR;
	} else {
		ret = wait_pin_locked(client, h);
		mutex_unlock(&client->share->pin_lock);
	}

	if (ret) {
		atomic_dec(&ref->pin);
		nvmap_handle_put(h);
		return ret;
	}

	if (h->heap_pgalloc && h->pgalloc.dirty)
		map_iovmm_area(h);
	phys = handle_phys(h);

	return phys;
}

phys_addr_t nvmap_handle_address(struct nvmap_client *c, unsigned long id)
{
	struct nvmap_handle *h;
	phys_addr_t phys;

	h = nvmap_get_handle_id(c, id);
	if (!h)
		return -EPERM;

	mutex_lock(&h->lock);
	phys = handle_phys(h);
	mutex_unlock(&h->lock);
	nvmap_handle_put(h);

	return phys;
}

void nvmap_unpin(struct nvmap_client *client, struct nvmap_handle_ref *ref)
{
	atomic_dec(&ref->pin);
	if (handle_unpin(client, ref->handle))
		wake_up(&client->share->pin_wait);
}

void nvmap_unpin_handles(struct nvmap_client *client,
			 struct nvmap_handle **h, int nr)
{
	int i;
	int do_wake = 0;

	for (i = 0; i < nr; i++) {
		if (WARN_ON(!h[i]))
			continue;
		do_wake |= handle_unpin(client, h[i]);
	}

	if (do_wake)
		wake_up(&client->share->pin_wait);
}
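
/* Usage sketch (illustrative only, not part of the driver): the single-handle
 * nvmap_pin()/nvmap_unpin() pair brackets the period during which hardware
 * may access the buffer. "ref" is assumed to come from nvmap_alloc() or
 * nvmap_create_handle(); the error check reflects that nvmap_pin() encodes a
 * negative errno in its phys_addr_t return value on failure.
 *
 *	phys_addr_t dma_addr;
 *
 *	dma_addr = nvmap_pin(client, ref);
 *	if (IS_ERR_VALUE(dma_addr))
 *		return (int)dma_addr;
 *	...program the hardware with dma_addr and wait for it to finish...
 *	nvmap_unpin(client, ref);
 */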

void *nvmap_mmap(struct nvmap_handle_ref *ref)
{
	struct nvmap_handle *h;
	pgprot_t prot;
	unsigned long adj_size;
	unsigned long offs;
	struct vm_struct *v;
	void *p;

	h = nvmap_handle_get(ref->handle);
	if (!h)
		return NULL;

	prot = nvmap_pgprot(h, pgprot_kernel);

	if (h->heap_pgalloc)
		return vm_map_ram(h->pgalloc.pages, h->size >> PAGE_SHIFT,
				  -1, prot);

	/* carveout - explicitly map the pfns into a vmalloc area */

	nvmap_usecount_inc(h);

	adj_size = h->carveout->base & ~PAGE_MASK;
	adj_size += h->size;
	adj_size = PAGE_ALIGN(adj_size);

	v = alloc_vm_area(adj_size, NULL);
	if (!v) {
		nvmap_usecount_dec(h);
		nvmap_handle_put(h);
		return NULL;
	}

	p = v->addr + (h->carveout->base & ~PAGE_MASK);

	for (offs = 0; offs < adj_size; offs += PAGE_SIZE) {
		unsigned long addr = (unsigned long) v->addr + offs;
		unsigned int pfn;
		pgd_t *pgd;
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		pfn = __phys_to_pfn(h->carveout->base + offs);
		pgd = pgd_offset_k(addr);
		pud = pud_alloc(&init_mm, pgd, addr);
		if (!pud)
			break;
		pmd = pmd_alloc(&init_mm, pud, addr);
		if (!pmd)
			break;
		pte = pte_alloc_kernel(pmd, addr);
		if (!pte)
			break;
		set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
		flush_tlb_kernel_page(addr);
	}

	if (offs != adj_size) {
		free_vm_area(v);
		nvmap_usecount_dec(h);
		nvmap_handle_put(h);
		return NULL;
	}

	/* leave the handle ref count incremented by 1, so that
	 * the handle will not be freed while the kernel mapping exists.
	 * nvmap_handle_put will be called by unmapping this address */
	return p;
}

void nvmap_munmap(struct nvmap_handle_ref *ref, void *addr)
{
	struct nvmap_handle *h;

	if (!ref)
		return;

	h = ref->handle;

	if (h->heap_pgalloc) {
		vm_unmap_ram(addr, h->size >> PAGE_SHIFT);
	} else {
		struct vm_struct *vm;
		addr -= (h->carveout->base & ~PAGE_MASK);
		vm = remove_vm_area(addr);
		BUG_ON(!vm);
		kfree(vm);
		nvmap_usecount_dec(h);
	}
	nvmap_handle_put(h);
}
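
/* Usage sketch (illustrative only, not part of the driver): a kernel-side
 * user that needs a CPU view of a handle keeps the mapping only between
 * these two calls; nvmap_mmap() holds an extra handle reference that
 * nvmap_munmap() drops. "size" is assumed to be the allocation size.
 *
 *	void *va = nvmap_mmap(ref);
 *
 *	if (!va)
 *		return -ENOMEM;
 *	memset(va, 0, size);
 *	nvmap_munmap(ref, va);
 */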

struct nvmap_handle_ref *nvmap_alloc(struct nvmap_client *client, size_t size,
				     size_t align, unsigned int flags)
{
	const unsigned int default_heap = (NVMAP_HEAP_SYSMEM |
					   NVMAP_HEAP_CARVEOUT_GENERIC);
	struct nvmap_handle_ref *r = NULL;
	int err;

	r = nvmap_create_handle(client, size);
	if (IS_ERR(r))
		return r;

	err = nvmap_alloc_handle_id(client, nvmap_ref_to_id(r),
				    default_heap, align, flags);

	if (err) {
		nvmap_free_handle_id(client, nvmap_ref_to_id(r));
		return ERR_PTR(err);
	}

	return r;
}

/* allocates memory with specified iovm_start address. */
struct nvmap_handle_ref *nvmap_alloc_iovm(struct nvmap_client *client,
	size_t size, size_t align, unsigned int flags, unsigned int iovm_start)
{
	int err;
	struct nvmap_handle *h;
	struct nvmap_handle_ref *r;
	const unsigned int default_heap = NVMAP_HEAP_IOVMM;

	/* size needs to be more than one page.
	 * otherwise heap preference would change to system heap.
	 */
	if (size <= PAGE_SIZE)
		size = PAGE_SIZE << 1;

	r = nvmap_create_handle(client, size);
	if (IS_ERR_OR_NULL(r))
		return r;

	h = r->handle;
	h->pgalloc.iovm_addr = iovm_start;

	err = nvmap_alloc_handle_id(client, nvmap_ref_to_id(r),
				    default_heap, align, flags);
	if (err)
		goto fail;

	err = mutex_lock_interruptible(&client->share->pin_lock);
	if (err)
		goto fail;
	err = pin_locked(client, h);
	mutex_unlock(&client->share->pin_lock);
	if (err)
		goto fail;

	return r;

fail:
	nvmap_free_handle_id(client, nvmap_ref_to_id(r));
	return ERR_PTR(err);
}

void nvmap_free_iovm(struct nvmap_client *client, struct nvmap_handle_ref *r)
{
	unsigned long ref_id = nvmap_ref_to_id(r);

	nvmap_unpin_ids(client, 1, &ref_id);
	nvmap_free_handle_id(client, ref_id);
}

void nvmap_free(struct nvmap_client *client, struct nvmap_handle_ref *r)
{
	nvmap_free_handle_id(client, nvmap_ref_to_id(r));
}
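
/* Usage sketch (illustrative only, not part of the driver): allocate a
 * buffer from the default heaps and release it again. The size, alignment
 * and NVMAP_HANDLE_WRITE_COMBINE cache policy are example values, assumed
 * to be provided by <mach/nvmap.h> and the caller.
 *
 *	struct nvmap_handle_ref *r;
 *
 *	r = nvmap_alloc(client, SZ_64K, PAGE_SIZE,
 *			NVMAP_HANDLE_WRITE_COMBINE);
 *	if (IS_ERR(r))
 *		return PTR_ERR(r);
 *	...use the buffer, e.g. via nvmap_mmap() or nvmap_pin()...
 *	nvmap_free(client, r);
 */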

/*
 * create a mapping to the user's buffer and write it
 * (uses similar logic from nvmap_reloc_pin_array to map the cmdbuf)
 */
int nvmap_patch_word(struct nvmap_client *client,
		     struct nvmap_handle *patch,
		     u32 patch_offset, u32 patch_value)
{
	phys_addr_t phys;
	unsigned long kaddr;
	unsigned int pfn;
	void *addr;
	pte_t **pte;
	pgprot_t prot;

	if (patch_offset >= patch->size) {
		nvmap_warn(client, "read/write outside of handle\n");
		return -EFAULT;
	}

	pte = nvmap_alloc_pte(client->dev, &addr);
	if (IS_ERR(pte))
		return PTR_ERR(pte);

	/* derive physaddr of cmdbuf WAIT to patch */
	if (patch->heap_pgalloc) {
		unsigned int page = patch_offset >> PAGE_SHIFT;
		phys = page_to_phys(patch->pgalloc.pages[page]);
		phys += (patch_offset & ~PAGE_MASK);
	} else {
		phys = patch->carveout->base + patch_offset;
	}

	pfn = __phys_to_pfn(phys);
	prot = nvmap_pgprot(patch, pgprot_kernel);
	kaddr = (unsigned long)addr;

	/* write PTE, so addr points to cmdbuf PFN */
	set_pte_at(&init_mm, kaddr, *pte, pfn_pte(pfn, prot));
	flush_tlb_kernel_page(kaddr);

	/* write patch_value to addr + page offset */
	__raw_writel(patch_value, addr + (phys & ~PAGE_MASK));

	nvmap_free_pte(client->dev, pte);
	return 0;
}
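
/* Usage sketch (illustrative only, not part of the driver): patch a single
 * 32-bit word, such as a host1x syncpt WAIT argument, inside an allocated
 * command buffer handle. "cmdbuf_handle", "wait_cmd_offset" and
 * "new_threshold" are hypothetical names supplied by the caller.
 *
 *	err = nvmap_patch_word(client, cmdbuf_handle,
 *			       wait_cmd_offset, new_threshold);
 *	if (err)
 *		nvmap_warn(client, "cmdbuf patch failed %d\n", err);
 */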