ARM: tegra: Use proper type for physical addresses
1 /*
2  * drivers/video/tegra/nvmap/nvmap_dev.c
3  *
4  * User-space interface to nvmap
5  *
6  * Copyright (c) 2011, NVIDIA Corporation.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16  * more details.
17  *
18  * You should have received a copy of the GNU General Public License along
19  * with this program; if not, write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
21  */
22
23 #include <linux/backing-dev.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/delay.h>
27 #include <linux/kernel.h>
28 #include <linux/miscdevice.h>
29 #include <linux/mm.h>
30 #include <linux/oom.h>
31 #include <linux/platform_device.h>
32 #include <linux/seq_file.h>
33 #include <linux/slab.h>
34 #include <linux/spinlock.h>
35 #include <linux/uaccess.h>
36 #include <linux/vmalloc.h>
37
38 #include <asm/cacheflush.h>
39 #include <asm/tlbflush.h>
40
41 #include <mach/iovmm.h>
42 #include <mach/nvmap.h>
43
44 #include "nvmap.h"
45 #include "nvmap_ioctl.h"
46 #include "nvmap_mru.h"
47 #include "nvmap_common.h"
48
49 #define NVMAP_NUM_PTES          64
50 #define NVMAP_CARVEOUT_KILLER_RETRY_TIME 100 /* msecs */
51
52 #ifdef CONFIG_NVMAP_CARVEOUT_KILLER
53 static bool carveout_killer = true;
54 #else
55 static bool carveout_killer;
56 #endif
57 module_param(carveout_killer, bool, 0640);
58
59 struct nvmap_carveout_node {
60         unsigned int            heap_bit;
61         struct nvmap_heap       *carveout;
62         int                     index;
63         struct list_head        clients;
64         spinlock_t              clients_lock;
65 };
66
67 struct nvmap_device {
68         struct vm_struct *vm_rgn;
69         pte_t           *ptes[NVMAP_NUM_PTES];
70         unsigned long   ptebits[NVMAP_NUM_PTES / BITS_PER_LONG];
71         unsigned int    lastpte;
72         spinlock_t      ptelock;
73
74         struct rb_root  handles;
75         spinlock_t      handle_lock;
76         wait_queue_head_t pte_wait;
77         struct miscdevice dev_super;
78         struct miscdevice dev_user;
79         struct nvmap_carveout_node *heaps;
80         int nr_carveouts;
81         struct nvmap_share iovmm_master;
82 };
83
84 struct nvmap_device *nvmap_dev;
85
86 static struct backing_dev_info nvmap_bdi = {
87         .ra_pages       = 0,
88         .capabilities   = (BDI_CAP_NO_ACCT_AND_WRITEBACK |
89                            BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP),
90 };
91
92 static int nvmap_open(struct inode *inode, struct file *filp);
93 static int nvmap_release(struct inode *inode, struct file *filp);
94 static long nvmap_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
95 static int nvmap_map(struct file *filp, struct vm_area_struct *vma);
96 static void nvmap_vma_open(struct vm_area_struct *vma);
97 static void nvmap_vma_close(struct vm_area_struct *vma);
98 static int nvmap_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
99
100 static const struct file_operations nvmap_user_fops = {
101         .owner          = THIS_MODULE,
102         .open           = nvmap_open,
103         .release        = nvmap_release,
104         .unlocked_ioctl = nvmap_ioctl,
105         .mmap           = nvmap_map,
106 };
107
108 static const struct file_operations nvmap_super_fops = {
109         .owner          = THIS_MODULE,
110         .open           = nvmap_open,
111         .release        = nvmap_release,
112         .unlocked_ioctl = nvmap_ioctl,
113         .mmap           = nvmap_map,
114 };
115
116 static struct vm_operations_struct nvmap_vma_ops = {
117         .open           = nvmap_vma_open,
118         .close          = nvmap_vma_close,
119         .fault          = nvmap_vma_fault,
120 };
121
122 int is_nvmap_vma(struct vm_area_struct *vma)
123 {
124         return vma->vm_ops == &nvmap_vma_ops;
125 }
126
127 struct device *nvmap_client_to_device(struct nvmap_client *client)
128 {
129         if (client->super)
130                 return client->dev->dev_super.this_device;
131         else
132                 return client->dev->dev_user.this_device;
133 }
134
135 struct nvmap_share *nvmap_get_share_from_dev(struct nvmap_device *dev)
136 {
137         return &dev->iovmm_master;
138 }
139
140 /* allocates a PTE for the caller's use; returns the PTE pointer or
141  * an ERR_PTR-encoded errno. may be called from IRQ context */
142 pte_t **nvmap_alloc_pte_irq(struct nvmap_device *dev, void **vaddr)
143 {
144         unsigned long flags;
145         unsigned long bit;
146
147         spin_lock_irqsave(&dev->ptelock, flags);
148         bit = find_next_zero_bit(dev->ptebits, NVMAP_NUM_PTES, dev->lastpte);
149         if (bit == NVMAP_NUM_PTES) {
150                 bit = find_first_zero_bit(dev->ptebits, dev->lastpte);
151                 if (bit == dev->lastpte)
152                         bit = NVMAP_NUM_PTES;
153         }
154
155         if (bit == NVMAP_NUM_PTES) {
156                 spin_unlock_irqrestore(&dev->ptelock, flags);
157                 return ERR_PTR(-ENOMEM);
158         }
159
160         dev->lastpte = bit;
161         set_bit(bit, dev->ptebits);
162         spin_unlock_irqrestore(&dev->ptelock, flags);
163
164         *vaddr = dev->vm_rgn->addr + bit * PAGE_SIZE;
165         return &(dev->ptes[bit]);
166 }
167
168 /* allocates a PTE for the caller's use; returns the PTE pointer or
169  * an ERR_PTR-encoded errno. must be called from sleepable contexts */
170 pte_t **nvmap_alloc_pte(struct nvmap_device *dev, void **vaddr)
171 {
172         int ret;
173         pte_t **pte;
174         ret = wait_event_interruptible(dev->pte_wait,
175                         !IS_ERR(pte = nvmap_alloc_pte_irq(dev, vaddr)));
176
177         if (ret == -ERESTARTSYS)
178                 return ERR_PTR(-EINTR);
179
180         return pte;
181 }
182
183 /* frees a PTE */
184 void nvmap_free_pte(struct nvmap_device *dev, pte_t **pte)
185 {
186         unsigned long addr;
187         unsigned int bit = pte - dev->ptes;
188         unsigned long flags;
189
190         if (WARN_ON(bit >= NVMAP_NUM_PTES))
191                 return;
192
193         addr = (unsigned long)dev->vm_rgn->addr + bit * PAGE_SIZE;
194         set_pte_at(&init_mm, addr, *pte, 0);
195
196         spin_lock_irqsave(&dev->ptelock, flags);
197         clear_bit(bit, dev->ptebits);
198         spin_unlock_irqrestore(&dev->ptelock, flags);
199         wake_up(&dev->pte_wait);
200 }
201
202 /* verifies that "id" refers to a valid handle ref for this client; returns
203  * the ref or NULL. caller must hold the client's ref_lock prior to calling */
204 struct nvmap_handle_ref *_nvmap_validate_id_locked(struct nvmap_client *c,
205                                                    unsigned long id)
206 {
207         struct rb_node *n = c->handle_refs.rb_node;
208
209         while (n) {
210                 struct nvmap_handle_ref *ref;
211                 ref = rb_entry(n, struct nvmap_handle_ref, node);
212                 if ((unsigned long)ref->handle == id)
213                         return ref;
214                 else if (id > (unsigned long)ref->handle)
215                         n = n->rb_right;
216                 else
217                         n = n->rb_left;
218         }
219
220         return NULL;
221 }
222
223 struct nvmap_handle *nvmap_get_handle_id(struct nvmap_client *client,
224                                          unsigned long id)
225 {
226         struct nvmap_handle_ref *ref;
227         struct nvmap_handle *h = NULL;
228
229         nvmap_ref_lock(client);
230         ref = _nvmap_validate_id_locked(client, id);
231         if (ref)
232                 h = ref->handle;
233         if (h)
234                 h = nvmap_handle_get(h);
235         nvmap_ref_unlock(client);
236         return h;
237 }
238
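/* returns the heap_bit of the carveout heap that block "b" was allocated
 * from, or 0 if the block does not belong to any of the device's carveouts */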
239 unsigned long nvmap_carveout_usage(struct nvmap_client *c,
240                                    struct nvmap_heap_block *b)
241 {
242         struct nvmap_heap *h = nvmap_block_to_heap(b);
243         struct nvmap_carveout_node *n;
244         int i;
245
246         for (i = 0; i < c->dev->nr_carveouts; i++) {
247                 n = &c->dev->heaps[i];
248                 if (n->carveout == h)
249                         return n->heap_bit;
250         }
251         return 0;
252 }
253
254 /*
255  * This routine flushes carveout memory from the cache.
256  *
257  * Why is a cache flush needed for carveout memory? Consider the case where
258  * a piece of carveout is allocated as cached and then released. If the same
259  * memory is later allocated for an uncached request without first being
260  * flushed from the cache, the client might pass it to a H/W engine, which
261  * starts modifying it. Because the memory was cached earlier, some portion
262  * of it may still sit in the cache; when the CPU later reads or writes
263  * other memory, those stale lines can be evicted back to main memory and
264  * corrupt the data, if that happens after the H/W engine has written to it.
265  *
266  * But flushing the memory blindly on each carveout allocation is redundant.
267  * In order to optimize the carveout buffer cache flushes, the following
268  * strategy is used:
269  *
270  * - The whole carveout is flushed from the cache during its initialization.
271  * - During allocation, carveout buffers are not flushed from the cache.
272  * - During deallocation, carveout buffers are flushed only if they were
273  *   allocated as cached; for uncached/write-combined allocations no cache
274  *   flush is needed, just draining the store buffers is enough.
275  */
276 int nvmap_flush_heap_block(struct nvmap_client *client,
277         struct nvmap_heap_block *block, size_t len, unsigned int prot)
278 {
279         pte_t **pte;
280         void *addr;
281         unsigned long kaddr;
282         phys_addr_t phys = block->base;
283         phys_addr_t end = block->base + len;
284
285         if (prot == NVMAP_HANDLE_UNCACHEABLE || prot == NVMAP_HANDLE_WRITE_COMBINE)
286                 goto out;
287
288         if (len >= FLUSH_CLEAN_BY_SET_WAY_THRESHOLD) {
289                 inner_flush_cache_all();
290                 if (prot != NVMAP_HANDLE_INNER_CACHEABLE)
291                         outer_flush_range(block->base, block->base + len);
292                 goto out;
293         }
294
295         pte = nvmap_alloc_pte((client ? client->dev : nvmap_dev), &addr);
296         if (IS_ERR(pte))
297                 return PTR_ERR(pte);
298
299         kaddr = (unsigned long)addr;
300
301         while (phys < end) {
302                 phys_addr_t next = (phys + PAGE_SIZE) & PAGE_MASK;
303                 unsigned long pfn = __phys_to_pfn(phys);
304                 void *base = (void *)kaddr + (phys & ~PAGE_MASK);
305
306                 next = min(next, end);
307                 set_pte_at(&init_mm, kaddr, *pte, pfn_pte(pfn, pgprot_kernel));
308                 flush_tlb_kernel_page(kaddr);
309                 __cpuc_flush_dcache_area(base, next - phys);
310                 phys = next;
311         }
312
313         if (prot != NVMAP_HANDLE_INNER_CACHEABLE)
314                 outer_flush_range(block->base, block->base + len);
315
316         nvmap_free_pte((client ? client->dev : nvmap_dev), pte);
317 out:
318         wmb();
319         return 0;
320 }
321
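/* accounts "len" bytes of carveout usage to the client for this carveout
 * node; on the client's first allocation from the node it is also added to
 * the node's client list, so it is visible to the carveout killer and the
 * debugfs statistics */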
322 void nvmap_carveout_commit_add(struct nvmap_client *client,
323                                struct nvmap_carveout_node *node,
324                                size_t len)
325 {
326         unsigned long flags;
327
328         nvmap_ref_lock(client);
329         spin_lock_irqsave(&node->clients_lock, flags);
330         BUG_ON(list_empty(&client->carveout_commit[node->index].list) &&
331                client->carveout_commit[node->index].commit != 0);
332
333         client->carveout_commit[node->index].commit += len;
334         /* if this client isn't already on the list of nodes for this heap,
335            add it */
336         if (list_empty(&client->carveout_commit[node->index].list)) {
337                 list_add(&client->carveout_commit[node->index].list,
338                          &node->clients);
339         }
340         spin_unlock_irqrestore(&node->clients_lock, flags);
341         nvmap_ref_unlock(client);
342 }
343
344 void nvmap_carveout_commit_subtract(struct nvmap_client *client,
345                                     struct nvmap_carveout_node *node,
346                                     size_t len)
347 {
348         unsigned long flags;
349
350         if (!client)
351                 return;
352
353         spin_lock_irqsave(&node->clients_lock, flags);
354         client->carveout_commit[node->index].commit -= len;
355         BUG_ON(client->carveout_commit[node->index].commit < 0);
356         /* if no more allocation in this carveout for this node, delete it */
357         if (!client->carveout_commit[node->index].commit)
358                 list_del_init(&client->carveout_commit[node->index].list);
359         spin_unlock_irqrestore(&node->clients_lock, flags);
360 }
361
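/* recovers the nvmap_client that owns a carveout_commit entry. the commit
 * pointer is an element of the client's carveout_commit[] array, so stepping
 * back node->index elements and subtracting the member offset yields the
 * enclosing client (an open-coded container_of on an array member) */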
362 static struct nvmap_client* get_client_from_carveout_commit(
363         struct nvmap_carveout_node *node, struct nvmap_carveout_commit *commit)
364 {
365         struct nvmap_carveout_commit *first_commit = commit - node->index;
366         return (void *)first_commit - offsetof(struct nvmap_client,
367                                                carveout_commit);
368 }
369
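/* carveout killer: when a carveout allocation fails, pick the client task
 * with the largest oom_adj (and, on a tie, the largest carveout commit)
 * that is not higher priority than the current task, and send it SIGKILL
 * so its carveout memory can be reclaimed. returns true if the caller
 * should wait for a client to exit before retrying the allocation */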
370 static DECLARE_WAIT_QUEUE_HEAD(wait_reclaim);
371 static int wait_count;
372 bool nvmap_shrink_carveout(struct nvmap_carveout_node *node)
373 {
374         struct nvmap_carveout_commit *commit;
375         size_t selected_size = 0;
376         int selected_oom_adj = OOM_ADJUST_MIN;
377         struct task_struct *selected_task = NULL;
378         unsigned long flags;
379         bool wait = false;
380         int current_oom_adj = OOM_ADJUST_MIN;
381
382         task_lock(current);
383         if (current->signal)
384                 current_oom_adj = current->signal->oom_adj;
385         task_unlock(current);
386
387         spin_lock_irqsave(&node->clients_lock, flags);
388         /* find the task with the largest oom_adj (lowest priority) and,
389          * among those, the largest carveout allocation -- ignore kernel
390          * allocations, there's no way to handle them */
391         list_for_each_entry(commit, &node->clients, list) {
392                 struct nvmap_client *client =
393                         get_client_from_carveout_commit(node, commit);
394                 size_t size = commit->commit;
395                 struct task_struct *task = client->task;
396                 struct signal_struct *sig;
397
398                 if (!task)
399                         continue;
400
401                 task_lock(task);
402                 sig = task->signal;
403                 if (!task->mm || !sig)
404                         goto end;
405                 /* don't try to kill current */
406                 if (task == current->group_leader)
407                         goto end;
408                 /* don't try to kill higher priority tasks */
409                 if (sig->oom_adj < current_oom_adj)
410                         goto end;
411                 if (sig->oom_adj < selected_oom_adj)
412                         goto end;
413                 if (sig->oom_adj == selected_oom_adj &&
414                     size <= selected_size)
415                         goto end;
416                 selected_oom_adj = sig->oom_adj;
417                 selected_size = size;
418                 selected_task = task;
419 end:
420                 task_unlock(task);
421         }
422         if (selected_task) {
423                 wait = true;
424                 if (fatal_signal_pending(selected_task)) {
425                         pr_warning("carveout_killer: process %d dying "
426                                    "slowly\n", selected_task->pid);
427                         goto out;
428                 }
429                 pr_info("carveout_killer: killing process %d with oom_adj %d "
430                         "to reclaim %zu (for process with oom_adj %d)\n",
431                         selected_task->pid, selected_oom_adj,
432                         selected_size, current_oom_adj);
433                 force_sig(SIGKILL, selected_task);
434         }
435 out:
436         spin_unlock_irqrestore(&node->clients_lock, flags);
437         return wait;
438 }
439
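/* single allocation attempt: try each carveout heap whose heap_bit matches
 * the requested usage mask and return the first block that satisfies the
 * request, or NULL if none does */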
440 struct nvmap_heap_block *do_nvmap_carveout_alloc(struct nvmap_client *client,
441                                               size_t len, size_t align,
442                                               unsigned long usage,
443                                               unsigned int prot,
444                                               struct nvmap_handle *handle)
445 {
446         struct nvmap_carveout_node *co_heap;
447         struct nvmap_device *dev = client->dev;
448         int i;
449
450         for (i = 0; i < dev->nr_carveouts; i++) {
451                 struct nvmap_heap_block *block;
452                 co_heap = &dev->heaps[i];
453
454                 if (!(co_heap->heap_bit & usage))
455                         continue;
456
457                 block = nvmap_heap_alloc(co_heap->carveout, len,
458                                         align, prot, handle);
459                 if (block) {
460                         return block;
461                 }
462         }
463         return NULL;
464 }
465
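/* returns true once destroy_client() has bumped wait_count, i.e. some client
 * has exited and released its carveout allocations since the snapshot
 * "count" was taken */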
466 static bool nvmap_carveout_freed(int count)
467 {
468         smp_rmb();
469         return count != wait_count;
470 }
471
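/* allocates a block from one of the carveout heaps matching "usage". if the
 * carveout killer is enabled and the allocation fails, repeatedly kill
 * low-priority clients and retry until the allocation succeeds or the retry
 * timeout expires */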
472 struct nvmap_heap_block *nvmap_carveout_alloc(struct nvmap_client *client,
473                                               size_t len, size_t align,
474                                               unsigned long usage,
475                                               unsigned int prot,
476                                               struct nvmap_handle *handle)
477 {
478         struct nvmap_heap_block *block;
479         struct nvmap_carveout_node *co_heap;
480         struct nvmap_device *dev = client->dev;
481         int i;
482         unsigned long end = jiffies +
483                 msecs_to_jiffies(NVMAP_CARVEOUT_KILLER_RETRY_TIME);
484         int count = 0;
485
486         do {
487                 block = do_nvmap_carveout_alloc(client, len, align, usage,
488                                                 prot, handle);
489                 if (!carveout_killer)
490                         return block;
491
492                 if (block)
493                         return block;
494
495                 if (!count++) {
496                         char task_comm[TASK_COMM_LEN];
497                         if (client->task)
498                                 get_task_comm(task_comm, client->task);
499                         else
500                                 task_comm[0] = 0;
501                         pr_info("%s: failed to allocate %u bytes for "
502                                 "process %s, firing carveout "
503                                 "killer!\n", __func__, len, task_comm);
504
505                 } else {
506                         pr_info("%s: still can't allocate %u bytes, "
507                                 "attempt %d!\n", __func__, len, count);
508                 }
509
510                 /* shrink carveouts that matter and try again */
511                 for (i = 0; i < dev->nr_carveouts; i++) {
512                         int count;
513                         co_heap = &dev->heaps[i];
514
515                         if (!(co_heap->heap_bit & usage))
516                                 continue;
517
518                         count = wait_count;
519                         /* indicates we didn't find anything to kill,
520                            might as well stop trying */
521                         if (!nvmap_shrink_carveout(co_heap))
522                                 return NULL;
523
524                         if (time_is_after_jiffies(end))
525                                 wait_event_interruptible_timeout(wait_reclaim,
526                                          nvmap_carveout_freed(count),
527                                          end - jiffies);
528                 }
529         } while (time_is_after_jiffies(end));
530
531         if (time_is_before_jiffies(end))
532                 pr_info("carveout_killer: timeout expired without "
533                         "allocation succeeding.\n");
534
535         return NULL;
536 }
537
538 /* remove a handle from the device's tree of all handles; called
539  * when freeing handles. */
540 int nvmap_handle_remove(struct nvmap_device *dev, struct nvmap_handle *h)
541 {
542         spin_lock(&dev->handle_lock);
543
544         /* re-test inside the spinlock if the handle really has no clients;
545          * only remove the handle if it is unreferenced */
546         if (atomic_add_return(0, &h->ref) > 0) {
547                 spin_unlock(&dev->handle_lock);
548                 return -EBUSY;
549         }
550         smp_rmb();
551         BUG_ON(atomic_read(&h->ref) < 0);
552         BUG_ON(atomic_read(&h->pin) != 0);
553
554         rb_erase(&h->node, &dev->handles);
555
556         spin_unlock(&dev->handle_lock);
557         return 0;
558 }
559
560 /* adds a newly-created handle to the device master tree */
561 void nvmap_handle_add(struct nvmap_device *dev, struct nvmap_handle *h)
562 {
563         struct rb_node **p;
564         struct rb_node *parent = NULL;
565
566         spin_lock(&dev->handle_lock);
567         p = &dev->handles.rb_node;
568         while (*p) {
569                 struct nvmap_handle *b;
570
571                 parent = *p;
572                 b = rb_entry(parent, struct nvmap_handle, node);
573                 if (h > b)
574                         p = &parent->rb_right;
575                 else
576                         p = &parent->rb_left;
577         }
578         rb_link_node(&h->node, parent, p);
579         rb_insert_color(&h->node, &dev->handles);
580         spin_unlock(&dev->handle_lock);
581 }
582
583 /* validates that a handle is in the device master tree, and that the
584  * client has permission to access it */
585 struct nvmap_handle *nvmap_validate_get(struct nvmap_client *client,
586                                         unsigned long id)
587 {
588         struct nvmap_handle *h = NULL;
589         struct rb_node *n;
590
591         spin_lock(&client->dev->handle_lock);
592
593         n = client->dev->handles.rb_node;
594
595         while (n) {
596                 h = rb_entry(n, struct nvmap_handle, node);
597                 if ((unsigned long)h == id) {
598                         if (client->super || h->global || (h->owner == client))
599                                 h = nvmap_handle_get(h);
600                         else
601                                 h = NULL;
602                         spin_unlock(&client->dev->handle_lock);
603                         return h;
604                 }
605                 if (id > (unsigned long)h)
606                         n = n->rb_right;
607                 else
608                         n = n->rb_left;
609         }
610         spin_unlock(&client->dev->handle_lock);
611         return NULL;
612 }
613
614 struct nvmap_client *nvmap_create_client(struct nvmap_device *dev,
615                                          const char *name)
616 {
617         struct nvmap_client *client;
618         struct task_struct *task;
619         int i;
620
621         if (WARN_ON(!dev))
622                 return NULL;
623
624         client = kzalloc(sizeof(*client) + (sizeof(struct nvmap_carveout_commit)
625                          * dev->nr_carveouts), GFP_KERNEL);
626         if (!client)
627                 return NULL;
628
629         client->name = name;
630         client->super = true;
631         client->dev = dev;
632         /* TODO: allocate unique IOVMM client for each nvmap client */
633         client->share = &dev->iovmm_master;
634         client->handle_refs = RB_ROOT;
635
636         atomic_set(&client->iovm_commit, 0);
637
638         client->iovm_limit = nvmap_mru_vm_size(client->share->iovmm);
639
640         for (i = 0; i < dev->nr_carveouts; i++) {
641                 INIT_LIST_HEAD(&client->carveout_commit[i].list);
642                 client->carveout_commit[i].commit = 0;
643         }
644
645         get_task_struct(current->group_leader);
646         task_lock(current->group_leader);
647         /* don't bother to store task struct for kernel threads,
648            they can't be killed anyway */
649         if (current->flags & PF_KTHREAD) {
650                 put_task_struct(current->group_leader);
651                 task = NULL;
652         } else {
653                 task = current->group_leader;
654         }
655         task_unlock(current->group_leader);
656         client->task = task;
657
658         mutex_init(&client->ref_lock);
659         atomic_set(&client->count, 1);
660
661         return client;
662 }
663
664 static void destroy_client(struct nvmap_client *client)
665 {
666         struct rb_node *n;
667         int i;
668
669         if (!client)
670                 return;
671
672
673         while ((n = rb_first(&client->handle_refs))) {
674                 struct nvmap_handle_ref *ref;
675                 int pins, dupes;
676
677                 ref = rb_entry(n, struct nvmap_handle_ref, node);
678                 rb_erase(&ref->node, &client->handle_refs);
679
680                 smp_rmb();
681                 pins = atomic_read(&ref->pin);
682
683                 if (ref->handle->owner == client)
684                         ref->handle->owner = NULL;
685
686                 while (pins--)
687                         nvmap_unpin_handles(client, &ref->handle, 1);
688
689                 dupes = atomic_read(&ref->dupes);
690                 while (dupes--)
691                         nvmap_handle_put(ref->handle);
692
693                 kfree(ref);
694         }
695
696         if (carveout_killer) {
697                 wait_count++;
698                 smp_wmb();
699                 wake_up_all(&wait_reclaim);
700         }
701
702         for (i = 0; i < client->dev->nr_carveouts; i++)
703                 list_del(&client->carveout_commit[i].list);
704
705         if (client->task)
706                 put_task_struct(client->task);
707
708         kfree(client);
709 }
710
711 struct nvmap_client *nvmap_client_get(struct nvmap_client *client)
712 {
713         if (WARN_ON(!client))
714                 return NULL;
715
716         if (WARN_ON(!atomic_add_unless(&client->count, 1, 0)))
717                 return NULL;
718
719         return client;
720 }
721
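/* looks up the nvmap client behind a user-supplied file descriptor and takes
 * a reference on it; returns ERR_PTR(-EINVAL) for a bad fd, or
 * ERR_PTR(-EFAULT) if the fd does not refer to an nvmap device file */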
722 struct nvmap_client *nvmap_client_get_file(int fd)
723 {
724         struct nvmap_client *client = ERR_PTR(-EFAULT);
725         struct file *f = fget(fd);
726         if (!f)
727                 return ERR_PTR(-EINVAL);
728
729         if ((f->f_op == &nvmap_user_fops) || (f->f_op == &nvmap_super_fops)) {
730                 client = f->private_data;
731                 atomic_inc(&client->count);
732         }
733
734         fput(f);
735         return client;
736 }
737
738 void nvmap_client_put(struct nvmap_client *client)
739 {
740         if (!client)
741                 return;
742
743         if (!atomic_dec_return(&client->count))
744                 destroy_client(client);
745 }
746
747 static int nvmap_open(struct inode *inode, struct file *filp)
748 {
749         struct miscdevice *miscdev = filp->private_data;
750         struct nvmap_device *dev = dev_get_drvdata(miscdev->parent);
751         struct nvmap_client *priv;
752         int ret;
753
754         ret = nonseekable_open(inode, filp);
755         if (unlikely(ret))
756                 return ret;
757
758         BUG_ON(dev != nvmap_dev);
759         priv = nvmap_create_client(dev, "user");
760         if (!priv)
761                 return -ENOMEM;
762
763         priv->super = (filp->f_op == &nvmap_super_fops);
764
765         filp->f_mapping->backing_dev_info = &nvmap_bdi;
766
767         filp->private_data = priv;
768         return 0;
769 }
770
771 static int nvmap_release(struct inode *inode, struct file *filp)
772 {
773         nvmap_client_put(filp->private_data);
774         return 0;
775 }
776
777 static int nvmap_map(struct file *filp, struct vm_area_struct *vma)
778 {
779         struct nvmap_vma_priv *priv;
780
781         /* after NVMAP_IOC_MMAP, the handle that is mapped by this VMA
782          * will be stored in vm_private_data and faulted in. until the
783          * ioctl is made, the VMA is mapped no-access */
784         vma->vm_private_data = NULL;
785
786         priv = kzalloc(sizeof(*priv), GFP_KERNEL);
787         if (!priv)
788                 return -ENOMEM;
789
790         priv->offs = 0;
791         priv->handle = NULL;
792         atomic_set(&priv->count, 1);
793
794         vma->vm_flags |= VM_SHARED;
795         vma->vm_flags |= (VM_IO | VM_DONTEXPAND | VM_MIXEDMAP | VM_RESERVED);
796         vma->vm_ops = &nvmap_vma_ops;
797         vma->vm_private_data = priv;
798
799         return 0;
800 }
801
802 static long nvmap_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
803 {
804         int err = 0;
805         void __user *uarg = (void __user *)arg;
806
807         if (_IOC_TYPE(cmd) != NVMAP_IOC_MAGIC)
808                 return -ENOTTY;
809
810         if (_IOC_NR(cmd) > NVMAP_IOC_MAXNR)
811                 return -ENOTTY;
812
813         if (_IOC_DIR(cmd) & _IOC_READ)
814                 err = !access_ok(VERIFY_WRITE, uarg, _IOC_SIZE(cmd));
815         if (_IOC_DIR(cmd) & _IOC_WRITE)
816                 err = !access_ok(VERIFY_READ, uarg, _IOC_SIZE(cmd));
817
818         if (err)
819                 return -EFAULT;
820
821         switch (cmd) {
822         case NVMAP_IOC_CLAIM:
823                 nvmap_warn(filp->private_data, "preserved handles not "
824                            "supported\n");
825                 err = -ENODEV;
826                 break;
827         case NVMAP_IOC_CREATE:
828         case NVMAP_IOC_FROM_ID:
829                 err = nvmap_ioctl_create(filp, cmd, uarg);
830                 break;
831
832         case NVMAP_IOC_GET_ID:
833                 err = nvmap_ioctl_getid(filp, uarg);
834                 break;
835
836         case NVMAP_IOC_PARAM:
837                 err = nvmap_ioctl_get_param(filp, uarg);
838                 break;
839
840         case NVMAP_IOC_UNPIN_MULT:
841         case NVMAP_IOC_PIN_MULT:
842                 err = nvmap_ioctl_pinop(filp, cmd == NVMAP_IOC_PIN_MULT, uarg);
843                 break;
844
845         case NVMAP_IOC_ALLOC:
846                 err = nvmap_ioctl_alloc(filp, uarg);
847                 break;
848
849         case NVMAP_IOC_FREE:
850                 err = nvmap_ioctl_free(filp, arg);
851                 break;
852
853         case NVMAP_IOC_MMAP:
854                 err = nvmap_map_into_caller_ptr(filp, uarg);
855                 break;
856
857         case NVMAP_IOC_WRITE:
858         case NVMAP_IOC_READ:
859                 err = nvmap_ioctl_rw_handle(filp, cmd == NVMAP_IOC_READ, uarg);
860                 break;
861
862         case NVMAP_IOC_CACHE:
863                 err = nvmap_ioctl_cache_maint(filp, uarg);
864                 break;
865
866         default:
867                 return -ENOTTY;
868         }
869         return err;
870 }
871
872 /* to ensure that the backing store for the VMA isn't freed while a fork'd
873  * reference still exists, nvmap_vma_open increments the reference count on
874  * the handle, and nvmap_vma_close decrements it. alternatively, we could
875  * disallow copying of the vma, or behave like pmem and zap the pages. FIXME.
876 */
877 static void nvmap_vma_open(struct vm_area_struct *vma)
878 {
879         struct nvmap_vma_priv *priv;
880
881         priv = vma->vm_private_data;
882
883         BUG_ON(!priv);
884
885         atomic_inc(&priv->count);
886 }
887
888 static void nvmap_vma_close(struct vm_area_struct *vma)
889 {
890         struct nvmap_vma_priv *priv = vma->vm_private_data;
891
892         if (priv) {
893                 if (priv->handle) {
894                         nvmap_usecount_dec(priv->handle);
895                         BUG_ON(priv->handle->usecount < 0);
896                 }
897                 if (!atomic_dec_return(&priv->count)) {
898                         if (priv->handle)
899                                 nvmap_handle_put(priv->handle);
900                         kfree(priv);
901                 }
902         }
903         vma->vm_private_data = NULL;
904 }
905
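/* fault handler for nvmap mappings: carveout-backed handles are mapped by
 * inserting the physical pfn directly, while page-allocated handles return
 * the backing struct page so the core mm can map it */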
906 static int nvmap_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
907 {
908         struct nvmap_vma_priv *priv;
909         unsigned long offs;
910
911         offs = (unsigned long)(vmf->virtual_address - vma->vm_start);
912         priv = vma->vm_private_data;
913         if (!priv || !priv->handle || !priv->handle->alloc)
914                 return VM_FAULT_SIGBUS;
915
916         offs += priv->offs;
917         /* if the VMA was split for some reason, vm_pgoff will be the VMA's
918          * offset from the original VMA */
919         offs += (vma->vm_pgoff << PAGE_SHIFT);
920
921         if (offs >= priv->handle->size)
922                 return VM_FAULT_SIGBUS;
923
924         if (!priv->handle->heap_pgalloc) {
925                 unsigned long pfn;
926                 BUG_ON(priv->handle->carveout->base & ~PAGE_MASK);
927                 pfn = ((priv->handle->carveout->base + offs) >> PAGE_SHIFT);
928                 vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
929                 return VM_FAULT_NOPAGE;
930         } else {
931                 struct page *page;
932                 offs >>= PAGE_SHIFT;
933                 page = priv->handle->pgalloc.pages[offs];
934                 if (page)
935                         get_page(page);
936                 vmf->page = page;
937                 return (page) ? 0 : VM_FAULT_SIGBUS;
938         }
939 }
940
941 static ssize_t attr_show_usage(struct device *dev,
942                                struct device_attribute *attr, char *buf)
943 {
944         struct nvmap_carveout_node *node = nvmap_heap_device_to_arg(dev);
945
946         return sprintf(buf, "%08x\n", node->heap_bit);
947 }
948
949 static struct device_attribute heap_attr_show_usage =
950         __ATTR(usage, S_IRUGO, attr_show_usage, NULL);
951
952 static struct attribute *heap_extra_attrs[] = {
953         &heap_attr_show_usage.attr,
954         NULL,
955 };
956
957 static struct attribute_group heap_extra_attr_group = {
958         .attrs = heap_extra_attrs,
959 };
960
961 static void client_stringify(struct nvmap_client *client, struct seq_file *s)
962 {
963         char task_comm[TASK_COMM_LEN];
964         if (!client->task) {
965                 seq_printf(s, "%-16s %16s %8u", client->name, "kernel", 0);
966                 return;
967         }
968         get_task_comm(task_comm, client->task);
969         seq_printf(s, "%-16s %16s %8u", client->name, task_comm,
970                    client->task->pid);
971 }
972
973 static void allocations_stringify(struct nvmap_client *client,
974                                   struct seq_file *s)
975 {
976         struct rb_node *n = rb_first(&client->handle_refs);
977
978         for (; n != NULL; n = rb_next(n)) {
979                 struct nvmap_handle_ref *ref =
980                         rb_entry(n, struct nvmap_handle_ref, node);
981                 struct nvmap_handle *handle = ref->handle;
982                 if (handle->alloc && !handle->heap_pgalloc) {
983                         seq_printf(s, "%-16s %-16s %8llx %10u\n", "", "",
984                                         (unsigned long long)handle->carveout->base,
985                                         handle->size);
986                 }
987         }
988 }
989
990 static int nvmap_debug_allocations_show(struct seq_file *s, void *unused)
991 {
992         struct nvmap_carveout_node *node = s->private;
993         struct nvmap_carveout_commit *commit;
994         unsigned long flags;
995         unsigned int total = 0;
996
997         spin_lock_irqsave(&node->clients_lock, flags);
998         list_for_each_entry(commit, &node->clients, list) {
999                 struct nvmap_client *client =
1000                         get_client_from_carveout_commit(node, commit);
1001                 client_stringify(client, s);
1002                 seq_printf(s, " %10u\n", commit->commit);
1003                 allocations_stringify(client, s);
1004                 seq_printf(s, "\n");
1005                 total += commit->commit;
1006         }
1007         seq_printf(s, "%-16s %-16s %8u %10u\n", "total", "", 0, total);
1008         spin_unlock_irqrestore(&node->clients_lock, flags);
1009
1010         return 0;
1011 }
1012
1013 static int nvmap_debug_allocations_open(struct inode *inode, struct file *file)
1014 {
1015         return single_open(file, nvmap_debug_allocations_show,
1016                            inode->i_private);
1017 }
1018
1019 static const struct file_operations debug_allocations_fops = {
1020         .open = nvmap_debug_allocations_open,
1021         .read = seq_read,
1022         .llseek = seq_lseek,
1023         .release = single_release,
1024 };
1025
1026 static int nvmap_debug_clients_show(struct seq_file *s, void *unused)
1027 {
1028         struct nvmap_carveout_node *node = s->private;
1029         struct nvmap_carveout_commit *commit;
1030         unsigned long flags;
1031         unsigned int total = 0;
1032
1033         spin_lock_irqsave(&node->clients_lock, flags);
1034         list_for_each_entry(commit, &node->clients, list) {
1035                 struct nvmap_client *client =
1036                         get_client_from_carveout_commit(node, commit);
1037                 client_stringify(client, s);
1038                 seq_printf(s, " %10u\n", commit->commit);
1039                 total += commit->commit;
1040         }
1041         seq_printf(s, "%-16s %-16s %8u %10u\n", "total", "", 0, total);
1042         spin_unlock_irqrestore(&node->clients_lock, flags);
1043
1044         return 0;
1045 }
1046
1047 static int nvmap_debug_clients_open(struct inode *inode, struct file *file)
1048 {
1049         return single_open(file, nvmap_debug_clients_show, inode->i_private);
1050 }
1051
1052 static const struct file_operations debug_clients_fops = {
1053         .open = nvmap_debug_clients_open,
1054         .read = seq_read,
1055         .llseek = seq_lseek,
1056         .release = single_release,
1057 };
1058
1059 static int nvmap_probe(struct platform_device *pdev)
1060 {
1061         struct nvmap_platform_data *plat = pdev->dev.platform_data;
1062         struct nvmap_device *dev;
1063         struct dentry *nvmap_debug_root;
1064         unsigned int i;
1065         int e;
1066
1067         if (!plat) {
1068                 dev_err(&pdev->dev, "no platform data?\n");
1069                 return -ENODEV;
1070         }
1071
1072         if (WARN_ON(nvmap_dev != NULL)) {
1073                 dev_err(&pdev->dev, "only one nvmap device may be present\n");
1074                 return -ENODEV;
1075         }
1076
1077         dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1078         if (!dev) {
1079                 dev_err(&pdev->dev, "out of memory for device\n");
1080                 return -ENOMEM;
1081         }
1082
1083         dev->dev_user.minor = MISC_DYNAMIC_MINOR;
1084         dev->dev_user.name = "nvmap";
1085         dev->dev_user.fops = &nvmap_user_fops;
1086         dev->dev_user.parent = &pdev->dev;
1087
1088         dev->dev_super.minor = MISC_DYNAMIC_MINOR;
1089         dev->dev_super.name = "knvmap";
1090         dev->dev_super.fops = &nvmap_super_fops;
1091         dev->dev_super.parent = &pdev->dev;
1092
1093         dev->handles = RB_ROOT;
1094
1095         init_waitqueue_head(&dev->pte_wait);
1096
1097         init_waitqueue_head(&dev->iovmm_master.pin_wait);
1098         mutex_init(&dev->iovmm_master.pin_lock);
1099         dev->iovmm_master.iovmm =
1100                 tegra_iovmm_alloc_client(dev_name(&pdev->dev), NULL,
1101                         &(dev->dev_user));
1102         if (IS_ERR(dev->iovmm_master.iovmm)) {
1103                 e = PTR_ERR(dev->iovmm_master.iovmm);
1104                 dev_err(&pdev->dev, "couldn't create iovmm client\n");
1105                 goto fail;
1106         }
1107         dev->vm_rgn = alloc_vm_area(NVMAP_NUM_PTES * PAGE_SIZE);
1108         if (!dev->vm_rgn) {
1109                 e = -ENOMEM;
1110                 dev_err(&pdev->dev, "couldn't allocate remapping region\n");
1111                 goto fail;
1112         }
1113         e = nvmap_mru_init(&dev->iovmm_master);
1114         if (e) {
1115                 dev_err(&pdev->dev, "couldn't initialize MRU lists\n");
1116                 goto fail;
1117         }
1118
1119         spin_lock_init(&dev->ptelock);
1120         spin_lock_init(&dev->handle_lock);
1121
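        /* pre-allocate the kernel page-table entries that cover the
         * NVMAP_NUM_PTES-page remapping window, so that PTE slots can later
         * be handed out even from atomic context */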
1122         for (i = 0; i < NVMAP_NUM_PTES; i++) {
1123                 unsigned long addr;
1124                 pgd_t *pgd;
1125                 pud_t *pud;
1126                 pmd_t *pmd;
1127
1128                 addr = (unsigned long)dev->vm_rgn->addr + (i * PAGE_SIZE);
1129                 pgd = pgd_offset_k(addr);
1130                 pud = pud_alloc(&init_mm, pgd, addr);
1131                 if (!pud) {
1132                         e = -ENOMEM;
1133                         dev_err(&pdev->dev, "couldn't allocate page tables\n");
1134                         goto fail;
1135                 }
1136                 pmd = pmd_alloc(&init_mm, pud, addr);
1137                 if (!pmd) {
1138                         e = -ENOMEM;
1139                         dev_err(&pdev->dev, "couldn't allocate page tables\n");
1140                         goto fail;
1141                 }
1142                 dev->ptes[i] = pte_alloc_kernel(pmd, addr);
1143                 if (!dev->ptes[i]) {
1144                         e = -ENOMEM;
1145                         dev_err(&pdev->dev, "couldn't allocate page tables\n");
1146                         goto fail;
1147                 }
1148         }
1149
1150         e = misc_register(&dev->dev_user);
1151         if (e) {
1152                 dev_err(&pdev->dev, "unable to register miscdevice %s\n",
1153                         dev->dev_user.name);
1154                 goto fail;
1155         }
1156
1157         e = misc_register(&dev->dev_super);
1158         if (e) {
1159                 dev_err(&pdev->dev, "unable to register miscdevice %s\n",
1160                         dev->dev_super.name);
1161                 goto fail;
1162         }
1163
1164         dev->nr_carveouts = 0;
1165         dev->heaps = kzalloc(sizeof(struct nvmap_carveout_node) *
1166                              plat->nr_carveouts, GFP_KERNEL);
1167         if (!dev->heaps) {
1168                 e = -ENOMEM;
1169                 dev_err(&pdev->dev, "couldn't allocate carveout memory\n");
1170                 goto fail;
1171         }
1172
1173         nvmap_debug_root = debugfs_create_dir("nvmap", NULL);
1174         if (IS_ERR_OR_NULL(nvmap_debug_root))
1175                 dev_err(&pdev->dev, "couldn't create debug files\n");
1176
1177         for (i = 0; i < plat->nr_carveouts; i++) {
1178                 struct nvmap_carveout_node *node = &dev->heaps[i];
1179                 const struct nvmap_platform_carveout *co = &plat->carveouts[i];
1180                 node->carveout = nvmap_heap_create(dev->dev_user.this_device,
1181                                    co->name, co->base, co->size,
1182                                    co->buddy_size, node);
1183                 if (!node->carveout) {
1184                         e = -ENOMEM;
1185                         dev_err(&pdev->dev, "couldn't create %s\n", co->name);
1186                         goto fail_heaps;
1187                 }
1188                 dev->nr_carveouts++;
1189                 spin_lock_init(&node->clients_lock);
1190                 node->index = i;
1191                 INIT_LIST_HEAD(&node->clients);
1192                 node->heap_bit = co->usage_mask;
1193                 if (nvmap_heap_create_group(node->carveout,
1194                                             &heap_extra_attr_group))
1195                         dev_warn(&pdev->dev, "couldn't add extra attributes\n");
1196
1197                 dev_info(&pdev->dev, "created carveout %s (%uKiB)\n",
1198                          co->name, co->size / 1024);
1199
1200                 if (!IS_ERR_OR_NULL(nvmap_debug_root)) {
1201                         struct dentry *heap_root =
1202                                 debugfs_create_dir(co->name, nvmap_debug_root);
1203                         if (!IS_ERR_OR_NULL(heap_root)) {
1204                                 debugfs_create_file("clients", 0664, heap_root,
1205                                     node, &debug_clients_fops);
1206                                 debugfs_create_file("allocations", 0664,
1207                                     heap_root, node, &debug_allocations_fops);
1208                         }
1209                 }
1210         }
1211
1212         platform_set_drvdata(pdev, dev);
1213         nvmap_dev = dev;
1214         return 0;
1215 fail_heaps:
1216         for (i = 0; i < dev->nr_carveouts; i++) {
1217                 struct nvmap_carveout_node *node = &dev->heaps[i];
1218                 nvmap_heap_remove_group(node->carveout, &heap_extra_attr_group);
1219                 nvmap_heap_destroy(node->carveout);
1220         }
1221 fail:
1222         kfree(dev->heaps);
1223         nvmap_mru_destroy(&dev->iovmm_master);
1224         if (dev->dev_super.minor != MISC_DYNAMIC_MINOR)
1225                 misc_deregister(&dev->dev_super);
1226         if (dev->dev_user.minor != MISC_DYNAMIC_MINOR)
1227                 misc_deregister(&dev->dev_user);
1228         if (!IS_ERR_OR_NULL(dev->iovmm_master.iovmm))
1229                 tegra_iovmm_free_client(dev->iovmm_master.iovmm);
1230         if (dev->vm_rgn)
1231                 free_vm_area(dev->vm_rgn);
1232         kfree(dev);
1233         nvmap_dev = NULL;
1234         return e;
1235 }
1236
1237 static int nvmap_remove(struct platform_device *pdev)
1238 {
1239         struct nvmap_device *dev = platform_get_drvdata(pdev);
1240         struct rb_node *n;
1241         struct nvmap_handle *h;
1242         int i;
1243
1244         misc_deregister(&dev->dev_super);
1245         misc_deregister(&dev->dev_user);
1246
1247         while ((n = rb_first(&dev->handles))) {
1248                 h = rb_entry(n, struct nvmap_handle, node);
1249                 rb_erase(&h->node, &dev->handles);
1250                 kfree(h);
1251         }
1252
1253         if (!IS_ERR_OR_NULL(dev->iovmm_master.iovmm))
1254                 tegra_iovmm_free_client(dev->iovmm_master.iovmm);
1255
1256         nvmap_mru_destroy(&dev->iovmm_master);
1257
1258         for (i = 0; i < dev->nr_carveouts; i++) {
1259                 struct nvmap_carveout_node *node = &dev->heaps[i];
1260                 nvmap_heap_remove_group(node->carveout, &heap_extra_attr_group);
1261                 nvmap_heap_destroy(node->carveout);
1262         }
1263         kfree(dev->heaps);
1264
1265         free_vm_area(dev->vm_rgn);
1266         kfree(dev);
1267         nvmap_dev = NULL;
1268         return 0;
1269 }
1270
1271 static int nvmap_suspend(struct platform_device *pdev, pm_message_t state)
1272 {
1273         return 0;
1274 }
1275
1276 static int nvmap_resume(struct platform_device *pdev)
1277 {
1278         return 0;
1279 }
1280
1281 static struct platform_driver nvmap_driver = {
1282         .probe          = nvmap_probe,
1283         .remove         = nvmap_remove,
1284         .suspend        = nvmap_suspend,
1285         .resume         = nvmap_resume,
1286
1287         .driver = {
1288                 .name   = "tegra-nvmap",
1289                 .owner  = THIS_MODULE,
1290         },
1291 };
1292
1293 static int __init nvmap_init_driver(void)
1294 {
1295         int e;
1296
1297         nvmap_dev = NULL;
1298
1299         e = nvmap_heap_init();
1300         if (e)
1301                 goto fail;
1302
1303         e = platform_driver_register(&nvmap_driver);
1304         if (e) {
1305                 nvmap_heap_deinit();
1306                 goto fail;
1307         }
1308
1309 fail:
1310         return e;
1311 }
1312 fs_initcall(nvmap_init_driver);
1313
1314 static void __exit nvmap_exit_driver(void)
1315 {
1316         platform_driver_unregister(&nvmap_driver);
1317         nvmap_heap_deinit();
1318         nvmap_dev = NULL;
1319 }
1320 module_exit(nvmap_exit_driver);