mm: swap: implement generic handler for swap_activate
[linux-3.10.git] / mm / nommu.c
index 0563fd9..d4b0c10 100644 (file)
@@ -13,7 +13,7 @@
  *  Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
@@ -22,7 +22,6 @@
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-#include <linux/tracehook.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/mount.h>
@@ -455,7 +454,7 @@ void  __attribute__((weak)) vmalloc_sync_all(void)
  *     between processes, it syncs the pagetable across all
  *     processes.
  */
-struct vm_struct *alloc_vm_area(size_t size)
+struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
 {
        BUG();
        return NULL;
@@ -697,9 +696,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
        if (vma->vm_file) {
                mapping = vma->vm_file->f_mapping;
 
+               mutex_lock(&mapping->i_mmap_mutex);
                flush_dcache_mmap_lock(mapping);
                vma_prio_tree_insert(vma, &mapping->i_mmap);
                flush_dcache_mmap_unlock(mapping);
+               mutex_unlock(&mapping->i_mmap_mutex);
        }
 
        /* add the VMA to the tree */
@@ -761,9 +762,11 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
        if (vma->vm_file) {
                mapping = vma->vm_file->f_mapping;
 
+               mutex_lock(&mapping->i_mmap_mutex);
                flush_dcache_mmap_lock(mapping);
                vma_prio_tree_remove(vma, &mapping->i_mmap);
                flush_dcache_mmap_unlock(mapping);
+               mutex_unlock(&mapping->i_mmap_mutex);
        }
 
        /* remove from the MM's tree and list */
@@ -776,8 +779,6 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
 
        if (vma->vm_next)
                vma->vm_next->vm_prev = vma->vm_prev;
-
-       vma->vm_mm = NULL;
 }
 
 /*
@@ -888,7 +889,6 @@ static int validate_mmap_request(struct file *file,
                                 unsigned long *_capabilities)
 {
        unsigned long capabilities, rlen;
-       unsigned long reqprot = prot;
        int ret;
 
        /* do the simple checks first */
@@ -1046,7 +1046,7 @@ static int validate_mmap_request(struct file *file,
        }
 
        /* allow the security API to have its say */
-       ret = security_file_mmap(file, reqprot, prot, flags, addr, 0);
+       ret = security_mmap_addr(addr);
        if (ret < 0)
                return ret;
 
@@ -1087,7 +1087,7 @@ static unsigned long determine_vm_flags(struct file *file,
         * it's being traced - otherwise breakpoints set in it may interfere
         * with another untraced process
         */
-       if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current))
+       if ((flags & MAP_PRIVATE) && current->ptrace)
                vm_flags &= ~VM_MAYSHARE;
 
        return vm_flags;
@@ -1124,7 +1124,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
                           unsigned long capabilities)
 {
        struct page *pages;
-       unsigned long total, point, n, rlen;
+       unsigned long total, point, n;
        void *base;
        int ret, order;
 
@@ -1148,13 +1148,12 @@ static int do_mmap_private(struct vm_area_struct *vma,
                 * make a private copy of the data and map that instead */
        }
 
-       rlen = PAGE_ALIGN(len);
 
        /* allocate some memory to hold the mapping
         * - note that this may not return a page-aligned address if the object
         *   we're allocating is smaller than a page
         */
-       order = get_order(rlen);
+       order = get_order(len);
        kdebug("alloc order %d for %lx", order, len);
 
        pages = alloc_pages(GFP_KERNEL, order);
@@ -1164,7 +1163,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
        total = 1 << order;
        atomic_long_add(total, &mmap_pages_allocated);
 
-       point = rlen >> PAGE_SHIFT;
+       point = len >> PAGE_SHIFT;
 
        /* we allocated a power-of-2 sized page set, so we may want to trim off
         * the excess */
@@ -1186,7 +1185,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
        base = page_address(pages);
        region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
        region->vm_start = (unsigned long) base;
-       region->vm_end   = region->vm_start + rlen;
+       region->vm_end   = region->vm_start + len;
        region->vm_top   = region->vm_start + (total << PAGE_SHIFT);
 
        vma->vm_start = region->vm_start;
@@ -1202,22 +1201,22 @@ static int do_mmap_private(struct vm_area_struct *vma,
 
                old_fs = get_fs();
                set_fs(KERNEL_DS);
-               ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos);
+               ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos);
                set_fs(old_fs);
 
                if (ret < 0)
                        goto error_free;
 
                /* clear the last little bit */
-               if (ret < rlen)
-                       memset(base + ret, 0, rlen - ret);
+               if (ret < len)
+                       memset(base + ret, 0, len - ret);
 
        }
 
        return 0;
 
 error_free:
-       free_page_series(region->vm_start, region->vm_end);
+       free_page_series(region->vm_start, region->vm_top);
        region->vm_start = vma->vm_start = 0;
        region->vm_end   = vma->vm_end = 0;
        region->vm_top   = 0;
@@ -1259,6 +1258,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 
        /* we ignore the address hint */
        addr = 0;
+       len = PAGE_ALIGN(len);
 
        /* we've determined that we can make the mapping, now translate what we
         * now know into VMA flags */
@@ -1376,15 +1376,15 @@ unsigned long do_mmap_pgoff(struct file *file,
                if (capabilities & BDI_CAP_MAP_DIRECT) {
                        addr = file->f_op->get_unmapped_area(file, addr, len,
                                                             pgoff, flags);
-                       if (IS_ERR((void *) addr)) {
+                       if (IS_ERR_VALUE(addr)) {
                                ret = addr;
-                               if (ret != (unsigned long) -ENOSYS)
+                               if (ret != -ENOSYS)
                                        goto error_just_free;
 
                                /* the driver refused to tell us where to site
                                 * the mapping so we'll have to attempt to copy
                                 * it */
-                               ret = (unsigned long) -ENODEV;
+                               ret = -ENODEV;
                                if (!(capabilities & BDI_CAP_MAP_COPY))
                                        goto error_just_free;
 
@@ -1469,7 +1469,6 @@ error_getting_region:
        show_free_areas(0);
        return -ENOMEM;
 }
-EXPORT_SYMBOL(do_mmap_pgoff);
 
 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
                unsigned long, prot, unsigned long, flags,
@@ -1487,9 +1486,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
 
        flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 
-       down_write(&current->mm->mmap_sem);
-       retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-       up_write(&current->mm->mmap_sem);
+       retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
 
        if (file)
                fput(file);
@@ -1635,14 +1632,17 @@ static int shrink_vma(struct mm_struct *mm,
 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 {
        struct vm_area_struct *vma;
-       unsigned long end = start + len;
+       unsigned long end;
        int ret;
 
        kenter(",%lx,%zx", start, len);
 
+       len = PAGE_ALIGN(len);
        if (len == 0)
                return -EINVAL;
 
+       end = start + len;
+
        /* find the first potentially overlapping VMA */
        vma = find_vma(mm, start);
        if (!vma) {
@@ -1705,16 +1705,22 @@ erase_whole_vma:
 }
 EXPORT_SYMBOL(do_munmap);
 
-SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
+int vm_munmap(unsigned long addr, size_t len)
 {
-       int ret;
        struct mm_struct *mm = current->mm;
+       int ret;
 
        down_write(&mm->mmap_sem);
        ret = do_munmap(mm, addr, len);
        up_write(&mm->mmap_sem);
        return ret;
 }
+EXPORT_SYMBOL(vm_munmap);
+
+SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
+{
+       return vm_munmap(addr, len);
+}
 
 /*
  * release all the mappings made in a process's VM space
@@ -1740,7 +1746,7 @@ void exit_mmap(struct mm_struct *mm)
        kleave("");
 }
 
-unsigned long do_brk(unsigned long addr, unsigned long len)
+unsigned long vm_brk(unsigned long addr, unsigned long len)
 {
        return -ENOMEM;
 }
@@ -1762,6 +1768,8 @@ unsigned long do_mremap(unsigned long addr,
        struct vm_area_struct *vma;
 
        /* insanity checks first */
+       old_len = PAGE_ALIGN(old_len);
+       new_len = PAGE_ALIGN(new_len);
        if (old_len == 0 || new_len == 0)
                return (unsigned long) -EINVAL;
 
@@ -1808,10 +1816,13 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
        return NULL;
 }
 
-int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
-               unsigned long to, unsigned long size, pgprot_t prot)
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
+               unsigned long pfn, unsigned long size, pgprot_t prot)
 {
-       vma->vm_start = vma->vm_pgoff << PAGE_SHIFT;
+       if (addr != (pfn << PAGE_SHIFT))
+               return -EINVAL;
+
+       vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
        return 0;
 }
 EXPORT_SYMBOL(remap_pfn_range);
@@ -1877,9 +1888,17 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
                return 0;
 
        if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
-               unsigned long n;
+               free = global_page_state(NR_FREE_PAGES);
+               free += global_page_state(NR_FILE_PAGES);
+
+               /*
+                * shmem pages shouldn't be counted as free in this
+                * case, they can't be purged, only swapped out, and
+                * that won't affect the overall amount of available
+                * memory in the system.
+                */
+               free -= global_page_state(NR_SHMEM);
 
-               free = global_page_state(NR_FILE_PAGES);
                free += nr_swap_pages;
 
                /*
@@ -1891,34 +1910,18 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
                free += global_page_state(NR_SLAB_RECLAIMABLE);
 
                /*
-                * Leave the last 3% for root
-                */
-               if (!cap_sys_admin)
-                       free -= free / 32;
-
-               if (free > pages)
-                       return 0;
-
-               /*
-                * nr_free_pages() is very expensive on large systems,
-                * only call if we're about to fail.
-                */
-               n = nr_free_pages();
-
-               /*
                 * Leave reserved pages. The pages are not for anonymous pages.
                 */
-               if (n <= totalreserve_pages)
+               if (free <= totalreserve_pages)
                        goto error;
                else
-                       n -= totalreserve_pages;
+                       free -= totalreserve_pages;
 
                /*
                 * Leave the last 3% for root
                 */
                if (!cap_sys_admin)
-                       n -= n / 32;
-               free += n;
+                       free -= free / 32;
 
                if (free > pages)
                        return 0;
@@ -2053,6 +2056,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
        high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
        down_write(&nommu_region_sem);
+       mutex_lock(&inode->i_mapping->i_mmap_mutex);
 
        /* search for VMAs that fall within the dead zone */
        vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
@@ -2060,6 +2064,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
                /* found one - only interested if it's shared out of the page
                 * cache */
                if (vma->vm_flags & VM_SHARED) {
+                       mutex_unlock(&inode->i_mapping->i_mmap_mutex);
                        up_write(&nommu_region_sem);
                        return -ETXTBSY; /* not quite true, but near enough */
                }
@@ -2087,6 +2092,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
                }
        }
 
+       mutex_unlock(&inode->i_mapping->i_mmap_mutex);
        up_write(&nommu_region_sem);
        return 0;
 }