Stop playing silly games with the VM_ACCOUNT flag
Linus Torvalds [Sat, 31 Jan 2009 23:08:56 +0000 (15:08 -0800)]
The mmap_region() code would temporarily set the VM_ACCOUNT flag for
anonymous shared mappings just to inform shmem_zero_setup() that it
should enable accounting for the resulting shm object.  It would then
clear the flag after calling ->mmap (for the /dev/zero case) or doing
shmem_zero_setup() (for the MAP_ANON case).

This just resulted in vma merge issues, but also made for just
unnecessary confusion.  Use the already-existing VM_NORESERVE flag for
this instead, and let shmem_{zero|file}_setup() just figure it out from
that.

This also happens to make it obvious that the new DRI2 GEM layer uses a
non-reserving backing store for its object allocation - which is quite
possibly not intentional.  But since I didn't want to change semantics
in this patch, I left it alone, and just updated the caller to use the
new flag semantics.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

drivers/gpu/drm/drm_gem.c
ipc/shm.c
mm/mmap.c
mm/shmem.c

index 9da5814..6915fb8 100644 (file)
@@ -136,7 +136,7 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size)
        obj = kcalloc(1, sizeof(*obj), GFP_KERNEL);
 
        obj->dev = dev;
-       obj->filp = shmem_file_setup("drm mm object", size, 0);
+       obj->filp = shmem_file_setup("drm mm object", size, VM_NORESERVE);
        if (IS_ERR(obj->filp)) {
                kfree(obj);
                return NULL;
index a9e09ad..c0a021f 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -368,14 +368,14 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
                file = hugetlb_file_setup(name, size);
                shp->mlock_user = current_user();
        } else {
-               int acctflag = VM_ACCOUNT;
+               int acctflag = 0;
                /*
                 * Do not allow no accounting for OVERCOMMIT_NEVER, even
                 * if it's asked for.
                 */
                if  ((shmflg & SHM_NORESERVE) &&
                                sysctl_overcommit_memory != OVERCOMMIT_NEVER)
-                       acctflag = 0;
+                       acctflag = VM_NORESERVE;
                file = shmem_file_setup(name, size, acctflag);
        }
        error = PTR_ERR(file);
index c581df1..214b6a2 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1090,6 +1090,15 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
                mapping_cap_account_dirty(vma->vm_file->f_mapping);
 }
 
+/*
+ * We account for memory if it's a private writeable mapping,
+ * and VM_NORESERVE wasn't set.
+ */
+static inline int accountable_mapping(unsigned int vm_flags)
+{
+       return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
+}
+
 unsigned long mmap_region(struct file *file, unsigned long addr,
                          unsigned long len, unsigned long flags,
                          unsigned int vm_flags, unsigned long pgoff,
@@ -1117,23 +1126,24 @@ munmap_back:
        if (!may_expand_vm(mm, len >> PAGE_SHIFT))
                return -ENOMEM;
 
-       if (flags & MAP_NORESERVE)
+       /*
+        * Set 'VM_NORESERVE' if we should not account for the
+        * memory use of this mapping. We only honor MAP_NORESERVE
+        * if we're allowed to overcommit memory.
+        */
+       if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER)
+               vm_flags |= VM_NORESERVE;
+       if (!accountable)
                vm_flags |= VM_NORESERVE;
 
-       if (accountable && (!(flags & MAP_NORESERVE) ||
-                           sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
-               if (vm_flags & VM_SHARED) {
-                       /* Check memory availability in shmem_file_setup? */
-                       vm_flags |= VM_ACCOUNT;
-               } else if (vm_flags & VM_WRITE) {
-                       /*
-                        * Private writable mapping: check memory availability
-                        */
-                       charged = len >> PAGE_SHIFT;
-                       if (security_vm_enough_memory(charged))
-                               return -ENOMEM;
-                       vm_flags |= VM_ACCOUNT;
-               }
+       /*
+        * Private writable mapping: check memory availability
+        */
+       if (accountable_mapping(vm_flags)) {
+               charged = len >> PAGE_SHIFT;
+               if (security_vm_enough_memory(charged))
+                       return -ENOMEM;
+               vm_flags |= VM_ACCOUNT;
        }
 
        /*
@@ -1184,14 +1194,6 @@ munmap_back:
                        goto free_vma;
        }
 
-       /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
-        * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
-        * that memory reservation must be checked; but that reservation
-        * belongs to shared memory object, not to vma: so now clear it.
-        */
-       if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
-               vma->vm_flags &= ~VM_ACCOUNT;
-
        /* Can addr have changed??
         *
         * Answer: Yes, several device drivers can do it in their
index 5d0de96..19d566c 100644 (file)
@@ -2628,7 +2628,7 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
                goto close_file;
 
 #ifdef CONFIG_SHMEM
-       SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
+       SHMEM_I(inode)->flags = (flags & VM_NORESERVE) ? 0 : VM_ACCOUNT;
 #endif
        d_instantiate(dentry, inode);
        inode->i_size = size;