Merge branch 'akpm' (Andrew's patchbomb)
Linus Torvalds [Wed, 12 Dec 2012 02:05:37 +0000 (18:05 -0800)]
Merge misc updates from Andrew Morton:
 "About half of most of MM.  Going very early this time due to
  uncertainty over the coreautounifiednumasched things.  I'll send the
  other half of most of MM tomorrow.  The rest of MM awaits a slab merge
  from Pekka."

* emailed patches from Andrew Morton: (71 commits)
  memory_hotplug: ensure every online node has NORMAL memory
  memory_hotplug: handle empty zone when online_movable/online_kernel
  mm, memory-hotplug: dynamic configure movable memory and portion memory
  drivers/base/node.c: cleanup node_state_attr[]
  bootmem: fix wrong call parameter for free_bootmem()
  avr32, kconfig: remove HAVE_ARCH_BOOTMEM
  mm: cma: remove watermark hacks
  mm: cma: skip watermarks check for already isolated blocks in split_free_page()
  mm, oom: fix race when specifying a thread as the oom origin
  mm, oom: change type of oom_score_adj to short
  mm: cleanup register_node()
  mm, mempolicy: remove duplicate code
  mm/vmscan.c: try_to_freeze() returns boolean
  mm: introduce putback_movable_pages()
  virtio_balloon: introduce migration primitives to balloon pages
  mm: introduce compaction and migration for ballooned pages
  mm: introduce a common interface for balloon pages mobility
  mm: redefine address_space.assoc_mapping
  mm: adjust address_space_operations.migratepage() return code
  arch/sparc/kernel/sys_sparc_64.c: s/COLOUR/COLOR/
  ...

arch/arm/mm/mmap.c
arch/sh/mm/mmap.c
mm/mmap.c

diff --combined arch/arm/mm/mmap.c
  #include <linux/random.h>
  #include <asm/cachetype.h>
  
- static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr,
-                                             unsigned long pgoff)
- {
-       unsigned long base = addr & ~(SHMLBA-1);
-       unsigned long off = (pgoff << PAGE_SHIFT) & (SHMLBA-1);
-       if (base + off <= addr)
-               return base + off;
-       return base - off;
- }
  #define COLOUR_ALIGN(addr,pgoff)              \
        ((((addr)+SHMLBA-1)&~(SHMLBA-1)) +      \
         (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
@@@ -69,9 -57,9 +57,9 @@@ arch_get_unmapped_area(struct file *fil
  {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
-       unsigned long start_addr;
        int do_align = 0;
        int aliasing = cache_is_vipt_aliasing();
+       struct vm_unmapped_area_info info;
  
        /*
         * We only need to do colour alignment if either the I or D
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
-       if (len > mm->cached_hole_size) {
-               start_addr = addr = mm->free_area_cache;
-       } else {
-               start_addr = addr = mm->mmap_base;
-               mm->cached_hole_size = 0;
-       }
  
- full_search:
-       if (do_align)
-               addr = COLOUR_ALIGN(addr, pgoff);
-       else
-               addr = PAGE_ALIGN(addr);
-       for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
-               /* At this point:  (!vma || addr < vma->vm_end). */
-               if (TASK_SIZE - len < addr) {
-                       /*
-                        * Start a new search - just in case we missed
-                        * some holes.
-                        */
-                       if (start_addr != TASK_UNMAPPED_BASE) {
-                               start_addr = addr = TASK_UNMAPPED_BASE;
-                               mm->cached_hole_size = 0;
-                               goto full_search;
-                       }
-                       return -ENOMEM;
-               }
-               if (!vma || addr + len <= vma->vm_start) {
-                       /*
-                        * Remember the place where we stopped the search:
-                        */
-                       mm->free_area_cache = addr + len;
-                       return addr;
-               }
-               if (addr + mm->cached_hole_size < vma->vm_start)
-                       mm->cached_hole_size = vma->vm_start - addr;
-               addr = vma->vm_end;
-               if (do_align)
-                       addr = COLOUR_ALIGN(addr, pgoff);
-       }
+       info.flags = 0;
+       info.length = len;
+       info.low_limit = mm->mmap_base;
+       info.high_limit = TASK_SIZE;
+       info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+       info.align_offset = pgoff << PAGE_SHIFT;
+       return vm_unmapped_area(&info);
  }
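
Note: the rewritten function expresses the SHMLBA colour constraint through info.align_mask and info.align_offset and lets vm_unmapped_area() do the search. A minimal userspace sketch of the round-up arithmetic that replaces COLOUR_ALIGN() (illustrative values; the real SHMLBA and PAGE_SHIFT come from the architecture headers):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))
    #define SHMLBA     (4 * PAGE_SIZE)   /* ARM uses a 16KB shared-mapping colour */

    int main(void)
    {
            unsigned long gap_start    = 0x40001000UL;  /* page-aligned start of a free gap */
            unsigned long pgoff        = 3;             /* file page offset of the mapping */
            unsigned long align_mask   = PAGE_MASK & (SHMLBA - 1);
            unsigned long align_offset = pgoff << PAGE_SHIFT;

            /* Same expression as in unmapped_area(): round gap_start up until
             * its colour bits match those of align_offset. */
            gap_start += (align_offset - gap_start) & align_mask;

            printf("aligned addr %#lx, colour %#lx (wanted %#lx)\n",
                   gap_start, gap_start & (SHMLBA - 1), align_offset & (SHMLBA - 1));
            return 0;
    }
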
  
  unsigned long
@@@ -156,6 -112,7 +112,7 @@@ arch_get_unmapped_area_topdown(struct f
        unsigned long addr = addr0;
        int do_align = 0;
        int aliasing = cache_is_vipt_aliasing();
+       struct vm_unmapped_area_info info;
  
        /*
         * We only need to do colour alignment if either the I or D
                        return addr;
        }
  
-       /* check if free_area_cache is useful for us */
-       if (len <= mm->cached_hole_size) {
-               mm->cached_hole_size = 0;
-               mm->free_area_cache = mm->mmap_base;
-       }
+       info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+       info.length = len;
+       info.low_limit = PAGE_SIZE;
+       info.high_limit = mm->mmap_base;
+       info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+       info.align_offset = pgoff << PAGE_SHIFT;
+       addr = vm_unmapped_area(&info);
  
-       /* either no address requested or can't fit in requested address hole */
-       addr = mm->free_area_cache;
-       if (do_align) {
-               unsigned long base = COLOUR_ALIGN_DOWN(addr - len, pgoff);
-               addr = base + len;
-       }
-       /* make sure it can fit in the remaining address space */
-       if (addr > len) {
-               vma = find_vma(mm, addr-len);
-               if (!vma || addr <= vma->vm_start)
-                       /* remember the address as a hint for next time */
-                       return (mm->free_area_cache = addr-len);
-       }
-       if (mm->mmap_base < len)
-               goto bottomup;
-       addr = mm->mmap_base - len;
-       if (do_align)
-               addr = COLOUR_ALIGN_DOWN(addr, pgoff);
-       do {
-               /*
-                * Lookup failure means no vma is above this address,
-                * else if new region fits below vma->vm_start,
-                * return with success:
-                */
-               vma = find_vma(mm, addr);
-               if (!vma || addr+len <= vma->vm_start)
-                       /* remember the address as a hint for next time */
-                       return (mm->free_area_cache = addr);
-               /* remember the largest hole we saw so far */
-               if (addr + mm->cached_hole_size < vma->vm_start)
-                       mm->cached_hole_size = vma->vm_start - addr;
-               /* try just below the current vma->vm_start */
-               addr = vma->vm_start - len;
-               if (do_align)
-                       addr = COLOUR_ALIGN_DOWN(addr, pgoff);
-       } while (len < vma->vm_start);
- bottomup:
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
-       mm->cached_hole_size = ~0UL;
-       mm->free_area_cache = TASK_UNMAPPED_BASE;
-       addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
-       /*
-        * Restore the topdown base:
-        */
-       mm->free_area_cache = mm->mmap_base;
-       mm->cached_hole_size = ~0UL;
+       if (addr & ~PAGE_MASK) {
+               VM_BUG_ON(addr != -ENOMEM);
+               info.flags = 0;
+               info.low_limit = mm->mmap_base;
+               info.high_limit = TASK_SIZE;
+               addr = vm_unmapped_area(&info);
+       }
  
        return addr;
  }
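
Note: vm_unmapped_area() returns either a page-aligned address or a negative errno, so the fallback path above can tell the two apart with a single addr & ~PAGE_MASK test. A small userspace demonstration of that property (sketch only, illustrative PAGE_SIZE):

    #include <errno.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
            unsigned long ok  = 0x40004000UL;            /* page-aligned success value */
            unsigned long err = (unsigned long)-ENOMEM;  /* 0xff...f4: low bits set */

            printf("ok  & ~PAGE_MASK = %#lx\n", ok  & ~PAGE_MASK);  /* 0 -> success */
            printf("err & ~PAGE_MASK = %#lx\n", err & ~PAGE_MASK);  /* non-zero -> error */
            return 0;
    }
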
@@@ -279,7 -193,7 +193,7 @@@ void arch_pick_mmap_layout(struct mm_st
   * You really shouldn't be using read() or write() on /dev/mem.  This
   * might go away in the future.
   */
 -int valid_phys_addr_range(unsigned long addr, size_t size)
 +int valid_phys_addr_range(phys_addr_t addr, size_t size)
  {
        if (addr < PHYS_OFFSET)
                return 0;
diff --combined arch/sh/mm/mmap.c
@@@ -30,25 -30,13 +30,13 @@@ static inline unsigned long COLOUR_ALIG
        return base + off;
  }
  
- static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr,
-                                             unsigned long pgoff)
- {
-       unsigned long base = addr & ~shm_align_mask;
-       unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask;
-       if (base + off <= addr)
-               return base + off;
-       return base - off;
- }
  unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
        unsigned long len, unsigned long pgoff, unsigned long flags)
  {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
-       unsigned long start_addr;
        int do_colour_align;
+       struct vm_unmapped_area_info info;
  
        if (flags & MAP_FIXED) {
                /* We do not accept a shared mapping if it would violate
                        return addr;
        }
  
-       if (len > mm->cached_hole_size) {
-               start_addr = addr = mm->free_area_cache;
-       } else {
-               mm->cached_hole_size = 0;
-               start_addr = addr = TASK_UNMAPPED_BASE;
-       }
- full_search:
-       if (do_colour_align)
-               addr = COLOUR_ALIGN(addr, pgoff);
-       else
-               addr = PAGE_ALIGN(mm->free_area_cache);
-       for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
-               /* At this point:  (!vma || addr < vma->vm_end). */
-               if (unlikely(TASK_SIZE - len < addr)) {
-                       /*
-                        * Start a new search - just in case we missed
-                        * some holes.
-                        */
-                       if (start_addr != TASK_UNMAPPED_BASE) {
-                               start_addr = addr = TASK_UNMAPPED_BASE;
-                               mm->cached_hole_size = 0;
-                               goto full_search;
-                       }
-                       return -ENOMEM;
-               }
-               if (likely(!vma || addr + len <= vma->vm_start)) {
-                       /*
-                        * Remember the place where we stopped the search:
-                        */
-                       mm->free_area_cache = addr + len;
-                       return addr;
-               }
-               if (addr + mm->cached_hole_size < vma->vm_start)
-                       mm->cached_hole_size = vma->vm_start - addr;
-               addr = vma->vm_end;
-               if (do_colour_align)
-                       addr = COLOUR_ALIGN(addr, pgoff);
-       }
+       info.flags = 0;
+       info.length = len;
+       info.low_limit = TASK_UNMAPPED_BASE;
+       info.high_limit = TASK_SIZE;
+       info.align_mask = do_colour_align ? (PAGE_MASK & shm_align_mask) : 0;
+       info.align_offset = pgoff << PAGE_SHIFT;
+       return vm_unmapped_area(&info);
  }
  
  unsigned long
@@@ -131,6 -85,7 +85,7 @@@ arch_get_unmapped_area_topdown(struct f
        struct mm_struct *mm = current->mm;
        unsigned long addr = addr0;
        int do_colour_align;
+       struct vm_unmapped_area_info info;
  
        if (flags & MAP_FIXED) {
                /* We do not accept a shared mapping if it would violate
                        return addr;
        }
  
-       /* check if free_area_cache is useful for us */
-       if (len <= mm->cached_hole_size) {
-               mm->cached_hole_size = 0;
-               mm->free_area_cache = mm->mmap_base;
-       }
-       /* either no address requested or can't fit in requested address hole */
-       addr = mm->free_area_cache;
-       if (do_colour_align) {
-               unsigned long base = COLOUR_ALIGN_DOWN(addr-len, pgoff);
+       info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+       info.length = len;
+       info.low_limit = PAGE_SIZE;
+       info.high_limit = mm->mmap_base;
+       info.align_mask = do_colour_align ? (PAGE_MASK & shm_align_mask) : 0;
+       info.align_offset = pgoff << PAGE_SHIFT;
+       addr = vm_unmapped_area(&info);
  
-               addr = base + len;
-       }
-       /* make sure it can fit in the remaining address space */
-       if (likely(addr > len)) {
-               vma = find_vma(mm, addr-len);
-               if (!vma || addr <= vma->vm_start) {
-                       /* remember the address as a hint for next time */
-                       return (mm->free_area_cache = addr-len);
-               }
-       }
-       if (unlikely(mm->mmap_base < len))
-               goto bottomup;
-       addr = mm->mmap_base-len;
-       if (do_colour_align)
-               addr = COLOUR_ALIGN_DOWN(addr, pgoff);
-       do {
-               /*
-                * Lookup failure means no vma is above this address,
-                * else if new region fits below vma->vm_start,
-                * return with success:
-                */
-               vma = find_vma(mm, addr);
-               if (likely(!vma || addr+len <= vma->vm_start)) {
-                       /* remember the address as a hint for next time */
-                       return (mm->free_area_cache = addr);
-               }
-               /* remember the largest hole we saw so far */
-               if (addr + mm->cached_hole_size < vma->vm_start)
-                       mm->cached_hole_size = vma->vm_start - addr;
-               /* try just below the current vma->vm_start */
-               addr = vma->vm_start-len;
-               if (do_colour_align)
-                       addr = COLOUR_ALIGN_DOWN(addr, pgoff);
-       } while (likely(len < vma->vm_start));
- bottomup:
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
-       mm->cached_hole_size = ~0UL;
-       mm->free_area_cache = TASK_UNMAPPED_BASE;
-       addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
-       /*
-        * Restore the topdown base:
-        */
-       mm->free_area_cache = mm->mmap_base;
-       mm->cached_hole_size = ~0UL;
+       if (addr & ~PAGE_MASK) {
+               VM_BUG_ON(addr != -ENOMEM);
+               info.flags = 0;
+               info.low_limit = TASK_UNMAPPED_BASE;
+               info.high_limit = TASK_SIZE;
+               addr = vm_unmapped_area(&info);
+       }
  
        return addr;
  }
   * You really shouldn't be using read() or write() on /dev/mem.  This
   * might go away in the future.
   */
 -int valid_phys_addr_range(unsigned long addr, size_t count)
 +int valid_phys_addr_range(phys_addr_t addr, size_t count)
  {
        if (addr < __MEMORY_START)
                return 0;
diff --combined mm/mmap.c
+++ b/mm/mmap.c
@@@ -31,6 -31,7 +31,7 @@@
  #include <linux/audit.h>
  #include <linux/khugepaged.h>
  #include <linux/uprobes.h>
+ #include <linux/rbtree_augmented.h>
  
  #include <asm/uaccess.h>
  #include <asm/cacheflush.h>
@@@ -89,20 -90,6 +90,20 @@@ int sysctl_max_map_count __read_mostly 
  struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
  
  /*
 + * The global memory commitment made in the system can be a metric
 + * that can be used to drive ballooning decisions when Linux is hosted
 + * as a guest. On Hyper-V, the host implements a policy engine for dynamically
 + * balancing memory across competing virtual machines that are hosted.
 + * Several metrics drive this policy engine including the guest reported
 + * memory commitment.
 + */
 +unsigned long vm_memory_committed(void)
 +{
 +      return percpu_counter_read_positive(&vm_committed_as);
 +}
 +EXPORT_SYMBOL_GPL(vm_memory_committed);
 +
 +/*
   * Check that a process has enough memory to allocate a new virtual
   * mapping. 0 means there is enough memory for the allocation to
   * succeed and -ENOMEM implies there is not.
@@@ -311,40 -298,88 +312,88 @@@ out
        return retval;
  }
  
+ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
+ {
+       unsigned long max, subtree_gap;
+       max = vma->vm_start;
+       if (vma->vm_prev)
+               max -= vma->vm_prev->vm_end;
+       if (vma->vm_rb.rb_left) {
+               subtree_gap = rb_entry(vma->vm_rb.rb_left,
+                               struct vm_area_struct, vm_rb)->rb_subtree_gap;
+               if (subtree_gap > max)
+                       max = subtree_gap;
+       }
+       if (vma->vm_rb.rb_right) {
+               subtree_gap = rb_entry(vma->vm_rb.rb_right,
+                               struct vm_area_struct, vm_rb)->rb_subtree_gap;
+               if (subtree_gap > max)
+                       max = subtree_gap;
+       }
+       return max;
+ }
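
Note: rb_subtree_gap caches, for every rbtree node, the largest gap found between any vma in that subtree and its predecessor; vma_compute_subtree_gap() above recomputes it as the maximum of the node's own gap and the children's cached values. A toy restatement of that recurrence (illustrative types, not kernel code):

    #include <stdio.h>

    struct toy_node {
            unsigned long own_gap;       /* vm_start minus the previous vma's vm_end */
            unsigned long subtree_gap;   /* cached maximum over the whole subtree */
            struct toy_node *left, *right;
    };

    static unsigned long toy_compute_gap(const struct toy_node *n)
    {
            unsigned long max = n->own_gap;

            if (n->left && n->left->subtree_gap > max)
                    max = n->left->subtree_gap;
            if (n->right && n->right->subtree_gap > max)
                    max = n->right->subtree_gap;
            return max;
    }

    int main(void)
    {
            struct toy_node l = { .own_gap = 0x2000, .subtree_gap = 0x2000 };
            struct toy_node r = { .own_gap = 0x9000, .subtree_gap = 0x9000 };
            struct toy_node root = { .own_gap = 0x1000, .left = &l, .right = &r };

            printf("subtree gap = %#lx\n", toy_compute_gap(&root));  /* 0x9000 */
            return 0;
    }
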
  #ifdef CONFIG_DEBUG_VM_RB
  static int browse_rb(struct rb_root *root)
  {
-       int i = 0, j;
+       int i = 0, j, bug = 0;
        struct rb_node *nd, *pn = NULL;
        unsigned long prev = 0, pend = 0;
  
        for (nd = rb_first(root); nd; nd = rb_next(nd)) {
                struct vm_area_struct *vma;
                vma = rb_entry(nd, struct vm_area_struct, vm_rb);
-               if (vma->vm_start < prev)
-                       printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
-               if (vma->vm_start < pend)
+               if (vma->vm_start < prev) {
+                       printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
+                       bug = 1;
+               }
+               if (vma->vm_start < pend) {
                        printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
-               if (vma->vm_start > vma->vm_end)
-                       printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
+                       bug = 1;
+               }
+               if (vma->vm_start > vma->vm_end) {
+                       printk("vm_end %lx < vm_start %lx\n",
+                               vma->vm_end, vma->vm_start);
+                       bug = 1;
+               }
+               if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
+                       printk("free gap %lx, correct %lx\n",
+                              vma->rb_subtree_gap,
+                              vma_compute_subtree_gap(vma));
+                       bug = 1;
+               }
                i++;
                pn = nd;
                prev = vma->vm_start;
                pend = vma->vm_end;
        }
        j = 0;
-       for (nd = pn; nd; nd = rb_prev(nd)) {
+       for (nd = pn; nd; nd = rb_prev(nd))
                j++;
+       if (i != j) {
+               printk("backwards %d, forwards %d\n", j, i);
+               bug = 1;
+       }
+       return bug ? -1 : i;
+ }
+ static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
+ {
+       struct rb_node *nd;
+       for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+               struct vm_area_struct *vma;
+               vma = rb_entry(nd, struct vm_area_struct, vm_rb);
+               BUG_ON(vma != ignore &&
+                      vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
        }
-       if (i != j)
-               printk("backwards %d, forwards %d\n", j, i), i = 0;
-       return i;
  }
  
  void validate_mm(struct mm_struct *mm)
  {
        int bug = 0;
        int i = 0;
+       unsigned long highest_address = 0;
        struct vm_area_struct *vma = mm->mmap;
        while (vma) {
                struct anon_vma_chain *avc;
                list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
                        anon_vma_interval_tree_verify(avc);
                vma_unlock_anon_vma(vma);
+               highest_address = vma->vm_end;
                vma = vma->vm_next;
                i++;
        }
-       if (i != mm->map_count)
-               printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
+       if (i != mm->map_count) {
+               printk("map_count %d vm_next %d\n", mm->map_count, i);
+               bug = 1;
+       }
+       if (highest_address != mm->highest_vm_end) {
+               printk("mm->highest_vm_end %lx, found %lx\n",
+                      mm->highest_vm_end, highest_address);
+               bug = 1;
+       }
        i = browse_rb(&mm->mm_rb);
-       if (i != mm->map_count)
-               printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
+       if (i != mm->map_count) {
+               printk("map_count %d rb %d\n", mm->map_count, i);
+               bug = 1;
+       }
        BUG_ON(bug);
  }
  #else
+ #define validate_mm_rb(root, ignore) do { } while (0)
  #define validate_mm(mm) do { } while (0)
  #endif
  
+ RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
+                    unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
+ /*
+  * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
+  * vma->vm_prev->vm_end values changed, without modifying the vma's position
+  * in the rbtree.
+  */
+ static void vma_gap_update(struct vm_area_struct *vma)
+ {
+       /*
+        * As it turns out, RB_DECLARE_CALLBACKS() already created a callback
+        * function that does exactly what we want.
+        */
+       vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
+ }
+ static inline void vma_rb_insert(struct vm_area_struct *vma,
+                                struct rb_root *root)
+ {
+       /* All rb_subtree_gap values must be consistent prior to insertion */
+       validate_mm_rb(root, NULL);
+       rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
+ }
+ static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
+ {
+       /*
+        * All rb_subtree_gap values must be consistent prior to erase,
+        * with the possible exception of the vma being erased.
+        */
+       validate_mm_rb(root, vma);
+       /*
+        * Note rb_erase_augmented is a fairly large inline function,
+        * so make sure we instantiate it only once with our desired
+        * augmented rbtree callbacks.
+        */
+       rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
+ }
  /*
   * vma has some anon_vma assigned, and is already inserted on that
   * anon_vma's interval trees.
@@@ -435,8 -523,25 +537,25 @@@ static int find_vma_links(struct mm_str
  void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
                struct rb_node **rb_link, struct rb_node *rb_parent)
  {
+       /* Update tracking information for the gap following the new vma. */
+       if (vma->vm_next)
+               vma_gap_update(vma->vm_next);
+       else
+               mm->highest_vm_end = vma->vm_end;
+       /*
+        * vma->vm_prev wasn't known when we followed the rbtree to find the
+        * correct insertion point for that vma. As a result, we could not
+        * update the rb_subtree_gap values of the vma's vm_rb parents on the way down.
+        * So, we first insert the vma with a zero rb_subtree_gap value
+        * (to be consistent with what we did on the way down), and then
+        * immediately update the gap to the correct value. Finally we
+        * rebalance the rbtree after all augmented values have been set.
+        */
        rb_link_node(&vma->vm_rb, rb_parent, rb_link);
-       rb_insert_color(&vma->vm_rb, &mm->mm_rb);
+       vma->rb_subtree_gap = 0;
+       vma_gap_update(vma);
+       vma_rb_insert(vma, &mm->mm_rb);
  }
  
  static void __vma_link_file(struct vm_area_struct *vma)
@@@ -512,12 -617,12 +631,12 @@@ static inline voi
  __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
                struct vm_area_struct *prev)
  {
-       struct vm_area_struct *next = vma->vm_next;
+       struct vm_area_struct *next;
  
-       prev->vm_next = next;
+       vma_rb_erase(vma, &mm->mm_rb);
+       prev->vm_next = next = vma->vm_next;
        if (next)
                next->vm_prev = prev;
-       rb_erase(&vma->vm_rb, &mm->mm_rb);
        if (mm->mmap_cache == vma)
                mm->mmap_cache = prev;
  }
@@@ -539,6 -644,7 +658,7 @@@ int vma_adjust(struct vm_area_struct *v
        struct rb_root *root = NULL;
        struct anon_vma *anon_vma = NULL;
        struct file *file = vma->vm_file;
+       bool start_changed = false, end_changed = false;
        long adjust_next = 0;
        int remove_next = 0;
  
@@@ -629,8 -735,14 +749,14 @@@ again:                   remove_next = 1 + (end > next-
                        vma_interval_tree_remove(next, root);
        }
  
-       vma->vm_start = start;
-       vma->vm_end = end;
+       if (start != vma->vm_start) {
+               vma->vm_start = start;
+               start_changed = true;
+       }
+       if (end != vma->vm_end) {
+               vma->vm_end = end;
+               end_changed = true;
+       }
        vma->vm_pgoff = pgoff;
        if (adjust_next) {
                next->vm_start += adjust_next << PAGE_SHIFT;
                 * (it may either follow vma or precede it).
                 */
                __insert_vm_struct(mm, insert);
+       } else {
+               if (start_changed)
+                       vma_gap_update(vma);
+               if (end_changed) {
+                       if (!next)
+                               mm->highest_vm_end = end;
+                       else if (!adjust_next)
+                               vma_gap_update(next);
+               }
        }
  
        if (anon_vma) {
                 * we must remove another next too. It would clutter
                 * up the code too much to do both in one go.
                 */
-               if (remove_next == 2) {
-                       next = vma->vm_next;
+               next = vma->vm_next;
+               if (remove_next == 2)
                        goto again;
-               }
+               else if (next)
+                       vma_gap_update(next);
+               else
+                       mm->highest_vm_end = end;
        }
        if (insert && file)
                uprobe_mmap(insert);
@@@ -1167,8 -1291,9 +1305,9 @@@ SYSCALL_DEFINE6(mmap_pgoff, unsigned lo
                 * memory so no accounting is necessary
                 */
                file = hugetlb_file_setup(HUGETLB_ANON_FILE, addr, len,
-                                               VM_NORESERVE, &user,
-                                               HUGETLB_ANONHUGE_INODE);
+                               VM_NORESERVE,
+                               &user, HUGETLB_ANONHUGE_INODE,
+                               (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
                if (IS_ERR(file))
                        return PTR_ERR(file);
        }
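
Note: the extra hugetlb_file_setup() argument decodes the requested huge page size (as a log2) from the high bits of the mmap flags. A hedged userspace sketch of how a caller would encode that size; the fallback constant values below are the common x86 ones and are assumptions, treat your uapi <sys/mman.h> definitions as authoritative:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/mman.h>

    #ifndef MAP_HUGETLB
    #define MAP_HUGETLB    0x40000      /* assumed: common asm-generic value */
    #endif
    #ifndef MAP_HUGE_SHIFT
    #define MAP_HUGE_SHIFT 26           /* assumed: matches the uapi definition */
    #endif
    #define MAP_HUGE_2MB   (21 << MAP_HUGE_SHIFT)   /* log2(2MB) = 21 */

    int main(void)
    {
            size_t len = 2UL * 1024 * 1024;
            void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB,
                           -1, 0);

            if (p == MAP_FAILED)
                    perror("mmap");     /* needs hugetlb pages reserved by the admin */
            else
                    munmap(p, len);
            return 0;
    }
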
@@@ -1414,6 -1539,206 +1553,206 @@@ unacct_error
        return error;
  }
  
+ unsigned long unmapped_area(struct vm_unmapped_area_info *info)
+ {
+       /*
+        * We implement the search by looking for an rbtree node that
+        * immediately follows a suitable gap. That is,
+        * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
+        * - gap_end   = vma->vm_start        >= info->low_limit  + length;
+        * - gap_end - gap_start >= length
+        */
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long length, low_limit, high_limit, gap_start, gap_end;
+       /* Adjust search length to account for worst case alignment overhead */
+       length = info->length + info->align_mask;
+       if (length < info->length)
+               return -ENOMEM;
+       /* Adjust search limits by the desired length */
+       if (info->high_limit < length)
+               return -ENOMEM;
+       high_limit = info->high_limit - length;
+       if (info->low_limit > high_limit)
+               return -ENOMEM;
+       low_limit = info->low_limit + length;
+       /* Check if rbtree root looks promising */
+       if (RB_EMPTY_ROOT(&mm->mm_rb))
+               goto check_highest;
+       vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+       if (vma->rb_subtree_gap < length)
+               goto check_highest;
+       while (true) {
+               /* Visit left subtree if it looks promising */
+               gap_end = vma->vm_start;
+               if (gap_end >= low_limit && vma->vm_rb.rb_left) {
+                       struct vm_area_struct *left =
+                               rb_entry(vma->vm_rb.rb_left,
+                                        struct vm_area_struct, vm_rb);
+                       if (left->rb_subtree_gap >= length) {
+                               vma = left;
+                               continue;
+                       }
+               }
+               gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+ check_current:
+               /* Check if current node has a suitable gap */
+               if (gap_start > high_limit)
+                       return -ENOMEM;
+               if (gap_end >= low_limit && gap_end - gap_start >= length)
+                       goto found;
+               /* Visit right subtree if it looks promising */
+               if (vma->vm_rb.rb_right) {
+                       struct vm_area_struct *right =
+                               rb_entry(vma->vm_rb.rb_right,
+                                        struct vm_area_struct, vm_rb);
+                       if (right->rb_subtree_gap >= length) {
+                               vma = right;
+                               continue;
+                       }
+               }
+               /* Go back up the rbtree to find next candidate node */
+               while (true) {
+                       struct rb_node *prev = &vma->vm_rb;
+                       if (!rb_parent(prev))
+                               goto check_highest;
+                       vma = rb_entry(rb_parent(prev),
+                                      struct vm_area_struct, vm_rb);
+                       if (prev == vma->vm_rb.rb_left) {
+                               gap_start = vma->vm_prev->vm_end;
+                               gap_end = vma->vm_start;
+                               goto check_current;
+                       }
+               }
+       }
+ check_highest:
+       /* Check highest gap, which does not precede any rbtree node */
+       gap_start = mm->highest_vm_end;
+       gap_end = ULONG_MAX;  /* Only for VM_BUG_ON below */
+       if (gap_start > high_limit)
+               return -ENOMEM;
+ found:
+       /* We found a suitable gap. Clip it with the original low_limit. */
+       if (gap_start < info->low_limit)
+               gap_start = info->low_limit;
+       /* Adjust gap address to the desired alignment */
+       gap_start += (info->align_offset - gap_start) & info->align_mask;
+       VM_BUG_ON(gap_start + info->length > info->high_limit);
+       VM_BUG_ON(gap_start + info->length > gap_end);
+       return gap_start;
+ }
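
Note: unmapped_area() and unmapped_area_topdown() are reached through the vm_unmapped_area() helper used by the arch callers above; that helper lives in include/linux/mm.h, which is not part of this combined diff. A sketch of its presumed shape, dispatching on the TOPDOWN flag:

    static inline unsigned long
    vm_unmapped_area(struct vm_unmapped_area_info *info)
    {
            if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
                    return unmapped_area_topdown(info);
            return unmapped_area(info);
    }
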
+ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
+ {
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long length, low_limit, high_limit, gap_start, gap_end;
+       /* Adjust search length to account for worst case alignment overhead */
+       length = info->length + info->align_mask;
+       if (length < info->length)
+               return -ENOMEM;
+       /*
+        * Adjust search limits by the desired length.
+        * See implementation comment at top of unmapped_area().
+        */
+       gap_end = info->high_limit;
+       if (gap_end < length)
+               return -ENOMEM;
+       high_limit = gap_end - length;
+       if (info->low_limit > high_limit)
+               return -ENOMEM;
+       low_limit = info->low_limit + length;
+       /* Check highest gap, which does not precede any rbtree node */
+       gap_start = mm->highest_vm_end;
+       if (gap_start <= high_limit)
+               goto found_highest;
+       /* Check if rbtree root looks promising */
+       if (RB_EMPTY_ROOT(&mm->mm_rb))
+               return -ENOMEM;
+       vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+       if (vma->rb_subtree_gap < length)
+               return -ENOMEM;
+       while (true) {
+               /* Visit right subtree if it looks promising */
+               gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+               if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+                       struct vm_area_struct *right =
+                               rb_entry(vma->vm_rb.rb_right,
+                                        struct vm_area_struct, vm_rb);
+                       if (right->rb_subtree_gap >= length) {
+                               vma = right;
+                               continue;
+                       }
+               }
+ check_current:
+               /* Check if current node has a suitable gap */
+               gap_end = vma->vm_start;
+               if (gap_end < low_limit)
+                       return -ENOMEM;
+               if (gap_start <= high_limit && gap_end - gap_start >= length)
+                       goto found;
+               /* Visit left subtree if it looks promising */
+               if (vma->vm_rb.rb_left) {
+                       struct vm_area_struct *left =
+                               rb_entry(vma->vm_rb.rb_left,
+                                        struct vm_area_struct, vm_rb);
+                       if (left->rb_subtree_gap >= length) {
+                               vma = left;
+                               continue;
+                       }
+               }
+               /* Go back up the rbtree to find next candidate node */
+               while (true) {
+                       struct rb_node *prev = &vma->vm_rb;
+                       if (!rb_parent(prev))
+                               return -ENOMEM;
+                       vma = rb_entry(rb_parent(prev),
+                                      struct vm_area_struct, vm_rb);
+                       if (prev == vma->vm_rb.rb_right) {
+                               gap_start = vma->vm_prev ?
+                                       vma->vm_prev->vm_end : 0;
+                               goto check_current;
+                       }
+               }
+       }
+ found:
+       /* We found a suitable gap. Clip it with the original high_limit. */
+       if (gap_end > info->high_limit)
+               gap_end = info->high_limit;
+ found_highest:
+       /* Compute highest gap address at the desired alignment */
+       gap_end -= info->length;
+       gap_end -= (gap_end - info->align_offset) & info->align_mask;
+       VM_BUG_ON(gap_end < info->low_limit);
+       VM_BUG_ON(gap_end < gap_start);
+       return gap_end;
+ }
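
Note: the top-down variant mirrors the bottom-up one but rounds the final address down to the requested colour after reserving room for the mapping. A small userspace sketch of those last two steps with concrete, illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
            unsigned long gap_end      = 0x7ffff7ff8000UL;  /* end of a free gap */
            unsigned long length       = 0x5000;            /* requested size */
            unsigned long align_mask   = 0x3000;            /* PAGE_MASK & (SHMLBA-1) */
            unsigned long align_offset = 0x1000;            /* pgoff << PAGE_SHIFT */

            gap_end -= length;                                 /* make room for the mapping */
            gap_end -= (gap_end - align_offset) & align_mask;  /* round down to colour */

            printf("addr %#lx, colour %#lx (wanted %#lx)\n",
                   gap_end, gap_end & 0x3fff, align_offset & 0x3fff);
            return 0;
    }
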
  /* Get an address range which is currently unmapped.
   * For shmat() with addr=0.
   *
@@@ -1432,7 -1757,7 +1771,7 @@@ arch_get_unmapped_area(struct file *fil
  {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
-       unsigned long start_addr;
+       struct vm_unmapped_area_info info;
  
        if (len > TASK_SIZE)
                return -ENOMEM;
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
-       if (len > mm->cached_hole_size) {
-               start_addr = addr = mm->free_area_cache;
-       } else {
-               start_addr = addr = TASK_UNMAPPED_BASE;
-               mm->cached_hole_size = 0;
-       }
  
- full_search:
-       for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
-               /* At this point:  (!vma || addr < vma->vm_end). */
-               if (TASK_SIZE - len < addr) {
-                       /*
-                        * Start a new search - just in case we missed
-                        * some holes.
-                        */
-                       if (start_addr != TASK_UNMAPPED_BASE) {
-                               addr = TASK_UNMAPPED_BASE;
-                               start_addr = addr;
-                               mm->cached_hole_size = 0;
-                               goto full_search;
-                       }
-                       return -ENOMEM;
-               }
-               if (!vma || addr + len <= vma->vm_start) {
-                       /*
-                        * Remember the place where we stopped the search:
-                        */
-                       mm->free_area_cache = addr + len;
-                       return addr;
-               }
-               if (addr + mm->cached_hole_size < vma->vm_start)
-                       mm->cached_hole_size = vma->vm_start - addr;
-               addr = vma->vm_end;
-       }
+       info.flags = 0;
+       info.length = len;
+       info.low_limit = TASK_UNMAPPED_BASE;
+       info.high_limit = TASK_SIZE;
+       info.align_mask = 0;
+       return vm_unmapped_area(&info);
  }
  #endif        
  
@@@ -1505,7 -1803,8 +1817,8 @@@ arch_get_unmapped_area_topdown(struct f
  {
        struct vm_area_struct *vma;
        struct mm_struct *mm = current->mm;
-       unsigned long addr = addr0, start_addr;
+       unsigned long addr = addr0;
+       struct vm_unmapped_area_info info;
  
        /* requested length too big for entire address space */
        if (len > TASK_SIZE)
                        return addr;
        }
  
-       /* check if free_area_cache is useful for us */
-       if (len <= mm->cached_hole_size) {
-               mm->cached_hole_size = 0;
-               mm->free_area_cache = mm->mmap_base;
-       }
- try_again:
-       /* either no address requested or can't fit in requested address hole */
-       start_addr = addr = mm->free_area_cache;
-       if (addr < len)
-               goto fail;
-       addr -= len;
-       do {
-               /*
-                * Lookup failure means no vma is above this address,
-                * else if new region fits below vma->vm_start,
-                * return with success:
-                */
-               vma = find_vma(mm, addr);
-               if (!vma || addr+len <= vma->vm_start)
-                       /* remember the address as a hint for next time */
-                       return (mm->free_area_cache = addr);
-               /* remember the largest hole we saw so far */
-               if (addr + mm->cached_hole_size < vma->vm_start)
-                       mm->cached_hole_size = vma->vm_start - addr;
-               /* try just below the current vma->vm_start */
-               addr = vma->vm_start-len;
-       } while (len < vma->vm_start);
- fail:
-       /*
-        * if hint left us with no space for the requested
-        * mapping then try again:
-        *
-        * Note: this is different with the case of bottomup
-        * which does the fully line-search, but we use find_vma
-        * here that causes some holes skipped.
-        */
-       if (start_addr != mm->mmap_base) {
-               mm->free_area_cache = mm->mmap_base;
-               mm->cached_hole_size = 0;
-               goto try_again;
-       }
+       info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+       info.length = len;
+       info.low_limit = PAGE_SIZE;
+       info.high_limit = mm->mmap_base;
+       info.align_mask = 0;
+       addr = vm_unmapped_area(&info);
  
        /*
         * A failed mmap() very likely causes application failure,
         * can happen with large stack limits and large mmap()
         * allocations.
         */
-       mm->cached_hole_size = ~0UL;
-       mm->free_area_cache = TASK_UNMAPPED_BASE;
-       addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
-       /*
-        * Restore the topdown base:
-        */
-       mm->free_area_cache = mm->mmap_base;
-       mm->cached_hole_size = ~0UL;
+       if (addr & ~PAGE_MASK) {
+               VM_BUG_ON(addr != -ENOMEM);
+               info.flags = 0;
+               info.low_limit = TASK_UNMAPPED_BASE;
+               info.high_limit = TASK_SIZE;
+               addr = vm_unmapped_area(&info);
+       }
  
        return addr;
  }
@@@ -1797,6 -2054,10 +2068,10 @@@ int expand_upwards(struct vm_area_struc
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_end = address;
                                anon_vma_interval_tree_post_update_vma(vma);
+                               if (vma->vm_next)
+                                       vma_gap_update(vma->vm_next);
+                               else
+                                       vma->vm_mm->highest_vm_end = address;
                                perf_event_mmap(vma);
                        }
                }
@@@ -1851,6 -2112,7 +2126,7 @@@ int expand_downwards(struct vm_area_str
                                vma->vm_start = address;
                                vma->vm_pgoff -= grow;
                                anon_vma_interval_tree_post_update_vma(vma);
+                               vma_gap_update(vma);
                                perf_event_mmap(vma);
                        }
                }
@@@ -1973,14 -2235,17 +2249,17 @@@ detach_vmas_to_be_unmapped(struct mm_st
        insertion_point = (prev ? &prev->vm_next : &mm->mmap);
        vma->vm_prev = NULL;
        do {
-               rb_erase(&vma->vm_rb, &mm->mm_rb);
+               vma_rb_erase(vma, &mm->mm_rb);
                mm->map_count--;
                tail_vma = vma;
                vma = vma->vm_next;
        } while (vma && vma->vm_start < end);
        *insertion_point = vma;
-       if (vma)
+       if (vma) {
                vma->vm_prev = prev;
+               vma_gap_update(vma);
+       } else
+               mm->highest_vm_end = prev ? prev->vm_end : 0;
        tail_vma->vm_next = NULL;
        if (mm->unmap_area == arch_unmap_area)
                addr = prev ? prev->vm_end : mm->mmap_base;