Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg...

[linux-2.6.git] / mm / huge_memory.c
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index b6facc35e8932d8b612e0d78ac48056f4c2aa815..e2d1587be269bf475a5ce2c05e6d10d9648a2121 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -15,6 +15,8 @@
  #include <linux/mm_inline.h>
  #include <linux/kthread.h>
  #include <linux/khugepaged.h>
+#include <linux/freezer.h>
+#include <linux/mman.h>
  #include <asm/tlb.h>
  #include <asm/pgalloc.h>
  #include "internal.h"
@@ -242,24 +244,28 @@ static ssize_t single_flag_show(struct kobject *kobj,
                                 struct kobj_attribute *attr, char *buf,
                                 enum transparent_hugepage_flag flag)
  {
-       if (test_bit(flag, &transparent_hugepage_flags))
-               return sprintf(buf, "[yes] no\n");
-       else
-               return sprintf(buf, "yes [no]\n");
+       return sprintf(buf, "%d\n",
+                      !!test_bit(flag, &transparent_hugepage_flags));
  }
+
  static ssize_t single_flag_store(struct kobject *kobj,
                                  struct kobj_attribute *attr,
                                  const char *buf, size_t count,
                                  enum transparent_hugepage_flag flag)
  {
-       if (!memcmp("yes", buf,
-                   min(sizeof("yes")-1, count))) {
+       unsigned long value;
+       int ret;
+
+       ret = kstrtoul(buf, 10, &value);
+       if (ret < 0)
+               return ret;
+       if (value > 1)
+               return -EINVAL;
+
+       if (value)
                 set_bit(flag, &transparent_hugepage_flags);
-       } else if (!memcmp("no", buf,
-                          min(sizeof("no")-1, count))) {
+       else
                 clear_bit(flag, &transparent_hugepage_flags);
-       } else
-               return -EINVAL;
  
         return count;
  }
@@ -487,7 +493,15 @@ static int __init hugepage_init(void)
         int err;
  #ifdef CONFIG_SYSFS
         static struct kobject *hugepage_kobj;
+#endif
+
+       err = -EINVAL;
+       if (!has_transparent_hugepage()) {
+               transparent_hugepage_flags = 0;
+               goto out;
+       }
  
+#ifdef CONFIG_SYSFS
         err = -ENOMEM;
         hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
         if (unlikely(!hugepage_kobj)) {
@@ -518,6 +532,14 @@ static int __init hugepage_init(void)
                 goto out;
         }
  
+       /*
+        * By default disable transparent hugepages on smaller systems,
+        * where the extra memory used could hurt more than TLB overhead
+        * is likely to save.  The admin can still enable it through /sys.
+        */
+       if (totalram_pages < (512 << (20 - PAGE_SHIFT)))
+               transparent_hugepage_flags = 0;
+
         start_khugepaged();
  
         set_recommended_min_free_kbytes();
@@ -625,23 +647,24 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
         return ret;
  }
  
-static inline gfp_t alloc_hugepage_gfpmask(int defrag)
+static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
  {
-       return GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT);
+       return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp;
  }
  
  static inline struct page *alloc_hugepage_vma(int defrag,
                                               struct vm_area_struct *vma,
-                                             unsigned long haddr)
+                                             unsigned long haddr, int nd,
+                                             gfp_t extra_gfp)
  {
-       return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
-                              HPAGE_PMD_ORDER, vma, haddr);
+       return alloc_pages_vma(alloc_hugepage_gfpmask(defrag, extra_gfp),
+                              HPAGE_PMD_ORDER, vma, haddr, nd);
  }
  
  #ifndef CONFIG_NUMA
  static inline struct page *alloc_hugepage(int defrag)
  {
-       return alloc_pages(alloc_hugepage_gfpmask(defrag),
+       return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
                            HPAGE_PMD_ORDER);
  }
  #endif
@@ -660,9 +683,12 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 if (unlikely(khugepaged_enter(vma)))
                         return VM_FAULT_OOM;
                 page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-                                         vma, haddr);
-               if (unlikely(!page))
+                                         vma, haddr, numa_node_id(), 0);
+               if (unlikely(!page)) {
+                       count_vm_event(THP_FAULT_FALLBACK);
                         goto out;
+               }
+               count_vm_event(THP_FAULT_ALLOC);
                 if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
                         put_page(page);
                         goto out;
@@ -781,8 +807,9 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
         }
  
         for (i = 0; i < HPAGE_PMD_NR; i++) {
-               pages[i] = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
-                                         vma, address);
+               pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
+                                              __GFP_OTHER_NODE,
+                                              vma, address, page_to_nid(page));
                 if (unlikely(!pages[i] ||
                              mem_cgroup_newpage_charge(pages[i], mm,
                                                        GFP_KERNEL))) {
@@ -884,16 +911,18 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if (transparent_hugepage_enabled(vma) &&
             !transparent_hugepage_debug_cow())
                 new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-                                             vma, haddr);
+                                             vma, haddr, numa_node_id(), 0);
         else
                 new_page = NULL;
  
         if (unlikely(!new_page)) {
+               count_vm_event(THP_FAULT_FALLBACK);
                 ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
                                                    pmd, orig_pmd, page, haddr);
                 put_page(page);
                 goto out;
         }
+       count_vm_event(THP_FAULT_ALLOC);
  
         if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
                 put_page(new_page);
@@ -1110,7 +1139,7 @@ static int __split_huge_page_splitting(struct page *page,
                  * We can't temporarily set the pmd to null in order
                  * to split it, the pmd must remain marked huge at all
                  * times or the VM won't take the pmd_trans_huge paths
-                * and it won't wait on the anon_vma->root->lock to
+                * and it won't wait on the anon_vma->root->mutex to
                  * serialize against split_huge_page*.
                  */
                 pmdp_splitting_flush_notify(vma, address, pmd);
@@ -1126,6 +1155,7 @@ static void __split_huge_page_refcount(struct page *page)
         int i;
         unsigned long head_index = page->index;
         struct zone *zone = page_zone(page);
+       int zonestat;
  
         /* prevent PageLRU to go away from under us, and freeze lru stats */
         spin_lock_irq(&zone->lru_lock);
@@ -1143,7 +1173,12 @@ static void __split_huge_page_refcount(struct page *page)
                 /* after clearing PageTail the gup refcount can be released */
                 smp_mb();
  
-               page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+               /*
+                * Retain the hwpoison flag of a poisoned tail page:
+                *   this fixes the wrong process being killed on a KVM guest
+                *   by memory-failure handling.
+                */
+               page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
                 page_tail->flags |= (page->flags &
                                      ((1L << PG_referenced) |
                                       (1L << PG_swapbacked) |
@@ -1184,12 +1219,23 @@ static void __split_huge_page_refcount(struct page *page)
                 BUG_ON(!PageDirty(page_tail));
                 BUG_ON(!PageSwapBacked(page_tail));
  
+               mem_cgroup_split_huge_fixup(page, page_tail);
+
                 lru_add_page_tail(zone, page, page_tail);
         }
  
         __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
         __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
  
+       /*
+        * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics,
+        * so adjust those appropriately if this page is on the LRU.
+        */
+       if (PageLRU(page)) {
+               zonestat = NR_LRU_BASE + page_lru(page);
+               __mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1));
+       }
+
         ClearPageCompound(page);
         compound_unlock(page);
         spin_unlock_irq(&zone->lru_lock);
@@ -1287,7 +1333,7 @@ static int __split_huge_page_map(struct page *page,
         return ret;
  }
  
-/* must be called with anon_vma->root->lock hold */
+/* must be called with anon_vma->root->mutex held */
  static void __split_huge_page(struct page *page,
                               struct anon_vma *anon_vma)
  {
@@ -1353,6 +1399,7 @@ int split_huge_page(struct page *page)
  
         BUG_ON(!PageSwapBacked(page));
         __split_huge_page(page, anon_vma);
+       count_vm_event(THP_SPLIT);
  
         BUG_ON(PageCompound(page));
  out_unlock:
@@ -1361,18 +1408,44 @@ out:
         return ret;
  }
  
-int hugepage_madvise(unsigned long *vm_flags)
-{
-       /*
-        * Be somewhat over-protective like KSM for now!
-        */
-       if (*vm_flags & (VM_HUGEPAGE | VM_SHARED  | VM_MAYSHARE   |
-                        VM_PFNMAP   | VM_IO      | VM_DONTEXPAND |
-                        VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
-                        VM_MIXEDMAP | VM_SAO))
-               return -EINVAL;
+#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
+                  VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
  
-       *vm_flags |= VM_HUGEPAGE;
+int hugepage_madvise(struct vm_area_struct *vma,
+                    unsigned long *vm_flags, int advice)
+{
+       switch (advice) {
+       case MADV_HUGEPAGE:
+               /*
+                * Be somewhat over-protective like KSM for now!
+                */
+               if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
+                       return -EINVAL;
+               *vm_flags &= ~VM_NOHUGEPAGE;
+               *vm_flags |= VM_HUGEPAGE;
+               /*
+                * If the vma becomes eligible for khugepaged to scan,
+                * register it here without waiting for a page fault that
+                * may not happen any time soon.
+                */
+               if (unlikely(khugepaged_enter_vma_merge(vma)))
+                       return -ENOMEM;
+               break;
+       case MADV_NOHUGEPAGE:
+               /*
+                * Be somewhat over-protective like KSM for now!
+                */
+               if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
+                       return -EINVAL;
+               *vm_flags &= ~VM_HUGEPAGE;
+               *vm_flags |= VM_NOHUGEPAGE;
+               /*
+                * Setting VM_NOHUGEPAGE will prevent khugepaged from scanning
+                * this vma even if we leave the mm registered in khugepaged if
+                * it got registered before VM_NOHUGEPAGE was set.
+                */
+               break;
+       }
  
         return 0;
  }
@@ -1496,10 +1569,14 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
                  * page fault if needed.
                  */
                 return 0;
-       if (vma->vm_file || vma->vm_ops)
+       if (vma->vm_ops)
                 /* khugepaged not yet working on file or special mappings */
                 return 0;
-       VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+       /*
+        * If is_pfn_mapping() is true, is_linear_pfn_mapping() must be
+        * true too; verify it here.
+        */
+       VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
         hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
         hend = vma->vm_end & HPAGE_PMD_MASK;
         if (hstart < hend)
@@ -1519,14 +1596,13 @@ void __khugepaged_exit(struct mm_struct *mm)
                 list_del(&mm_slot->mm_node);
                 free = 1;
         }
+       spin_unlock(&khugepaged_mm_lock);
  
         if (free) {
-               spin_unlock(&khugepaged_mm_lock);
                 clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
                 free_mm_slot(mm_slot);
                 mmdrop(mm);
         } else if (mm_slot) {
-               spin_unlock(&khugepaged_mm_lock);
                 /*
                  * This is required to serialize against
                  * khugepaged_test_exit() (which is guaranteed to run
@@ -1537,8 +1613,7 @@ void __khugepaged_exit(struct mm_struct *mm)
                  */
                 down_write(&mm->mmap_sem);
                 up_write(&mm->mmap_sem);
-       } else
-               spin_unlock(&khugepaged_mm_lock);
+       }
  }
  
  static void release_pte_page(struct page *page)
@@ -1624,7 +1699,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                 VM_BUG_ON(PageLRU(page));
  
                 /* If there is no mapped pte young don't collapse the page */
-               if (pte_young(pteval))
+               if (pte_young(pteval) || PageReferenced(page) ||
+                   mmu_notifier_test_young(vma->vm_mm, address))
                         referenced = 1;
         }
         if (unlikely(!referenced))
@@ -1678,7 +1754,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
  static void collapse_huge_page(struct mm_struct *mm,
                                unsigned long address,
                                struct page **hpage,
-                              struct vm_area_struct *vma)
+                              struct vm_area_struct *vma,
+                              int node)
  {
         pgd_t *pgd;
         pud_t *pud;
@@ -1692,6 +1769,7 @@ static void collapse_huge_page(struct mm_struct *mm,
  
         VM_BUG_ON(address & ~HPAGE_PMD_MASK);
  #ifndef CONFIG_NUMA
+       up_read(&mm->mmap_sem);
         VM_BUG_ON(!*hpage);
         new_page = *hpage;
  #else
@@ -1706,22 +1784,29 @@ static void collapse_huge_page(struct mm_struct *mm,
          * mmap_sem in read mode is good idea also to allow greater
          * scalability.
          */
-       new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+       new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
+                                     node, __GFP_OTHER_NODE);
+
+       /*
+        * After allocating the hugepage, release the mmap_sem read lock in
+        * preparation for taking it in write mode.
+        */
+       up_read(&mm->mmap_sem);
         if (unlikely(!new_page)) {
-               up_read(&mm->mmap_sem);
+               count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                 *hpage = ERR_PTR(-ENOMEM);
                 return;
         }
  #endif
+
+       count_vm_event(THP_COLLAPSE_ALLOC);
         if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
-               up_read(&mm->mmap_sem);
+#ifdef CONFIG_NUMA
                 put_page(new_page);
+#endif
                 return;
         }
  
-       /* after allocating the hugepage upgrade to mmap_sem write mode */
-       up_read(&mm->mmap_sem);
-
         /*
          * Prevent all access to pagetables with the exception of
          * gup_fast later hanlded by the ptep_clear_flush and the VM
@@ -1737,13 +1822,19 @@ static void collapse_huge_page(struct mm_struct *mm,
         if (address < hstart || address + HPAGE_PMD_SIZE > hend)
                 goto out;
  
-       if (!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always())
+       if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
+           (vma->vm_flags & VM_NOHUGEPAGE))
                 goto out;
  
-       /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-       if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+       if (!vma->anon_vma || vma->vm_ops)
                 goto out;
-       VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+       if (is_vma_temporary_stack(vma))
+               goto out;
+       /*
+        * If is_pfn_mapping() is true, is_linear_pfn_mapping() must be
+        * true too; verify it here.
+        */
+       VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
  
         pgd = pgd_offset(mm, address);
         if (!pgd_present(*pgd))
@@ -1776,15 +1867,14 @@ static void collapse_huge_page(struct mm_struct *mm,
         spin_lock(ptl);
         isolated = __collapse_huge_page_isolate(vma, address, pte);
         spin_unlock(ptl);
-       pte_unmap(pte);
  
         if (unlikely(!isolated)) {
+               pte_unmap(pte);
                 spin_lock(&mm->page_table_lock);
                 BUG_ON(!pmd_none(*pmd));
                 set_pmd_at(mm, address, pmd, _pmd);
                 spin_unlock(&mm->page_table_lock);
                 anon_vma_unlock(vma->anon_vma);
-               mem_cgroup_uncharge_page(new_page);
                 goto out;
         }
  
@@ -1795,6 +1885,7 @@ static void collapse_huge_page(struct mm_struct *mm,
         anon_vma_unlock(vma->anon_vma);
  
         __collapse_huge_page_copy(pte, new_page, vma, address, ptl);
+       pte_unmap(pte);
         __SetPageUptodate(new_page);
         pgtable = pmd_pgtable(_pmd);
         VM_BUG_ON(page_count(pgtable) != 1);
@@ -1829,6 +1920,7 @@ out_up_write:
         return;
  
  out:
+       mem_cgroup_uncharge_page(new_page);
  #ifdef CONFIG_NUMA
         put_page(new_page);
  #endif
@@ -1848,6 +1940,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
         struct page *page;
         unsigned long _address;
         spinlock_t *ptl;
+       int node = -1;
  
         VM_BUG_ON(address & ~HPAGE_PMD_MASK);
  
@@ -1878,13 +1971,21 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                 page = vm_normal_page(vma, _address, pteval);
                 if (unlikely(!page))
                         goto out_unmap;
+               /*
+                * Choose the node of the first page. This could
+                * be more sophisticated and look at more pages,
+                * but isn't for now.
+                */
+               if (node == -1)
+                       node = page_to_nid(page);
                 VM_BUG_ON(PageCompound(page));
                 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
                         goto out_unmap;
                 /* cannot use mapcount: can't collapse if there's a gup pin */
                 if (page_count(page) != 1)
                         goto out_unmap;
-               if (pte_young(pteval))
+               if (pte_young(pteval) || PageReferenced(page) ||
+                   mmu_notifier_test_young(vma->vm_mm, address))
                         referenced = 1;
         }
         if (referenced)
@@ -1893,7 +1994,7 @@ out_unmap:
         pte_unmap_unlock(pte, ptl);
         if (ret)
                 /* collapse_huge_page will return with the mmap_sem released */
-               collapse_huge_page(mm, address, hpage, vma);
+               collapse_huge_page(mm, address, hpage, vma, node);
  out:
         return ret;
  }
@@ -1959,34 +2060,33 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
                         break;
                 }
  
-               if (!(vma->vm_flags & VM_HUGEPAGE) &&
-                   !khugepaged_always()) {
+               if ((!(vma->vm_flags & VM_HUGEPAGE) &&
+                    !khugepaged_always()) ||
+                   (vma->vm_flags & VM_NOHUGEPAGE)) {
+               skip:
                         progress++;
                         continue;
                 }
-
-               /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-               if (!vma->anon_vma || vma->vm_ops || vma->vm_file) {
-                       khugepaged_scan.address = vma->vm_end;
-                       progress++;
-                       continue;
-               }
-               VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
+               if (!vma->anon_vma || vma->vm_ops)
+                       goto skip;
+               if (is_vma_temporary_stack(vma))
+                       goto skip;
+               /*
+                * If is_pfn_mapping() is true, is_linear_pfn_mapping()
+                * must be true too; verify it here.
+                */
+               VM_BUG_ON(is_linear_pfn_mapping(vma) ||
+                         vma->vm_flags & VM_NO_THP);
  
                 hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
                 hend = vma->vm_end & HPAGE_PMD_MASK;
-               if (hstart >= hend) {
-                       progress++;
-                       continue;
-               }
+               if (hstart >= hend)
+                       goto skip;
+               if (khugepaged_scan.address > hend)
+                       goto skip;
                 if (khugepaged_scan.address < hstart)
                         khugepaged_scan.address = hstart;
-               if (khugepaged_scan.address > hend) {
-                       khugepaged_scan.address = hend + HPAGE_PMD_SIZE;
-                       progress++;
-                       continue;
-               }
-               BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
+               VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
  
                 while (khugepaged_scan.address < hend) {
                         int ret;
@@ -2015,7 +2115,7 @@ breakouterloop:
  breakouterloop_mmap_sem:
  
         spin_lock(&khugepaged_mm_lock);
-       BUG_ON(khugepaged_scan.mm_slot != mm_slot);
+       VM_BUG_ON(khugepaged_scan.mm_slot != mm_slot);
         /*
          * Release the current mm_slot if this mm is about to die, or
          * if we scanned all vmas of this mm.
@@ -2067,14 +2167,20 @@ static void khugepaged_do_scan(struct page **hpage)
  #ifndef CONFIG_NUMA
                 if (!*hpage) {
                         *hpage = alloc_hugepage(khugepaged_defrag());
-                       if (unlikely(!*hpage))
+                       if (unlikely(!*hpage)) {
+                               count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                                 break;
+                       }
+                       count_vm_event(THP_COLLAPSE_ALLOC);
                 }
  #else
                 if (IS_ERR(*hpage))
                         break;
  #endif
  
+               if (unlikely(kthread_should_stop() || freezing(current)))
+                       break;
+
                 spin_lock(&khugepaged_mm_lock);
                 if (!khugepaged_scan.mm_slot)
                         pass_through_head++;
@@ -2105,8 +2211,11 @@ static struct page *khugepaged_alloc_hugepage(void)
  
         do {
                 hpage = alloc_hugepage(khugepaged_defrag());
-               if (!hpage)
+               if (!hpage) {
+                       count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                         khugepaged_alloc_sleep();
+               } else
+                       count_vm_event(THP_COLLAPSE_ALLOC);
         } while (unlikely(!hpage) &&
                  likely(khugepaged_enabled()));
         return hpage;
@@ -2137,6 +2246,9 @@ static void khugepaged_loop(void)
                 if (hpage)
                         put_page(hpage);
  #endif
+               try_to_freeze();
+               if (unlikely(kthread_should_stop()))
+                       break;
                 if (khugepaged_has_work()) {
                         DEFINE_WAIT(wait);
                         if (!khugepaged_scan_sleep_millisecs)
@@ -2147,8 +2259,8 @@ static void khugepaged_loop(void)
                                         khugepaged_scan_sleep_millisecs));
                         remove_wait_queue(&khugepaged_wait, &wait);
                 } else if (khugepaged_enabled())
-                       wait_event_interruptible(khugepaged_wait,
-                                                khugepaged_wait_event());
+                       wait_event_freezable(khugepaged_wait,
+                                            khugepaged_wait_event());
         }
  }
  
@@ -2156,6 +2268,7 @@ static int khugepaged(void *none)
  {
         struct mm_slot *mm_slot;
  
+       set_freezable();
         set_user_nice(current, 19);
  
         /* serialize with start_khugepaged() */
@@ -2163,13 +2276,15 @@ static int khugepaged(void *none)
  
         for (;;) {
                 mutex_unlock(&khugepaged_mutex);
-               BUG_ON(khugepaged_thread != current);
+               VM_BUG_ON(khugepaged_thread != current);
                 khugepaged_loop();
-               BUG_ON(khugepaged_thread != current);
+               VM_BUG_ON(khugepaged_thread != current);
  
                 mutex_lock(&khugepaged_mutex);
                 if (!khugepaged_enabled())
                         break;
+               if (unlikely(kthread_should_stop()))
+                       break;
         }
  
         spin_lock(&khugepaged_mm_lock);