diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 81532f2..d819d93 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -989,7 +989,7 @@ struct page *follow_trans_huge_pmd(struct mm_struct *mm,
        page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
        VM_BUG_ON(!PageCompound(page));
        if (flags & FOLL_GET)
-               get_page(page);
+               get_page_foll(page);
 
 out:
        return page;
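
This hunk depends on get_page_foll(), introduced by the same patch in mm/internal.h (not part of this file's diff). A plain get_page() on a tail page would elevate the tail's _count, but the new scheme keeps tail _count pinned at zero so that speculative get_page_unless_zero() callers always fail on tail pages; a follow_page reference instead elevates the head's _count and parks the tail reference in the tail's _mapcount. A sketch, abbreviated from the mm/internal.h side of the change:

	static inline void get_page_foll(struct page *page)
	{
		if (unlikely(PageTail(page)))
			/*
			 * Safe: __split_huge_page_refcount() cannot run
			 * from under us because the caller holds the PTE
			 * lock. This elevates head->_count and parks the
			 * reference in tail->_mapcount.
			 */
			__get_page_tail_foll(page, true);
		else {
			/*
			 * A normal page, or the head of a compound page,
			 * must already have an elevated _count.
			 */
			VM_BUG_ON(atomic_read(&page->_count) <= 0);
			atomic_inc(&page->_count);
		}
	}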
@@ -1156,6 +1156,7 @@ static void __split_huge_page_refcount(struct page *page)
        unsigned long head_index = page->index;
        struct zone *zone = page_zone(page);
        int zonestat;
+       int tail_count = 0;
 
        /* prevent PageLRU to go away from under us, and freeze lru stats */
        spin_lock_irq(&zone->lru_lock);
@@ -1164,11 +1165,27 @@ static void __split_huge_page_refcount(struct page *page)
        for (i = 1; i < HPAGE_PMD_NR; i++) {
                struct page *page_tail = page + i;
 
-               /* tail_page->_count cannot change */
-               atomic_sub(atomic_read(&page_tail->_count), &page->_count);
-               BUG_ON(page_count(page) <= 0);
-               atomic_add(page_mapcount(page) + 1, &page_tail->_count);
-               BUG_ON(atomic_read(&page_tail->_count) <= 0);
+               /* tail_page->_mapcount cannot change */
+               BUG_ON(page_mapcount(page_tail) < 0);
+               tail_count += page_mapcount(page_tail);
+               /* check for overflow */
+               BUG_ON(tail_count < 0);
+               BUG_ON(atomic_read(&page_tail->_count) != 0);
+               /*
+                * tail_page->_count is zero and not changing from
+                * under us. But get_page_unless_zero() may be running
+                * from under us on the tail_page. If we used
+                * atomic_set() below instead of atomic_add(), we
+                * would then run atomic_set() concurrently with
+                * get_page_unless_zero(), and atomic_set() is
+                * implemented in C not using locked ops. spin_unlock
+                * on x86 sometimes uses locked ops because of PPro
+                * errata 66, 92, so unless somebody can guarantee
+                * atomic_set() here would be safe on all archs (and
+                * not only on x86), it's safer to use atomic_add().
+                */
+               atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1,
+                          &page_tail->_count);
 
                /* after clearing PageTail the gup refcount can be released */
                smp_mb();
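
The speculative path this comment defends against is get_page_unless_zero(), unchanged by this patch (include/linux/mm.h):

	static inline int get_page_unless_zero(struct page *page)
	{
		return atomic_inc_not_zero(&page->_count);
	}

atomic_inc_not_zero() is an atomic read-modify-write on every architecture, and so is atomic_add(), so the two compose safely: a racing speculative getter either observes the pre-add zero and fails, or observes the post-add value and takes a legitimate reference. atomic_set(), by contrast, is a plain C store on most architectures, and whether a plain store is safe against a concurrent locked RMW on the same word is arch-dependent; that is why the comment insists on atomic_add() even though the tail's _count is known to be zero here.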
@@ -1186,10 +1203,7 @@ static void __split_huge_page_refcount(struct page *page)
                                      (1L << PG_uptodate)));
                page_tail->flags |= (1L << PG_dirty);
 
-               /*
-                * 1) clear PageTail before overwriting first_page
-                * 2) clear PageTail before clearing PageHead for VM_BUG_ON
-                */
+               /* clear PageTail before overwriting first_page */
                smp_wmb();
 
                /*
@@ -1206,7 +1220,6 @@ static void __split_huge_page_refcount(struct page *page)
                 * status is achieved setting a reserved bit in the
                 * pmd, not by clearing the present bit.
                */
-               BUG_ON(page_mapcount(page_tail));
                page_tail->_mapcount = page->_mapcount;
 
                BUG_ON(page_tail->mapping);
@@ -1223,6 +1236,8 @@ static void __split_huge_page_refcount(struct page *page)
 
                lru_add_page_tail(zone, page, page_tail);
        }
+       atomic_sub(tail_count, &page->_count);
+       BUG_ON(atomic_read(&page->_count) <= 0);
 
        __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
        __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
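
The atomic_sub() just above is the other half of get_page_foll(): every gup reference parked in a tail's _mapcount also took a pin on the head, and tail_count sums those head pins across all tails so they can be dropped in one operation once every tail has been published. Deferring the subtraction until after the loop (the old code subtracted per-tail) keeps the head's _count safely elevated for the whole walk. A worked illustration with hypothetical numbers: for a THP mapped by two mms (page_mapcount(page) == 2) with three gup pins parked on one tail (page_mapcount(page_tail) == 3), that tail leaves the loop with _count == 2 + 3 + 1: two references for its future pte mappings, three for the gup pins, and one transient reference that __split_huge_page_refcount() holds until its final put_page(page_tail) pass (outside the hunks shown here). Meanwhile tail_count picks up the three matching head pins removed by the atomic_sub().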
@@ -1596,14 +1611,13 @@ void __khugepaged_exit(struct mm_struct *mm)
                list_del(&mm_slot->mm_node);
                free = 1;
        }
+       spin_unlock(&khugepaged_mm_lock);
 
        if (free) {
-               spin_unlock(&khugepaged_mm_lock);
                clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
                free_mm_slot(mm_slot);
                mmdrop(mm);
        } else if (mm_slot) {
-               spin_unlock(&khugepaged_mm_lock);
                /*
                 * This is required to serialize against
                 * khugepaged_test_exit() (which is guaranteed to run
@@ -1614,8 +1628,7 @@ void __khugepaged_exit(struct mm_struct *mm)
                 */
                down_write(&mm->mmap_sem);
                up_write(&mm->mmap_sem);
-       } else
-               spin_unlock(&khugepaged_mm_lock);
+       }
 }
 
 static void release_pte_page(struct page *page)
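
The bare down_write()/up_write() pair in the hunk above is a standard rwsem idiom: taking an rwsem in write mode waits for every reader already inside, so releasing it immediately afterwards serves as a pure synchronization barrier. A minimal sketch of the idiom with hypothetical names (sem and the two helpers are illustrative, not from this patch):

	static DECLARE_RWSEM(sem);

	void reader(void)			/* khugepaged's role */
	{
		down_read(&sem);
		work_on_shared_state();		/* hypothetical */
		up_read(&sem);
	}

	void exit_path(void)			/* __khugepaged_exit's role */
	{
		down_write(&sem);	/* waits out any current reader */
		up_write(&sem);
		teardown_shared_state();	/* hypothetical; safe now */
	}

In __khugepaged_exit() the reader is khugepaged, which is guaranteed to run khugepaged_test_exit() under mmap_sem read mode; the empty write-locked section ensures khugepaged has finished with the mm's page tables before the exiting task destroys them.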