mm: change __remove_from_page_cache()
[linux-2.6.git] / mm / vmscan.c
index dafb9d9..665b090 100644 (file)
@@ -514,7 +514,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
 
                freepage = mapping->a_ops->freepage;
 
-               __remove_from_page_cache(page);
+               __delete_from_page_cache(page);
                spin_unlock_irq(&mapping->tree_lock);
                mem_cgroup_uncharge_cache_page(page);
 
@@ -1044,7 +1044,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                case 0:
                        list_move(&page->lru, dst);
                        mem_cgroup_del_lru(page);
-                       nr_taken++;
+                       nr_taken += hpage_nr_pages(page);
                        break;
 
                case -EBUSY:
@@ -1102,7 +1102,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                        if (__isolate_lru_page(cursor_page, mode, file) == 0) {
                                list_move(&cursor_page->lru, dst);
                                mem_cgroup_del_lru(cursor_page);
-                               nr_taken++;
+                               nr_taken += hpage_nr_pages(page);
                                nr_lumpy_taken++;
                                if (PageDirty(cursor_page))
                                        nr_lumpy_dirty++;
@@ -1157,14 +1157,15 @@ static unsigned long clear_active_flags(struct list_head *page_list,
        struct page *page;
 
        list_for_each_entry(page, page_list, lru) {
+               int numpages = hpage_nr_pages(page);
                lru = page_lru_base_type(page);
                if (PageActive(page)) {
                        lru += LRU_ACTIVE;
                        ClearPageActive(page);
-                       nr_active++;
+                       nr_active += numpages;
                }
                if (count)
-                       count[lru]++;
+                       count[lru] += numpages;
        }
 
        return nr_active;
@@ -1274,7 +1275,8 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
                add_page_to_lru_list(zone, page, lru);
                if (is_active_lru(lru)) {
                        int file = is_file_lru(lru);
-                       reclaim_stat->recent_rotated[file]++;
+                       int numpages = hpage_nr_pages(page);
+                       reclaim_stat->recent_rotated[file] += numpages;
                }
                if (!pagevec_add(&pvec, page)) {
                        spin_unlock_irq(&zone->lru_lock);
@@ -1482,7 +1484,7 @@ static void move_active_pages_to_lru(struct zone *zone,
 
                list_move(&page->lru, &zone->lru[lru].list);
                mem_cgroup_add_lru_list(page, lru);
-               pgmoved++;
+               pgmoved += hpage_nr_pages(page);
 
                if (!pagevec_add(&pvec, page) || list_empty(list)) {
                        spin_unlock_irq(&zone->lru_lock);
@@ -1550,7 +1552,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
                }
 
                if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
-                       nr_rotated++;
+                       nr_rotated += hpage_nr_pages(page);
                        /*
                         * Identify referenced, file-backed active pages and
                         * give them one more trip around the active list. So
@@ -1839,16 +1841,28 @@ static inline bool should_continue_reclaim(struct zone *zone,
        if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION))
                return false;
 
-       /*
-        * If we failed to reclaim and have scanned the full list, stop.
-        * NOTE: Checking just nr_reclaimed would exit reclaim/compaction far
-        *       faster but obviously would be less likely to succeed
-        *       allocation. If this is desirable, use GFP_REPEAT to decide
-        *       if both reclaimed and scanned should be checked or just
-        *       reclaimed
-        */
-       if (!nr_reclaimed && !nr_scanned)
-               return false;
+       /* Consider stopping depending on scan and reclaim activity */
+       if (sc->gfp_mask & __GFP_REPEAT) {
+               /*
+                * For __GFP_REPEAT allocations, stop reclaiming if the
+                * full LRU list has been scanned and we are still failing
+                * to reclaim pages. This full LRU scan is potentially
+                * expensive but a __GFP_REPEAT caller really wants to succeed
+                */
+               if (!nr_reclaimed && !nr_scanned)
+                       return false;
+       } else {
+               /*
+                * For non-__GFP_REPEAT allocations which can presumably
+                * fail without consequence, stop if we failed to reclaim
+                * any pages from the last SWAP_CLUSTER_MAX number of
+                * pages that were scanned. This will return to the
+                * caller faster at the risk reclaim/compaction and
+                * the resulting allocation attempt fails
+                */
+               if (!nr_reclaimed)
+                       return false;
+       }
 
        /*
         * If we have not reclaimed enough pages for compaction and the
@@ -1880,12 +1894,12 @@ static void shrink_zone(int priority, struct zone *zone,
        unsigned long nr[NR_LRU_LISTS];
        unsigned long nr_to_scan;
        enum lru_list l;
-       unsigned long nr_reclaimed;
+       unsigned long nr_reclaimed, nr_scanned;
        unsigned long nr_to_reclaim = sc->nr_to_reclaim;
-       unsigned long nr_scanned = sc->nr_scanned;
 
 restart:
        nr_reclaimed = 0;
+       nr_scanned = sc->nr_scanned;
        get_scan_count(zone, sc, nr, priority);
 
        while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2081,7 +2095,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                        struct zone *preferred_zone;
 
                        first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
-                                                       NULL, &preferred_zone);
+                                               &cpuset_current_mems_allowed,
+                                               &preferred_zone);
                        wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
                }
        }
@@ -2227,7 +2242,8 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
 }
 
 /* is kswapd sleeping prematurely? */
-static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
+static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
+                                       int classzone_idx)
 {
        int i;
        unsigned long balanced = 0;
@@ -2235,7 +2251,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
 
        /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
        if (remaining)
-               return 1;
+               return true;
 
        /* Check the watermark levels */
        for (i = 0; i < pgdat->nr_zones; i++) {
@@ -2244,11 +2260,19 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
                if (!populated_zone(zone))
                        continue;
 
-               if (zone->all_unreclaimable)
+               /*
+                * balance_pgdat() skips over all_unreclaimable after
+                * DEF_PRIORITY. Effectively, it considers them balanced so
+                * they must be considered balanced here as well if kswapd
+                * is to sleep
+                */
+               if (zone->all_unreclaimable) {
+                       balanced += zone->present_pages;
                        continue;
+               }
 
                if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
-                                                               0, 0))
+                                                       classzone_idx, 0))
                        all_zones_ok = false;
                else
                        balanced += zone->present_pages;
@@ -2260,7 +2284,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
         * must be balanced
         */
        if (order)
-               return pgdat_balanced(pgdat, balanced, 0);
+               return pgdat_balanced(pgdat, balanced, classzone_idx);
        else
                return !all_zones_ok;
 }
@@ -2287,7 +2311,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
  * of pages is balanced across the zones.
  */
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
-                                                       int classzone_idx)
+                                                       int *classzone_idx)
 {
        int all_zones_ok;
        unsigned long balanced;
@@ -2350,6 +2374,7 @@ loop_again:
                        if (!zone_watermark_ok_safe(zone, order,
                                        high_wmark_pages(zone), 0, 0)) {
                                end_zone = i;
+                               *classzone_idx = i;
                                break;
                        }
                }
@@ -2401,9 +2426,11 @@ loop_again:
                                                lru_pages);
                        sc.nr_reclaimed += reclaim_state->reclaimed_slab;
                        total_scanned += sc.nr_scanned;
+
                        if (zone->all_unreclaimable)
                                continue;
-                       if (nr_slab == 0 && !zone_reclaimable(zone))
+                       if (nr_slab == 0 &&
+                           !zone_reclaimable(zone))
                                zone->all_unreclaimable = 1;
                        /*
                         * If we've done a decent amount of scanning and
@@ -2414,15 +2441,6 @@ loop_again:
                            total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
                                sc.may_writepage = 1;
 
-                       /*
-                        * Compact the zone for higher orders to reduce
-                        * latencies for higher-order allocations that
-                        * would ordinarily call try_to_compact_pages()
-                        */
-                       if (sc.order > PAGE_ALLOC_COSTLY_ORDER)
-                               compact_zone_order(zone, sc.order, sc.gfp_mask,
-                                                       false);
-
                        if (!zone_watermark_ok_safe(zone, order,
                                        high_wmark_pages(zone), end_zone, 0)) {
                                all_zones_ok = 0;
@@ -2443,12 +2461,12 @@ loop_again:
                                 * spectulatively avoid congestion waits
                                 */
                                zone_clear_flag(zone, ZONE_CONGESTED);
-                               if (i <= classzone_idx)
+                               if (i <= *classzone_idx)
                                        balanced += zone->present_pages;
                        }
 
                }
-               if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))
+               if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
                        break;          /* kswapd: all done */
                /*
                 * OK, kswapd is getting into trouble.  Take a nap, then take
@@ -2477,7 +2495,7 @@ out:
         * high-order: Balanced zones must make up at least 25% of the node
         *             for the node to be balanced
         */
-       if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))) {
+       if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))) {
                cond_resched();
 
                try_to_freeze();
@@ -2538,10 +2556,11 @@ out:
         * if another caller entered the allocator slow path while kswapd
         * was awake, order will remain at the higher level
         */
+       *classzone_idx = end_zone;
        return order;
 }
 
-static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
+static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 {
        long remaining = 0;
        DEFINE_WAIT(wait);
@@ -2552,7 +2571,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
        prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
 
        /* Try to sleep for a short interval */
-       if (!sleeping_prematurely(pgdat, order, remaining)) {
+       if (!sleeping_prematurely(pgdat, order, remaining, classzone_idx)) {
                remaining = schedule_timeout(HZ/10);
                finish_wait(&pgdat->kswapd_wait, &wait);
                prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
@@ -2562,7 +2581,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
         * After a short sleep, check if it was a premature sleep. If not, then
         * go fully to sleep until explicitly woken up.
         */
-       if (!sleeping_prematurely(pgdat, order, remaining)) {
+       if (!sleeping_prematurely(pgdat, order, remaining, classzone_idx)) {
                trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
 
                /*
@@ -2650,7 +2669,7 @@ static int kswapd(void *p)
                        order = new_order;
                        classzone_idx = new_classzone_idx;
                } else {
-                       kswapd_try_to_sleep(pgdat, order);
+                       kswapd_try_to_sleep(pgdat, order, classzone_idx);
                        order = pgdat->kswapd_max_order;
                        classzone_idx = pgdat->classzone_idx;
                        pgdat->kswapd_max_order = 0;
@@ -2667,7 +2686,7 @@ static int kswapd(void *p)
                 */
                if (!ret) {
                        trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
-                       order = balance_pgdat(pgdat, order, classzone_idx);
+                       order = balance_pgdat(pgdat, order, &classzone_idx);
                }
        }
        return 0;