vmscan: fix it to take care of nodemask
KAMEZAWA Hiroyuki [Tue, 31 Mar 2009 22:23:31 +0000 (15:23 -0700)]
try_to_free_pages() is used for the direct reclaim of up to
SWAP_CLUSTER_MAX pages when watermarks are low.  The caller to
alloc_pages_nodemask() can specify a nodemask of nodes that are allowed to
be used but this is not passed to try_to_free_pages().  This can lead to
unnecessary reclaim of pages that are unusable by the caller and int the
worst case lead to allocation failure as progress was not been make where
it is needed.

This patch passes the nodemask used for alloc_pages_nodemask() to
try_to_free_pages().

Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

fs/buffer.c
include/linux/swap.h
mm/page_alloc.c
mm/vmscan.c

index 0c14f8d..c77b848 100644 (file)
@@ -290,7 +290,7 @@ static void free_more_memory(void)
                                                &zone);
                if (zone)
                        try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
-                                               GFP_NOFS);
+                                               GFP_NOFS, NULL);
        }
 }
 
index d302155..b8b0c4c 100644 (file)
@@ -212,7 +212,7 @@ static inline void lru_cache_add_active_file(struct page *page)
 
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
-                                       gfp_t gfp_mask);
+                                       gfp_t gfp_mask, nodemask_t *mask);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
                                                  gfp_t gfp_mask, bool noswap,
                                                  unsigned int swappiness);
index cbd5321..0284e52 100644 (file)
@@ -1582,7 +1582,8 @@ nofail_alloc:
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
 
-       did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
+       did_some_progress = try_to_free_pages(zonelist, order,
+                                               gfp_mask, nodemask);
 
        p->reclaim_state = NULL;
        lockdep_clear_current_reclaim_state();
index f4619c6..06e7269 100644 (file)
@@ -78,6 +78,12 @@ struct scan_control {
        /* Which cgroup do we reclaim from */
        struct mem_cgroup *mem_cgroup;
 
+       /*
+        * Nodemask of nodes allowed by the caller. If NULL, all nodes
+        * are scanned.
+        */
+       nodemask_t      *nodemask;
+
        /* Pluggable isolate pages callback */
        unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
                        unsigned long *scanned, int order, int mode,
@@ -1538,7 +1544,8 @@ static void shrink_zones(int priority, struct zonelist *zonelist,
        struct zone *zone;
 
        sc->all_unreclaimable = 1;
-       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+       for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
+                                       sc->nodemask) {
                if (!populated_zone(zone))
                        continue;
                /*
@@ -1683,7 +1690,7 @@ out:
 }
 
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
-                                                               gfp_t gfp_mask)
+                               gfp_t gfp_mask, nodemask_t *nodemask)
 {
        struct scan_control sc = {
                .gfp_mask = gfp_mask,
@@ -1694,6 +1701,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                .order = order,
                .mem_cgroup = NULL,
                .isolate_pages = isolate_pages_global,
+               .nodemask = nodemask,
        };
 
        return do_try_to_free_pages(zonelist, &sc);
@@ -1714,6 +1722,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                .order = 0,
                .mem_cgroup = mem_cont,
                .isolate_pages = mem_cgroup_isolate_pages,
+               .nodemask = NULL, /* we don't care the placement */
        };
        struct zonelist *zonelist;