mm: filter based on a nodemask as well as a gfp_mask
[linux-2.6.git] / mm / page_alloc.c
index 6d94d04..b4beb3e 100644 (file)
@@ -1377,7 +1377,7 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
  * a page.
  */
 static struct page *
-get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
                struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
 {
        struct zoneref *z;
@@ -1388,16 +1388,17 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
        int zlc_active = 0;             /* set if using zonelist_cache */
        int did_zlc_setup = 0;          /* just call zlc_setup() one time */
 
-       z = first_zones_zonelist(zonelist, high_zoneidx);
-       classzone_idx = zonelist_zone_idx(z);
-       preferred_zone = zonelist_zone(z);
+       (void)first_zones_zonelist(zonelist, high_zoneidx, nodemask,
+                                                       &preferred_zone);
+       classzone_idx = zone_idx(preferred_zone);
 
 zonelist_scan:
        /*
         * Scan zonelist, looking for a zone with enough free.
         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
         */
-       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+       for_each_zone_zonelist_nodemask(zone, z, zonelist,
+                                               high_zoneidx, nodemask) {
                if (NUMA_BUILD && zlc_active &&
                        !zlc_zone_worth_trying(zonelist, z, allowednodes))
                                continue;
@@ -1447,9 +1448,9 @@ try_next_zone:
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
-struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
-               struct zonelist *zonelist)
+static struct page *
+__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
+                       struct zonelist *zonelist, nodemask_t *nodemask)
 {
        const gfp_t wait = gfp_mask & __GFP_WAIT;
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
@@ -1478,7 +1479,7 @@ restart:
                return NULL;
        }
 
-       page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+       page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                        zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
        if (page)
                goto got_pg;
@@ -1523,7 +1524,7 @@ restart:
         * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
         */
-       page = get_page_from_freelist(gfp_mask, order, zonelist,
+       page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
                                                high_zoneidx, alloc_flags);
        if (page)
                goto got_pg;
@@ -1536,7 +1537,7 @@ rebalance:
                if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 nofail_alloc:
                        /* go through the zonelist yet again, ignoring mins */
-                       page = get_page_from_freelist(gfp_mask, order,
+                       page = get_page_from_freelist(gfp_mask, nodemask, order,
                                zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
                        if (page)
                                goto got_pg;
@@ -1571,7 +1572,7 @@ nofail_alloc:
                drain_all_pages();
 
        if (likely(did_some_progress)) {
-               page = get_page_from_freelist(gfp_mask, order,
+               page = get_page_from_freelist(gfp_mask, nodemask, order,
                                        zonelist, high_zoneidx, alloc_flags);
                if (page)
                        goto got_pg;
@@ -1587,8 +1588,9 @@ nofail_alloc:
                 * a parallel oom killing, we must fail if we're still
                 * under heavy pressure.
                 */
-               page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-                       zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+               page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
+                       order, zonelist, high_zoneidx,
+                       ALLOC_WMARK_HIGH|ALLOC_CPUSET);
                if (page) {
                        clear_zonelist_oom(zonelist, gfp_mask);
                        goto got_pg;
@@ -1637,6 +1639,20 @@ got_pg:
        return page;
 }
 
+struct page *
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
+               struct zonelist *zonelist)
+{
+       return __alloc_pages_internal(gfp_mask, order, zonelist, NULL); /* NULL: caller supplied no nodemask */
+}
+
 EXPORT_SYMBOL(__alloc_pages);
 
+struct page *
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
+               struct zonelist *zonelist, nodemask_t *nodemask)
+{
+       return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask); /* forwards the caller's nodemask unchanged */
+}
+
 /*
@@ -1880,6 +1896,12 @@ void show_free_areas(void)
        show_swap_cache_info();
 }
 
+static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
+{
+       zoneref->zone_idx = zone_idx(zone);     /* store zone_idx(zone) in the entry */
+       zoneref->zone = zone;                   /* zone this entry points at */
+}
+
 /*
  * Builds allocation fallback zone lists.
  *