diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9119faa..8859578 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -127,6 +127,20 @@ void pm_restrict_gfp_mask(void)
        saved_gfp_mask = gfp_allowed_mask;
        gfp_allowed_mask &= ~GFP_IOFS;
 }
+
+static bool pm_suspending(void)
+{
+       if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
+               return false;
+       return true;
+}
+
+#else
+
+static bool pm_suspending(void)
+{
+       return false;
+}
 #endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
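Note on the hunk above: pm_restrict_gfp_mask() clears GFP_IOFS from gfp_allowed_mask at suspend time, so pm_suspending() can detect suspend purely from the mask, and the !CONFIG_PM_SLEEP stub always reports false. The allocator's slow path (see the rebalance hunk further down) uses this to bail out instead of retrying reclaim that can no longer make progress. A minimal user-space sketch of the mask logic; the GFP bit values below are stand-ins, not taken from the kernel headers:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-ins for the kernel's GFP bits; the values here are illustrative. */
    #define __GFP_IO    0x40u
    #define __GFP_FS    0x80u
    #define GFP_IOFS    (__GFP_IO | __GFP_FS)

    static unsigned int gfp_allowed_mask = ~0u;

    static void pm_restrict_gfp_mask(void)
    {
            gfp_allowed_mask &= ~GFP_IOFS;  /* suspend: forbid I/O and FS reclaim */
    }

    static bool pm_suspending(void)
    {
            /* Both bits still allowed means we are not suspending. */
            return (gfp_allowed_mask & GFP_IOFS) != GFP_IOFS;
    }

    int main(void)
    {
            printf("%d\n", pm_suspending());        /* 0: normal operation */
            pm_restrict_gfp_mask();
            printf("%d\n", pm_suspending());        /* 1: suspend in progress */
            return 0;
    }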
@@ -176,6 +190,7 @@ static char * const zone_names[MAX_NR_ZONES] = {
 };
 
 int min_free_kbytes = 1024;
+int min_free_order_shift = 1;
 
 static unsigned long __meminitdata nr_kernel_pages;
 static unsigned long __meminitdata nr_all_pages;
@@ -355,8 +370,8 @@ void prep_compound_page(struct page *page, unsigned long order)
        __SetPageHead(page);
        for (i = 1; i < nr_pages; i++) {
                struct page *p = page + i;
-
                __SetPageTail(p);
+               set_page_count(p, 0);
                p->first_page = page;
        }
 }
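Note on the hunk above: set_page_count(p, 0) makes every tail page start with a zero reference count, so only the head page of a compound page carries the allocation's refcount and a stale nonzero tail count cannot mislead later get_page()/put_page() accounting. A user-space sketch of the invariant the loop establishes; the struct and field names below are stand-ins, not the kernel's struct page:

    #include <assert.h>

    struct page {
            int count;                      /* stand-in for the atomic _count */
            struct page *first_page;
    };

    /* Mirror of the tail-page setup: the head keeps the refcount, tails get 0. */
    static void prep_compound(struct page *page, unsigned long nr_pages)
    {
            unsigned long i;

            page->count = 1;                /* head page holds the reference */
            for (i = 1; i < nr_pages; i++) {
                    struct page *p = page + i;

                    p->count = 0;           /* tail pages must not look referenced */
                    p->first_page = page;
            }
    }

    int main(void)
    {
            struct page pages[4];

            prep_compound(pages, 4);
            assert(pages[0].count == 1 && pages[3].count == 0);
            assert(pages[3].first_page == &pages[0]);
            return 0;
    }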
@@ -1370,21 +1385,12 @@ failed:
 
 #ifdef CONFIG_FAIL_PAGE_ALLOC
 
-static struct fail_page_alloc_attr {
+static struct {
        struct fault_attr attr;
 
        u32 ignore_gfp_highmem;
        u32 ignore_gfp_wait;
        u32 min_order;
-
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-
-       struct dentry *ignore_gfp_highmem_file;
-       struct dentry *ignore_gfp_wait_file;
-       struct dentry *min_order_file;
-
-#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
-
 } fail_page_alloc = {
        .attr = FAULT_ATTR_INITIALIZER,
        .ignore_gfp_wait = 1,
@@ -1418,36 +1424,27 @@ static int __init fail_page_alloc_debugfs(void)
 {
        mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
        struct dentry *dir;
-       int err;
-
-       err = init_fault_attr_dentries(&fail_page_alloc.attr,
-                                      "fail_page_alloc");
-       if (err)
-               return err;
-       dir = fail_page_alloc.attr.dentries.dir;
-
-       fail_page_alloc.ignore_gfp_wait_file =
-               debugfs_create_bool("ignore-gfp-wait", mode, dir,
-                                     &fail_page_alloc.ignore_gfp_wait);
-
-       fail_page_alloc.ignore_gfp_highmem_file =
-               debugfs_create_bool("ignore-gfp-highmem", mode, dir,
-                                     &fail_page_alloc.ignore_gfp_highmem);
-       fail_page_alloc.min_order_file =
-               debugfs_create_u32("min-order", mode, dir,
-                                  &fail_page_alloc.min_order);
-
-       if (!fail_page_alloc.ignore_gfp_wait_file ||
-            !fail_page_alloc.ignore_gfp_highmem_file ||
-            !fail_page_alloc.min_order_file) {
-               err = -ENOMEM;
-               debugfs_remove(fail_page_alloc.ignore_gfp_wait_file);
-               debugfs_remove(fail_page_alloc.ignore_gfp_highmem_file);
-               debugfs_remove(fail_page_alloc.min_order_file);
-               cleanup_fault_attr_dentries(&fail_page_alloc.attr);
-       }
 
-       return err;
+       dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
+                                       &fail_page_alloc.attr);
+       if (IS_ERR(dir))
+               return PTR_ERR(dir);
+
+       if (!debugfs_create_bool("ignore-gfp-wait", mode, dir,
+                               &fail_page_alloc.ignore_gfp_wait))
+               goto fail;
+       if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir,
+                               &fail_page_alloc.ignore_gfp_highmem))
+               goto fail;
+       if (!debugfs_create_u32("min-order", mode, dir,
+                               &fail_page_alloc.min_order))
+               goto fail;
+
+       return 0;
+fail:
+       debugfs_remove_recursive(dir);
+
+       return -ENOMEM;
 }
 
 late_initcall(fail_page_alloc_debugfs);
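Note on the two fail_page_alloc hunks: the open-coded dentry bookkeeping disappears because fault_create_debugfs_attr() hands back the directory dentry (or an error encoded in the pointer), and a single debugfs_remove_recursive() on that directory undoes everything if a later debugfs_create_* call fails. The IS_ERR()/PTR_ERR() convention used for the return value is worth spelling out; here is a minimal user-space re-implementation of that convention (written for illustration, not taken from the kernel headers):

    #include <stdio.h>
    #include <errno.h>
    #include <stdbool.h>

    /* User-space echo of the kernel's ERR_PTR convention: small negative
     * errno values are smuggled through the top of the pointer range. */
    #define MAX_ERRNO 4095

    static void *ERR_PTR(long error)        { return (void *)error; }
    static long  PTR_ERR(const void *ptr)   { return (long)ptr; }
    static bool  IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    static void *create_attr_dir(bool fail)
    {
            static int dummy_dir;

            return fail ? ERR_PTR(-ENOMEM) : &dummy_dir;
    }

    int main(void)
    {
            void *dir = create_attr_dir(true);

            if (IS_ERR(dir))
                    printf("creation failed: %ld\n", PTR_ERR(dir)); /* -12 */
            return 0;
    }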
@@ -1487,7 +1484,7 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
                free_pages -= z->free_area[o].nr_free << o;
 
                /* Require fewer higher order pages to be free */
-               min >>= 1;
+               min >>= min_free_order_shift;
 
                if (free_pages <= min)
                        return false;
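Note on the hunk above: upstream always halves the required reserve (min >>= 1) as it walks up the buddy orders; min_free_order_shift (declared earlier in this patch, default 1) keeps that behaviour but makes the shift tunable, e.g. a shift of 0 keeps the full reserve requirement at every order. A simplified user-space echo of the loop's arithmetic, with made-up per-order free counts:

    #include <stdbool.h>
    #include <stdio.h>

    static int min_free_order_shift = 1;    /* default mirrors upstream's "min >>= 1" */

    /* Simplified echo of __zone_watermark_ok(): at each order, discount pages
     * locked up in smaller blocks and require a shrinking share of the reserve. */
    static bool watermark_ok(long free_pages, long min, int order,
                             const long nr_free[])
    {
            int o;

            if (free_pages <= min)
                    return false;
            for (o = 0; o < order; o++) {
                    free_pages -= nr_free[o] << o;  /* blocks this small cannot help */
                    min >>= min_free_order_shift;   /* require fewer higher-order pages */
                    if (free_pages <= min)
                            return false;
            }
            return true;
    }

    int main(void)
    {
            const long nr_free[4] = { 300, 40, 10, 2 };     /* hypothetical counts */
            long total = 300 + (40 << 1) + (10 << 2) + (2 << 3);

            printf("order-3 ok: %d\n", watermark_ok(total, 128, 3, nr_free));
            return 0;
    }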
@@ -1616,6 +1613,21 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
        set_bit(i, zlc->fullzones);
 }
 
+/*
+ * clear all zones full, called after direct reclaim makes progress so that
+ * a zone that was recently full is not skipped over for up to a second
+ */
+static void zlc_clear_zones_full(struct zonelist *zonelist)
+{
+       struct zonelist_cache *zlc;     /* cached zonelist speedup info */
+
+       zlc = zonelist->zlcache_ptr;
+       if (!zlc)
+               return;
+
+       bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
+}
+
 #else  /* CONFIG_NUMA */
 
 static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
@@ -1632,6 +1644,10 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
 static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 {
 }
+
+static void zlc_clear_zones_full(struct zonelist *zonelist)
+{
+}
 #endif /* CONFIG_NUMA */
 
 /*
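Note on the two hunks above: the zonelist cache is a small bitmap attached to the zonelist; zlc_mark_zone_full() sets a zone's bit so later scans skip that zone for up to a second, and the new zlc_clear_zones_full() wipes the bitmap so a successful direct reclaim (see the __alloc_pages_direct_reclaim hunk below) puts every zone back in play immediately. A user-space sketch of that bitmap round trip; MAX_ZONES_PER_ZONELIST and the helpers are stand-ins for the kernel's bitmap ops:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_ZONES_PER_ZONELIST 64       /* hypothetical; enough for the sketch */

    struct zonelist_cache {
            unsigned char fullzones[MAX_ZONES_PER_ZONELIST / 8];
    };

    static void mark_zone_full(struct zonelist_cache *zlc, int i)
    {
            zlc->fullzones[i / 8] |= 1u << (i % 8);         /* like set_bit() */
    }

    static bool zone_worth_trying(const struct zonelist_cache *zlc, int i)
    {
            return !(zlc->fullzones[i / 8] & (1u << (i % 8)));      /* like !test_bit() */
    }

    static void clear_zones_full(struct zonelist_cache *zlc)
    {
            memset(zlc->fullzones, 0, sizeof(zlc->fullzones));      /* like bitmap_zero() */
    }

    int main(void)
    {
            struct zonelist_cache zlc = { { 0 } };

            mark_zone_full(&zlc, 3);
            printf("zone 3 worth trying: %d\n", zone_worth_trying(&zlc, 3));        /* 0 */
            clear_zones_full(&zlc);         /* direct reclaim made progress */
            printf("zone 3 worth trying: %d\n", zone_worth_trying(&zlc, 3));        /* 1 */
            return 0;
    }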
@@ -1664,7 +1680,7 @@ zonelist_scan:
                                continue;
                if ((alloc_flags & ALLOC_CPUSET) &&
                        !cpuset_zone_allowed_softwall(zone, gfp_mask))
-                               goto try_next_zone;
+                               continue;
 
                BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
                if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
@@ -1676,17 +1692,36 @@ zonelist_scan:
                                    classzone_idx, alloc_flags))
                                goto try_this_zone;
 
+                       if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
+                               /*
+                                * we do zlc_setup if there are multiple nodes
+                                * and before considering the first zone allowed
+                                * by the cpuset.
+                                */
+                               allowednodes = zlc_setup(zonelist, alloc_flags);
+                               zlc_active = 1;
+                               did_zlc_setup = 1;
+                       }
+
                        if (zone_reclaim_mode == 0)
                                goto this_zone_full;
 
+                       /*
+                        * As we may have just activated ZLC, check if the first
+                        * eligible zone has failed zone_reclaim recently.
+                        */
+                       if (NUMA_BUILD && zlc_active &&
+                               !zlc_zone_worth_trying(zonelist, z, allowednodes))
+                               continue;
+
                        ret = zone_reclaim(zone, gfp_mask, order);
                        switch (ret) {
                        case ZONE_RECLAIM_NOSCAN:
                                /* did not scan */
-                               goto try_next_zone;
+                               continue;
                        case ZONE_RECLAIM_FULL:
                                /* scanned but unreclaimable */
-                               goto this_zone_full;
+                               continue;
                        default:
                                /* did we reclaim enough */
                                if (!zone_watermark_ok(zone, order, mark,
@@ -1703,16 +1738,6 @@ try_this_zone:
 this_zone_full:
                if (NUMA_BUILD)
                        zlc_mark_zone_full(zonelist, z);
-try_next_zone:
-               if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
-                       /*
-                        * we do zlc_setup after the first zone is tried but only
-                        * if there are multiple nodes make it worthwhile
-                        */
-                       allowednodes = zlc_setup(zonelist, alloc_flags);
-                       zlc_active = 1;
-                       did_zlc_setup = 1;
-               }
        }
 
        if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
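Note on the three zonelist_scan hunks above: zlc_setup() used to run only at the try_next_zone label, i.e. after the first zone had already been tried, so the cached "this zone is full" state could not stop the scan from re-running zone_reclaim() on a zone it had just given up on. Moving the setup before zone_reclaim() and replacing the goto labels with plain continue means the fullzones bitmap is consulted even for the first eligible zone. A heavily condensed user-space sketch of the resulting control flow; the zone count, watermark, and reclaim stubs below are all hypothetical:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical setup: 4 zones, only zone 3 has free memory,
     * and zone_reclaim() never manages to free anything. */
    static bool zone_full[4];
    static bool watermark_ok(int zone)      { return zone == 3; }
    static bool zone_reclaim(int zone)      { (void)zone; return false; }

    static int scan_zones(void)
    {
            bool zlc_active = false;
            int zone;

            for (zone = 0; zone < 4; zone++) {
                    if (zlc_active && zone_full[zone])
                            continue;               /* skip zones already known full */

                    if (watermark_ok(zone))
                            return zone;            /* try_this_zone: allocate here */

                    if (!zlc_active)
                            zlc_active = true;      /* zlc_setup() now runs before reclaim */

                    if (zone_full[zone])
                            continue;               /* re-check: first zone may be known full */

                    if (!zone_reclaim(zone)) {
                            zone_full[zone] = true; /* zlc_mark_zone_full() */
                            continue;
                    }
                    return zone;
            }
            return -1;
    }

    int main(void)
    {
            zone_full[0] = true;    /* remembered as full from an earlier failed scan */
            printf("allocated from zone %d\n", scan_zones());       /* prints 3 */
            return 0;
    }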
@@ -1954,6 +1979,10 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
        if (unlikely(!(*did_some_progress)))
                return NULL;
 
+       /* After successful reclaim, reconsider all zones for allocation */
+       if (NUMA_BUILD)
+               zlc_clear_zones_full(zonelist);
+
 retry:
        page = get_page_from_freelist(gfp_mask, nodemask, order,
                                        zonelist, high_zoneidx,
@@ -2193,6 +2222,14 @@ rebalance:
 
                        goto restart;
                }
+
+               /*
+                * Suspend converts GFP_KERNEL to __GFP_WAIT which can
+                * prevent reclaim making forward progress without
+                * invoking OOM. Bail if we are suspending
+                */
+               if (pm_suspending())
+                       goto nopage;
        }
 
        /* Check if we should retry the allocation */
@@ -3356,9 +3393,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
        unsigned long block_migratetype;
        int reserve;
 
-       /* Get the start pfn, end pfn and the number of blocks to reserve */
+       /*
+        * Get the start pfn, end pfn and the number of blocks to reserve
+        * We have to be careful to be aligned to pageblock_nr_pages to
+        * make sure that we always check pfn_valid for the first page in
+        * the block.
+        */
        start_pfn = zone->zone_start_pfn;
        end_pfn = start_pfn + zone->spanned_pages;
+       start_pfn = roundup(start_pfn, pageblock_nr_pages);
        reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
                                                        pageblock_order;
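Note on the hunk above: without the roundup, a zone whose zone_start_pfn is not pageblock-aligned would make the reserve loop inspect pfns in the middle of a pageblock, and the pfn_valid() check would no longer be guaranteed to hit the first page of each block. A user-space illustration of the roundup() arithmetic with made-up numbers; the macro mirrors the kernel's definition:

    #include <stdio.h>

    /* Same shape as the kernel's roundup(): round x up to a multiple of y. */
    #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))

    int main(void)
    {
            unsigned long pageblock_nr_pages = 512; /* hypothetical order-9 pageblocks */
            unsigned long zone_start_pfn = 4660;    /* made up, not pageblock aligned */
            unsigned long start_pfn = roundup(zone_start_pfn, pageblock_nr_pages);

            /* 4660 rounds up to 5120, the first pfn that begins a whole pageblock. */
            printf("scan starts at pfn %lu instead of %lu\n", start_pfn, zone_start_pfn);
            return 0;
    }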