regmap: mmio: convert some error returns to BUG()
[linux-2.6.git] / mm / vmstat.c
index 312d728..f600557 100644 (file)
@@ -78,12 +78,36 @@ void vm_events_fold_cpu(int cpu)
  *
  * vm_stat contains the global counters
  */
-atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
+atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
 EXPORT_SYMBOL(vm_stat);
 
 #ifdef CONFIG_SMP
 
-static int calculate_threshold(struct zone *zone)
+int calculate_pressure_threshold(struct zone *zone)
+{
+       int threshold;
+       int watermark_distance;
+
+       /*
+        * As vmstats are not up to date, there is drift between the estimated
+        * and real values. For high thresholds and a high number of CPUs, it
+        * is possible for the min watermark to be breached while the estimated
+        * value looks fine. The pressure threshold is a reduced value such
+        * that even the maximum amount of drift will not accidentally breach
+        * the min watermark.
+        */
+       watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
+       threshold = max(1, (int)(watermark_distance / num_online_cpus()));
+
+       /*
+        * Maximum threshold is 125
+        */
+       threshold = min(125, threshold);
+
+       return threshold;
+}
+
+int calculate_normal_threshold(struct zone *zone)
 {
        int threshold;
        int mem;        /* memory in 128 MB units */
@@ -133,7 +157,7 @@ static int calculate_threshold(struct zone *zone)
 /*
  * Refresh the thresholds for each zone.
  */
-static void refresh_zone_stat_thresholds(void)
+void refresh_zone_stat_thresholds(void)
 {
        struct zone *zone;
        int cpu;
@@ -142,7 +166,7 @@ static void refresh_zone_stat_thresholds(void)
        for_each_populated_zone(zone) {
                unsigned long max_drift, tolerate_drift;
 
-               threshold = calculate_threshold(zone);
+               threshold = calculate_normal_threshold(zone);
 
                for_each_online_cpu(cpu)
                        per_cpu_ptr(zone->pageset, cpu)->stat_threshold
@@ -161,6 +185,26 @@ static void refresh_zone_stat_thresholds(void)
        }
 }
 
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
+                               int (*calculate_pressure)(struct zone *))
+{
+       struct zone *zone;
+       int cpu;
+       int threshold;
+       int i;
+
+       for (i = 0; i < pgdat->nr_zones; i++) {
+               zone = &pgdat->node_zones[i];
+               if (!zone->percpu_drift_mark)
+                       continue;
+
+               threshold = (*calculate_pressure)(zone);
+               for_each_possible_cpu(cpu)
+                       per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+                                                       = threshold;
+       }
+}
+
 /*
  * For use when we know that interrupts are disabled.
  */
@@ -251,7 +295,7 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
 }
 EXPORT_SYMBOL(__dec_zone_page_state);
 
-#ifdef CONFIG_CMPXCHG_LOCAL
+#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
 /*
  * If we have cmpxchg_local support then we do not need to incur the overhead
  * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
@@ -277,9 +321,12 @@ static inline void mod_state(struct zone *zone,
                /*
                 * The fetching of the stat_threshold is racy. We may apply
                 * a counter threshold to the wrong the cpu if we get
-                * rescheduled while executing here. However, the following
-                * will apply the threshold again and therefore bring the
-                * counter under the threshold.
+                * rescheduled while executing here. However, the next
+                * counter update will apply the threshold again and
+                * therefore bring the counter back under the threshold.
+                *
+                * Most of the time the thresholds are the same anyway
+                * for all cpus in a zone.
                 */
                t = this_cpu_read(pcp->stat_threshold);
 
@@ -456,8 +503,12 @@ void refresh_cpu_vm_stats(int cpu)
  * z       = the zone from which the allocation occurred.
  *
  * Must be called with interrupts disabled.
+ *
+ * When __GFP_OTHER_NODE is set assume the node of the preferred
+ * zone is the local node. This is useful for daemons that allocate
+ * memory on behalf of other processes.
  */
-void zone_statistics(struct zone *preferred_zone, struct zone *z)
+void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags)
 {
        if (z->zone_pgdat == preferred_zone->zone_pgdat) {
                __inc_zone_state(z, NUMA_HIT);
@@ -465,7 +516,8 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z)
                __inc_zone_state(z, NUMA_MISS);
                __inc_zone_state(preferred_zone, NUMA_FOREIGN);
        }
-       if (z->node == numa_node_id())
+       if (z->node == ((flags & __GFP_OTHER_NODE) ?
+                       preferred_zone->node : numa_node_id()))
                __inc_zone_state(z, NUMA_LOCAL);
        else
                __inc_zone_state(z, NUMA_OTHER);
@@ -607,6 +659,139 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
 }
 #endif
 
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
+#ifdef CONFIG_ZONE_DMA
+#define TEXT_FOR_DMA(xx) xx "_dma",
+#else
+#define TEXT_FOR_DMA(xx)
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+#define TEXT_FOR_DMA32(xx) xx "_dma32",
+#else
+#define TEXT_FOR_DMA32(xx)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define TEXT_FOR_HIGHMEM(xx) xx "_high",
+#else
+#define TEXT_FOR_HIGHMEM(xx)
+#endif
+
+#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
+                                       TEXT_FOR_HIGHMEM(xx) xx "_movable",
+
+const char * const vmstat_text[] = {
+       /* Zoned VM counters */
+       "nr_free_pages",
+       "nr_inactive_anon",
+       "nr_active_anon",
+       "nr_inactive_file",
+       "nr_active_file",
+       "nr_unevictable",
+       "nr_mlock",
+       "nr_anon_pages",
+       "nr_mapped",
+       "nr_file_pages",
+       "nr_dirty",
+       "nr_writeback",
+       "nr_slab_reclaimable",
+       "nr_slab_unreclaimable",
+       "nr_page_table_pages",
+       "nr_kernel_stack",
+       "nr_unstable",
+       "nr_bounce",
+       "nr_vmscan_write",
+       "nr_vmscan_immediate_reclaim",
+       "nr_writeback_temp",
+       "nr_isolated_anon",
+       "nr_isolated_file",
+       "nr_shmem",
+       "nr_dirtied",
+       "nr_written",
+
+#ifdef CONFIG_NUMA
+       "numa_hit",
+       "numa_miss",
+       "numa_foreign",
+       "numa_interleave",
+       "numa_local",
+       "numa_other",
+#endif
+       "nr_anon_transparent_hugepages",
+       "nr_dirty_threshold",
+       "nr_dirty_background_threshold",
+
+#ifdef CONFIG_VM_EVENT_COUNTERS
+       "pgpgin",
+       "pgpgout",
+       "pswpin",
+       "pswpout",
+
+       TEXTS_FOR_ZONES("pgalloc")
+
+       "pgfree",
+       "pgactivate",
+       "pgdeactivate",
+
+       "pgfault",
+       "pgmajfault",
+
+       TEXTS_FOR_ZONES("pgrefill")
+       TEXTS_FOR_ZONES("pgsteal")
+       TEXTS_FOR_ZONES("pgscan_kswapd")
+       TEXTS_FOR_ZONES("pgscan_direct")
+
+#ifdef CONFIG_NUMA
+       "zone_reclaim_failed",
+#endif
+       "pginodesteal",
+       "slabs_scanned",
+       "kswapd_steal",
+       "kswapd_inodesteal",
+       "kswapd_low_wmark_hit_quickly",
+       "kswapd_high_wmark_hit_quickly",
+       "kswapd_skip_congestion_wait",
+       "pageoutrun",
+       "allocstall",
+
+       "pgrotated",
+
+#ifdef CONFIG_COMPACTION
+       "compact_blocks_moved",
+       "compact_pages_moved",
+       "compact_pagemigrate_failed",
+       "compact_stall",
+       "compact_fail",
+       "compact_success",
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+       "htlb_buddy_alloc_success",
+       "htlb_buddy_alloc_fail",
+#endif
+       "unevictable_pgs_culled",
+       "unevictable_pgs_scanned",
+       "unevictable_pgs_rescued",
+       "unevictable_pgs_mlocked",
+       "unevictable_pgs_munlocked",
+       "unevictable_pgs_cleared",
+       "unevictable_pgs_stranded",
+       "unevictable_pgs_mlockfreed",
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       "thp_fault_alloc",
+       "thp_fault_fallback",
+       "thp_collapse_alloc",
+       "thp_collapse_alloc_failed",
+       "thp_split",
+#endif
+
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+};
+#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
+
+
 #ifdef CONFIG_PROC_FS
 static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
                                                struct zone *zone)
@@ -779,125 +964,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
        .release        = seq_release,
 };
 
-#ifdef CONFIG_ZONE_DMA
-#define TEXT_FOR_DMA(xx) xx "_dma",
-#else
-#define TEXT_FOR_DMA(xx)
-#endif
-
-#ifdef CONFIG_ZONE_DMA32
-#define TEXT_FOR_DMA32(xx) xx "_dma32",
-#else
-#define TEXT_FOR_DMA32(xx)
-#endif
-
-#ifdef CONFIG_HIGHMEM
-#define TEXT_FOR_HIGHMEM(xx) xx "_high",
-#else
-#define TEXT_FOR_HIGHMEM(xx)
-#endif
-
-#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
-                                       TEXT_FOR_HIGHMEM(xx) xx "_movable",
-
-static const char * const vmstat_text[] = {
-       /* Zoned VM counters */
-       "nr_free_pages",
-       "nr_inactive_anon",
-       "nr_active_anon",
-       "nr_inactive_file",
-       "nr_active_file",
-       "nr_unevictable",
-       "nr_mlock",
-       "nr_anon_pages",
-       "nr_mapped",
-       "nr_file_pages",
-       "nr_dirty",
-       "nr_writeback",
-       "nr_slab_reclaimable",
-       "nr_slab_unreclaimable",
-       "nr_page_table_pages",
-       "nr_kernel_stack",
-       "nr_unstable",
-       "nr_bounce",
-       "nr_vmscan_write",
-       "nr_writeback_temp",
-       "nr_isolated_anon",
-       "nr_isolated_file",
-       "nr_shmem",
-       "nr_dirtied",
-       "nr_written",
-
-#ifdef CONFIG_NUMA
-       "numa_hit",
-       "numa_miss",
-       "numa_foreign",
-       "numa_interleave",
-       "numa_local",
-       "numa_other",
-#endif
-       "nr_dirty_threshold",
-       "nr_dirty_background_threshold",
-
-#ifdef CONFIG_VM_EVENT_COUNTERS
-       "pgpgin",
-       "pgpgout",
-       "pswpin",
-       "pswpout",
-
-       TEXTS_FOR_ZONES("pgalloc")
-
-       "pgfree",
-       "pgactivate",
-       "pgdeactivate",
-
-       "pgfault",
-       "pgmajfault",
-
-       TEXTS_FOR_ZONES("pgrefill")
-       TEXTS_FOR_ZONES("pgsteal")
-       TEXTS_FOR_ZONES("pgscan_kswapd")
-       TEXTS_FOR_ZONES("pgscan_direct")
-
-#ifdef CONFIG_NUMA
-       "zone_reclaim_failed",
-#endif
-       "pginodesteal",
-       "slabs_scanned",
-       "kswapd_steal",
-       "kswapd_inodesteal",
-       "kswapd_low_wmark_hit_quickly",
-       "kswapd_high_wmark_hit_quickly",
-       "kswapd_skip_congestion_wait",
-       "pageoutrun",
-       "allocstall",
-
-       "pgrotated",
-
-#ifdef CONFIG_COMPACTION
-       "compact_blocks_moved",
-       "compact_pages_moved",
-       "compact_pagemigrate_failed",
-       "compact_stall",
-       "compact_fail",
-       "compact_success",
-#endif
-
-#ifdef CONFIG_HUGETLB_PAGE
-       "htlb_buddy_alloc_success",
-       "htlb_buddy_alloc_fail",
-#endif
-       "unevictable_pgs_culled",
-       "unevictable_pgs_scanned",
-       "unevictable_pgs_rescued",
-       "unevictable_pgs_mlocked",
-       "unevictable_pgs_munlocked",
-       "unevictable_pgs_cleared",
-       "unevictable_pgs_stranded",
-       "unevictable_pgs_mlockfreed",
-#endif
-};
-
 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                                                        struct zone *zone)
 {
@@ -911,7 +977,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                   "\n        scanned  %lu"
                   "\n        spanned  %lu"
                   "\n        present  %lu",
-                  zone_nr_free_pages(zone),
+                  zone_page_state(zone, NR_FREE_PAGES),
                   min_wmark_pages(zone),
                   low_wmark_pages(zone),
                   high_wmark_pages(zone),
@@ -1136,7 +1202,6 @@ static int __init setup_vmstat(void)
 #ifdef CONFIG_SMP
        int cpu;
 
-       refresh_zone_stat_thresholds();
        register_cpu_notifier(&vmstat_notifier);
 
        for_each_online_cpu(cpu)