mm: vmstat: use a single setter function and callback for adjusting percpu thresholds
Mel Gorman [Thu, 13 Jan 2011 23:45:43 +0000 (15:45 -0800)]
reduce_pgdat_percpu_threshold() and restore_pgdat_percpu_threshold() exist
to adjust the per-cpu vmstat thresholds while kswapd is awake to avoid
errors due to counter drift.  The functions duplicate some code so this
patch replaces them with a single set_pgdat_percpu_threshold() that takes
a callback function to calculate the desired threshold as a parameter.

[akpm@linux-foundation.org: readability tweak]
[kosaki.motohiro@jp.fujitsu.com: set_pgdat_percpu_threshold(): don't use for_each_online_cpu]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Christoph Lameter <cl@linux.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

include/linux/vmstat.h
mm/vmscan.c
mm/vmstat.c

index e4cc21c..833e676 100644 (file)
@@ -254,8 +254,11 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item);
 extern void __dec_zone_state(struct zone *, enum zone_stat_item);
 
 void refresh_cpu_vm_stats(int);
-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat);
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat);
+
+int calculate_pressure_threshold(struct zone *zone);
+int calculate_normal_threshold(struct zone *zone);
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
+                               int (*calculate_pressure)(struct zone *));
 #else /* CONFIG_SMP */
 
 /*
@@ -300,8 +303,7 @@ static inline void __dec_zone_page_state(struct page *page,
 #define dec_zone_page_state __dec_zone_page_state
 #define mod_zone_page_state __mod_zone_page_state
 
-static inline void reduce_pgdat_percpu_threshold(pg_data_t *pgdat) { }
-static inline void restore_pgdat_percpu_threshold(pg_data_t *pgdat) { }
+#define set_pgdat_percpu_threshold(pgdat, callback) { }
 
 static inline void refresh_cpu_vm_stats(int cpu) { }
 #endif
index 5da4295..86f8c34 100644 (file)
@@ -2448,9 +2448,24 @@ static int kswapd(void *p)
                                 */
                                if (!sleeping_prematurely(pgdat, order, remaining)) {
                                        trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
-                                       restore_pgdat_percpu_threshold(pgdat);
+
+                                       /*
+                                        * vmstat counters are not perfectly
+                                        * accurate and the estimated value
+                                        * for counters such as NR_FREE_PAGES
+                                        * can deviate from the true value by
+                                        * nr_online_cpus * threshold. To
+                                        * avoid the zone watermarks being
+                                        * breached while under pressure, we
+                                        * reduce the per-cpu vmstat threshold
+                                        * while kswapd is awake and restore
+                                        * them before going back to sleep.
+                                        */
+                                       set_pgdat_percpu_threshold(pgdat,
+                                               calculate_normal_threshold);
                                        schedule();
-                                       reduce_pgdat_percpu_threshold(pgdat);
+                                       set_pgdat_percpu_threshold(pgdat,
+                                               calculate_pressure_threshold);
                                } else {
                                        if (remaining)
                                                count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
index bc0f095..751a65e 100644 (file)
@@ -83,7 +83,7 @@ EXPORT_SYMBOL(vm_stat);
 
 #ifdef CONFIG_SMP
 
-static int calculate_pressure_threshold(struct zone *zone)
+int calculate_pressure_threshold(struct zone *zone)
 {
        int threshold;
        int watermark_distance;
@@ -107,7 +107,7 @@ static int calculate_pressure_threshold(struct zone *zone)
        return threshold;
 }
 
-static int calculate_threshold(struct zone *zone)
+int calculate_normal_threshold(struct zone *zone)
 {
        int threshold;
        int mem;        /* memory in 128 MB units */
@@ -166,7 +166,7 @@ static void refresh_zone_stat_thresholds(void)
        for_each_populated_zone(zone) {
                unsigned long max_drift, tolerate_drift;
 
-               threshold = calculate_threshold(zone);
+               threshold = calculate_normal_threshold(zone);
 
                for_each_online_cpu(cpu)
                        per_cpu_ptr(zone->pageset, cpu)->stat_threshold
@@ -185,46 +185,24 @@ static void refresh_zone_stat_thresholds(void)
        }
 }
 
-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat)
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
+                               int (*calculate_pressure)(struct zone *))
 {
        struct zone *zone;
        int cpu;
        int threshold;
        int i;
 
-       get_online_cpus();
-       for (i = 0; i < pgdat->nr_zones; i++) {
-               zone = &pgdat->node_zones[i];
-               if (!zone->percpu_drift_mark)
-                       continue;
-
-               threshold = calculate_pressure_threshold(zone);
-               for_each_online_cpu(cpu)
-                       per_cpu_ptr(zone->pageset, cpu)->stat_threshold
-                                                       = threshold;
-       }
-       put_online_cpus();
-}
-
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat)
-{
-       struct zone *zone;
-       int cpu;
-       int threshold;
-       int i;
-
-       get_online_cpus();
        for (i = 0; i < pgdat->nr_zones; i++) {
                zone = &pgdat->node_zones[i];
                if (!zone->percpu_drift_mark)
                        continue;
 
-               threshold = calculate_threshold(zone);
-               for_each_online_cpu(cpu)
+               threshold = (*calculate_pressure)(zone);
+               for_each_possible_cpu(cpu)
                        per_cpu_ptr(zone->pageset, cpu)->stat_threshold
                                                        = threshold;
        }
-       put_online_cpus();
 }
 
 /*