Merge branch 'for-2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
[linux-2.6.git] / mm / vmstat.c
index 33c33e7..312d728 100644 (file)
@@ -167,36 +167,24 @@ static void refresh_zone_stat_thresholds(void)
 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
                                int delta)
 {
-       struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
-
-       s8 *p = pcp->vm_stat_diff + item;
+       struct per_cpu_pageset __percpu *pcp = zone->pageset;
+       s8 __percpu *p = pcp->vm_stat_diff + item;
        long x;
+       long t;
+
+       x = delta + __this_cpu_read(*p);
 
-       x = delta + *p;
+       t = __this_cpu_read(pcp->stat_threshold);
 
-       if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
+       if (unlikely(x > t || x < -t)) {
                zone_page_state_add(x, zone, item);
                x = 0;
        }
-       *p = x;
+       __this_cpu_write(*p, x);
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
 
 /*
- * For an unknown interrupt state
- */
-void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
-                                       int delta)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __mod_zone_page_state(zone, item, delta);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_zone_page_state);
-
-/*
  * Optimized increment and decrement functions.
  *
  * These are only for a single page and therefore can take a struct page *
@@ -221,16 +209,17 @@ EXPORT_SYMBOL(mod_zone_page_state);
  */
 void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-       struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
-       s8 *p = pcp->vm_stat_diff + item;
+       struct per_cpu_pageset __percpu *pcp = zone->pageset;
+       s8 __percpu *p = pcp->vm_stat_diff + item;
+       s8 v, t;
 
-       (*p)++;
+       v = __this_cpu_inc_return(*p);
+       t = __this_cpu_read(pcp->stat_threshold);
+       if (unlikely(v > t)) {
+               s8 overstep = t >> 1;
 
-       if (unlikely(*p > pcp->stat_threshold)) {
-               int overstep = pcp->stat_threshold / 2;
-
-               zone_page_state_add(*p + overstep, zone, item);
-               *p = -overstep;
+               zone_page_state_add(v + overstep, zone, item);
+               __this_cpu_write(*p, -overstep);
        }
 }
 
@@ -242,16 +231,17 @@ EXPORT_SYMBOL(__inc_zone_page_state);
 
 void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-       struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
-       s8 *p = pcp->vm_stat_diff + item;
-
-       (*p)--;
+       struct per_cpu_pageset __percpu *pcp = zone->pageset;
+       s8 __percpu *p = pcp->vm_stat_diff + item;
+       s8 v, t;
 
-       if (unlikely(*p < - pcp->stat_threshold)) {
-               int overstep = pcp->stat_threshold / 2;
+       v = __this_cpu_dec_return(*p);
+       t = __this_cpu_read(pcp->stat_threshold);
+       if (unlikely(v < - t)) {
+               s8 overstep = t >> 1;
 
-               zone_page_state_add(*p - overstep, zone, item);
-               *p = overstep;
+               zone_page_state_add(v - overstep, zone, item);
+               __this_cpu_write(*p, overstep);
        }
 }
 
@@ -261,6 +251,92 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
 }
 EXPORT_SYMBOL(__dec_zone_page_state);
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+/*
+ * If we have cmpxchg_local support then we do not need to incur the overhead
+ * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
+ *
+ * mod_state() modifies the zone counter state through atomic per cpu
+ * operations.
+ *
+ * Overstep mode specifies how overstep should handled:
+ *     0       No overstepping
+ *     1       Overstepping half of threshold
+ *     -1      Overstepping minus half of threshold
+*/
+static inline void mod_state(struct zone *zone,
+       enum zone_stat_item item, int delta, int overstep_mode)
+{
+       struct per_cpu_pageset __percpu *pcp = zone->pageset;
+       s8 __percpu *p = pcp->vm_stat_diff + item;
+       long o, n, t, z;
+
+       do {
+               z = 0;  /* overflow to zone counters */
+
+               /*
+                * The fetching of the stat_threshold is racy. We may apply
+                * a counter threshold to the wrong the cpu if we get
+                * rescheduled while executing here. However, the following
+                * will apply the threshold again and therefore bring the
+                * counter under the threshold.
+                */
+               t = this_cpu_read(pcp->stat_threshold);
+
+               o = this_cpu_read(*p);
+               n = delta + o;
+
+               if (n > t || n < -t) {
+                       int os = overstep_mode * (t >> 1) ;
+
+                       /* Overflow must be added to zone counters */
+                       z = n + os;
+                       n = -os;
+               }
+       } while (this_cpu_cmpxchg(*p, o, n) != o);
+
+       if (z)
+               zone_page_state_add(z, zone, item);
+}
+
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+                                       int delta)
+{
+       mod_state(zone, item, delta, 0);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
+void inc_zone_state(struct zone *zone, enum zone_stat_item item)
+{
+       mod_state(zone, item, 1, 1);
+}
+
+void inc_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+       mod_state(page_zone(page), item, 1, 1);
+}
+EXPORT_SYMBOL(inc_zone_page_state);
+
+void dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+       mod_state(page_zone(page), item, -1, -1);
+}
+EXPORT_SYMBOL(dec_zone_page_state);
+#else
+/*
+ * Use interrupt disable to serialize counter updates
+ */
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+                                       int delta)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       __mod_zone_page_state(zone, item, delta);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
 void inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
        unsigned long flags;
@@ -291,6 +367,7 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
        local_irq_restore(flags);
 }
 EXPORT_SYMBOL(dec_zone_page_state);
+#endif
 
 /*
  * Update the zone counters for one cpu.