rcu: Fix accelerated grace periods for last non-dynticked CPU
Paul E. McKenney [Sat, 27 Feb 2010 00:38:56 +0000 (16:38 -0800)]
It is invalid to invoke __rcu_process_callbacks() with irqs
disabled, so do it indirectly via raise_softirq().  This
requires a state-machine implementation to cycle through the
grace-period machinery the required number of times.

Located-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1267231138-27856-1-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

kernel/rcutree.c
kernel/rcutree.h
kernel/rcutree_plugin.h

index 335bfe4..3ec8160 100644 (file)
@@ -1341,6 +1341,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
         * grace-period manipulations above.
         */
        smp_mb(); /* See above block comment. */
+
+       /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
+       rcu_needs_cpu_flush();
 }
 
 static void
index 2ceb083..1439eb5 100644 (file)
@@ -373,5 +373,6 @@ static int rcu_preempt_needs_cpu(int cpu);
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
 static void rcu_preempt_send_cbs_to_orphanage(void);
 static void __init __rcu_init_preempt(void);
+static void rcu_needs_cpu_flush(void);
 
 #endif /* #ifndef RCU_TREE_NONCORE */
index 3516de7..ed241fc 100644 (file)
@@ -973,9 +973,19 @@ int rcu_needs_cpu(int cpu)
        return rcu_needs_cpu_quick_check(cpu);
 }
 
+/*
+ * Check to see if we need to continue a callback-flush operations to
+ * allow the last CPU to enter dyntick-idle mode.  But fast dyntick-idle
+ * entry is not configured, so we never do need to.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+}
+
 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 
 #define RCU_NEEDS_CPU_FLUSHES 5
+static DEFINE_PER_CPU(int, rcu_dyntick_drain);
 
 /*
  * Check to see if any future RCU-related work will need to be done
@@ -988,39 +998,62 @@ int rcu_needs_cpu(int cpu)
  * only if all other CPUs are already in dynticks-idle mode.  This will
  * allow the CPU cores to be powered down immediately, as opposed to after
  * waiting many milliseconds for grace periods to elapse.
+ *
+ * Because it is not legal to invoke rcu_process_callbacks() with irqs
+ * disabled, we do one pass of force_quiescent_state(), then do a
+ * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
+ * The per-cpu rcu_dyntick_drain variable controls the sequencing.
  */
 int rcu_needs_cpu(int cpu)
 {
-       int c = 1;
-       int i;
+       int c = 0;
        int thatcpu;
 
        /* Don't bother unless we are the last non-dyntick-idle CPU. */
        for_each_cpu_not(thatcpu, nohz_cpu_mask)
-               if (thatcpu != cpu)
+               if (thatcpu != cpu) {
+                       per_cpu(rcu_dyntick_drain, cpu) = 0;
                        return rcu_needs_cpu_quick_check(cpu);
-
-       /* Try to push remaining RCU-sched and RCU-bh callbacks through. */
-       for (i = 0; i < RCU_NEEDS_CPU_FLUSHES && c; i++) {
-               c = 0;
-               if (per_cpu(rcu_sched_data, cpu).nxtlist) {
-                       rcu_sched_qs(cpu);
-                       force_quiescent_state(&rcu_sched_state, 0);
-                       __rcu_process_callbacks(&rcu_sched_state,
-                                               &per_cpu(rcu_sched_data, cpu));
-                       c = !!per_cpu(rcu_sched_data, cpu).nxtlist;
-               }
-               if (per_cpu(rcu_bh_data, cpu).nxtlist) {
-                       rcu_bh_qs(cpu);
-                       force_quiescent_state(&rcu_bh_state, 0);
-                       __rcu_process_callbacks(&rcu_bh_state,
-                                               &per_cpu(rcu_bh_data, cpu));
-                       c = !!per_cpu(rcu_bh_data, cpu).nxtlist;
                }
+
+       /* Check and update the rcu_dyntick_drain sequencing. */
+       if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+               /* First time through, initialize the counter. */
+               per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
+       } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+               /* We have hit the limit, so time to give up. */
+               return rcu_needs_cpu_quick_check(cpu);
+       }
+
+       /* Do one step pushing remaining RCU callbacks through. */
+       if (per_cpu(rcu_sched_data, cpu).nxtlist) {
+               rcu_sched_qs(cpu);
+               force_quiescent_state(&rcu_sched_state, 0);
+               c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
+       }
+       if (per_cpu(rcu_bh_data, cpu).nxtlist) {
+               rcu_bh_qs(cpu);
+               force_quiescent_state(&rcu_bh_state, 0);
+               c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
        }
 
        /* If RCU callbacks are still pending, RCU still needs this CPU. */
+       if (c)
+               raise_softirq(RCU_SOFTIRQ);
        return c;
 }
 
+/*
+ * Check to see if we need to continue a callback-flush operations to
+ * allow the last CPU to enter dyntick-idle mode.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+       int cpu = smp_processor_id();
+
+       if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
+               return;
+       (void)rcu_needs_cpu(cpu);
+}
+
 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */