rcu: Introduce proper blocking to no-CBs kthreads GP waits
Paul E. McKenney [Mon, 11 Feb 2013 04:48:58 +0000 (20:48 -0800)]
Currently, the no-CBs kthreads do repeated timed waits for grace periods
to elapse.  This is crude and energy-inefficient, so this commit instead
allows no-CBs kthreads to specify exactly which grace period they are
waiting for and to block until that grace period completes.
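
To make the mechanism concrete, here is a minimal userspace model of the
new scheme (an illustration only; the names are invented for the example,
and the kernel uses wait_queue_head_t slots and wake_up_all() where this
model just prints).  Each rcu_node structure gains two request slots
indexed by the low-order bit of the grace-period number, so a waiter
registers for one specific grace period and learns exactly when it ends:

    /* Illustration only: parity-indexed grace-period requests. */
    #include <stdio.h>

    static unsigned long completed; /* Last completed grace period. */
    static int n_gp_requests[2];    /* Waiter counts, by GP parity. */

    /* Ask for the GP after the current one (cf. completed + 2). */
    static unsigned long request_gp(void)
    {
        unsigned long c = completed + 2;

        n_gp_requests[c & 0x1]++;
        return c;
    }

    /* End of a GP: wake its slot, report whether more GPs are needed. */
    static int gp_cleanup(void)
    {
        unsigned long c = ++completed;

        printf("GP %lu done: waking %d waiter(s)\n",
               c, n_gp_requests[c & 0x1]);
        n_gp_requests[c & 0x1] = 0;
        return n_gp_requests[(c + 1) & 0x1];
    }

    int main(void)
    {
        unsigned long c = request_gp();

        printf("waiting for GP %lu\n", c);
        while (gp_cleanup()) /* GP c - 1 wakes nobody; GP c wakes us. */
            continue;
        return 0;
    }

When no grace period is in progress, the patch instead files the request
for completed + 1 and starts the grace period itself, as the
rcu_nocb_wait_gp() hunk below shows.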

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

kernel/rcutree.c
kernel/rcutree.h
kernel/rcutree_plugin.h

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 6ad0716..433f426 100644
@@ -310,7 +310,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 
        if (rcu_gp_in_progress(rsp))
                return 0;  /* No, a grace period is already in progress. */
-       if (rcu_nocb_needs_gp(rdp))
+       if (rcu_nocb_needs_gp(rsp))
                return 1;  /* Yes, a no-CBs CPU needs one. */
        if (!rdp->nxttail[RCU_NEXT_TAIL])
                return 0;  /* No, this is a no-CBs (or offline) CPU. */
@@ -1364,6 +1364,7 @@ int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 static void rcu_gp_cleanup(struct rcu_state *rsp)
 {
        unsigned long gp_duration;
+       int nocb = 0;
        struct rcu_data *rdp;
        struct rcu_node *rnp = rcu_get_root(rsp);
 
@@ -1394,11 +1395,13 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
        rcu_for_each_node_breadth_first(rsp, rnp) {
                raw_spin_lock_irq(&rnp->lock);
                rnp->completed = rsp->gpnum;
+               nocb += rcu_nocb_gp_cleanup(rsp, rnp);
                raw_spin_unlock_irq(&rnp->lock);
                cond_resched();
        }
        rnp = rcu_get_root(rsp);
        raw_spin_lock_irq(&rnp->lock);
+       rcu_nocb_gp_set(rnp, nocb);
 
        rsp->completed = rsp->gpnum; /* Declare grace period done. */
        trace_rcu_grace_period(rsp->name, rsp->completed, "end");
@@ -3084,6 +3087,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
                        }
                        rnp->level = i;
                        INIT_LIST_HEAD(&rnp->blkd_tasks);
+                       rcu_init_one_nocb(rnp);
                }
        }
 
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 7af39f4..e51373c 100644
@@ -196,6 +196,12 @@ struct rcu_node {
                                /* Refused to boost: not sure why, though. */
                                /*  This can happen due to race conditions. */
 #endif /* #ifdef CONFIG_RCU_BOOST */
+#ifdef CONFIG_RCU_NOCB_CPU
+       wait_queue_head_t nocb_gp_wq[2];
+                               /* Place for rcu_nocb_kthread() to wait for GP. */
+       int n_nocb_gp_requests[2];
+                               /* Counts of upcoming no-CB GP requests. */
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
        raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
 } ____cacheline_internodealigned_in_smp;
 
@@ -326,7 +332,6 @@ struct rcu_data {
        int nocb_p_count_lazy;          /*  (approximate). */
        wait_queue_head_t nocb_wq;      /* For nocb kthreads to sleep on. */
        struct task_struct *nocb_kthread;
-       bool nocb_needs_gp;
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
        int cpu;
@@ -524,7 +529,10 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
 static void print_cpu_stall_info_end(void);
 static void zero_cpu_stall_ticks(struct rcu_data *rdp);
 static void increment_cpu_stall_ticks(void);
-static int rcu_nocb_needs_gp(struct rcu_data *rdp);
+static int rcu_nocb_needs_gp(struct rcu_state *rsp);
+static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
+static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
+static void rcu_init_one_nocb(struct rcu_node *rnp);
 static bool is_nocb_cpu(int cpu);
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
                            bool lazy);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 3e33aef..90a1914 100644
@@ -2176,11 +2176,51 @@ static int __init parse_rcu_nocb_poll(char *arg)
 early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
 
 /*
- * Does this CPU needs a grace period due to offloaded callbacks?
+ * Do any no-CBs CPUs need another grace period?
+ *
+ * Interrupts must be disabled.  If the caller does not hold the root
+ * rcu_node structure's ->lock, the results are advisory only.
+ */
+static int rcu_nocb_needs_gp(struct rcu_state *rsp)
+{
+       struct rcu_node *rnp = rcu_get_root(rsp);
+
+       return rnp->n_nocb_gp_requests[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
+}
+
+/*
+ * Clean up this rcu_node structure's no-CBs state at the end of
+ * a grace period, and also return whether any no-CBs CPU associated
+ * with this rcu_node structure needs another grace period.
+ */
+static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+       int c = rnp->completed;
+       int needmore;
+
+       wake_up_all(&rnp->nocb_gp_wq[c & 0x1]);
+       rnp->n_nocb_gp_requests[c & 0x1] = 0;
+       needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1];
+       return needmore;
+}
+
+/*
+ * Set the root rcu_node structure's ->n_nocb_gp_requests field
+ * based on the sum of those of all rcu_node structures.  This does
+ * double-count the root rcu_node structure's requests, but this
+ * is necessary to handle the possibility of a rcu_nocb_kthread()
+ * having awakened during the time that the rcu_node structures
+ * were being updated for the end of the previous grace period.
  */
-static int rcu_nocb_needs_gp(struct rcu_data *rdp)
+static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
+{
+       rnp->n_nocb_gp_requests[(rnp->completed + 1) & 0x1] += nrq;
+}
+
+static void rcu_init_one_nocb(struct rcu_node *rnp)
 {
-       return rdp->nocb_needs_gp;
+       init_waitqueue_head(&rnp->nocb_gp_wq[0]);
+       init_waitqueue_head(&rnp->nocb_gp_wq[1]);
 }
 
 /* Is the specified CPU a no-CBs CPU? */
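
A worked example of the parity arithmetic in the three functions above
(the numbers are arbitrary and purely illustrative):

    /*
     * Suppose rnp->completed == 4 and a grace period (gpnum == 5) is
     * in progress.
     *
     * - rcu_nocb_wait_gp() requests GP c = 4 + 2 = 6 and increments
     *   n_nocb_gp_requests[6 & 0x1], that is, slot 0.
     * - rcu_nocb_needs_gp() checks slot (4 + 1) & 0x1 == 1, which holds
     *   the requests that GP 5, the next to complete, will satisfy.
     * - When GP 5 ends, rcu_nocb_gp_cleanup() wakes and zeroes slot
     *   5 & 0x1 == 1, then returns the count in slot (5 + 1) & 0x1 == 0,
     *   where the request for GP 6 waits.  The nonzero return tells the
     *   caller that another grace period (GP 6) must be started.
     */
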
@@ -2289,31 +2329,73 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 {
        unsigned long c;
+       bool d;
        unsigned long flags;
-       unsigned long j;
+       unsigned long flags1;
        struct rcu_node *rnp = rdp->mynode;
+       struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
        c = rnp->completed + 2;
-       rdp->nocb_needs_gp = true;
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+       /* Count our request for a grace period. */
+       rnp->n_nocb_gp_requests[c & 0x1]++;
+
+       if (rnp->gpnum != rnp->completed) {
+
+               /*
+                * This rcu_node structure believes that a grace period
+                * is in progress, so we are done.  When this grace
+                * period ends, our request will be acted upon.
+                */
+               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+       } else {
+
+               /*
+                * A grace period might not be in progress, so check the
+                * root rcu_node structure to see if we must start one.
+                */
+               if (rnp != rnp_root)
+                       raw_spin_lock(&rnp_root->lock); /* irqs disabled. */
+               if (rnp_root->gpnum != rnp_root->completed) {
+                       raw_spin_unlock(&rnp_root->lock); /* irqs disabled. */
+               } else {
+
+                       /*
+                        * No grace period, so we need to start one.
+                        * The good news is that we can wait for exactly
+                        * one grace period instead of part of the current
+                        * grace period and all of the next grace period.
+                        * Adjust counters accordingly and start the
+                        * needed grace period.
+                        */
+                       rnp->n_nocb_gp_requests[c & 0x1]--;
+                       c = rnp_root->completed + 1;
+                       rnp->n_nocb_gp_requests[c & 0x1]++;
+                       rnp_root->n_nocb_gp_requests[c & 0x1]++;
+                       local_save_flags(flags1);
+                       rcu_start_gp(rdp->rsp, flags1); /* Releases ->lock. */
+               }
+
+               /* Clean up locking and irq state. */
+               if (rnp != rnp_root)
+                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               else
+                       local_irq_restore(flags);
+       }
 
        /*
         * Wait for the grace period.  Do so interruptibly to avoid messing
         * up the load average.
         */
        for (;;) {
-               j = jiffies;
-               schedule_timeout_interruptible(2);
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               if (ULONG_CMP_GE(rnp->completed, c)) {
-                       rdp->nocb_needs_gp = false;
-                       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               wait_event_interruptible(
+                       rnp->nocb_gp_wq[c & 0x1],
+                       (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
+               if (likely(d))
                        break;
-               }
-               if (j == jiffies)
-                       flush_signals(current);
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
+               flush_signals(current);
        }
        smp_mb(); /* Ensure that CB invocation happens after GP end. */
 }
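
The loop above is the usual kthread wait idiom: sleeping interruptibly
keeps the kthread from inflating the load average, the condition's value
is latched into "d", and a wait cut short by a signal rather than by
wake_up_all() is handled by flushing the signal and waiting again.  A
generic sketch of the pattern, where wq, cond, and done are placeholders
rather than names from this patch:

    for (;;) {
            wait_event_interruptible(wq, (done = ACCESS_ONCE(cond)));
            if (done)
                    break;                  /* Real wakeup: condition true. */
            flush_signals(current);         /* Signal cut the wait short. */
    }
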
@@ -2416,11 +2498,24 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
 
 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
 
-static int rcu_nocb_needs_gp(struct rcu_data *rdp)
+static int rcu_nocb_needs_gp(struct rcu_state *rsp)
+{
+       return 0;
+}
+
+static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 {
        return 0;
 }
 
+static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
+{
+}
+
+static void rcu_init_one_nocb(struct rcu_node *rnp)
+{
+}
+
 static bool is_nocb_cpu(int cpu)
 {
        return false;