rcu: Per-CPU operation cleanups to rcu_*_qs() functions

The rcu_bh_qs(), rcu_preempt_qs(), and rcu_sched_qs() functions use
old-style per-CPU variable access and write to ->passed_quiesce even
if it is already set.  This commit therefore updates them to use the
new-style per-CPU variable access functions and avoids the spurious
writes.
This commit also eliminates the "cpu" argument to these functions
because they are always invoked on the indicated CPU.  Finally,
rcu_preempt_qs() now clears current->rcu_read_unlock_special.b.need_qs
only when it newly records a quiescent state, adding a barrier() to
coordinate with the new ->passed_quiesce check in
rcu_preempt_check_callbacks().
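
For illustration only, here is a minimal sketch of the access-style
change.  The struct, the my_qs_*() functions, and the pr_debug() calls
below are hypothetical stand-ins; per_cpu(), __this_cpu_read(), and
__this_cpu_write() are the real kernel accessors, and as with the
rcu_*_qs() functions, callers must have preemption disabled:

	#include <linux/percpu.h>
	#include <linux/printk.h>
	#include <linux/types.h>

	struct my_data {
		unsigned long seq;
		bool flag;
	};
	static DEFINE_PER_CPU(struct my_data, my_data);

	/* Old style: compute this CPU's address, then store
	 * unconditionally, even when ->flag is already set.
	 */
	static void my_qs_old(int cpu)
	{
		struct my_data *mdp = &per_cpu(my_data, cpu);

		if (!mdp->flag)
			pr_debug("qs at seq %lu\n", mdp->seq);
		mdp->flag = true;	/* spurious store if already true */
	}

	/* New style: operate on the running CPU's instance directly
	 * and store only when the flag is not yet set; the "cpu"
	 * argument goes away because the function always acts on the
	 * current CPU.
	 */
	static void my_qs_new(void)
	{
		if (!__this_cpu_read(my_data.flag)) {
			pr_debug("qs at seq %lu\n",
				 __this_cpu_read(my_data.seq));
			__this_cpu_write(my_data.flag, true);
		}
	}

The rewritten rcu_preempt_qs() below follows the my_qs_new() pattern:
the trace and the store happen only on the 0->1 transition of
->passed_quiesce, which is what eliminates the spurious writes.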

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0981c0c..25e692a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -158,14 +158,16 @@
  * As with the other rcu_*_qs() functions, callers to this function
  * must disable preemption.
  */
-static void rcu_preempt_qs(int cpu)
+static void rcu_preempt_qs(void)
 {
-	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-
-	if (rdp->passed_quiesce == 0)
-		trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs"));
-	rdp->passed_quiesce = 1;
-	current->rcu_read_unlock_special.b.need_qs = false;
+	if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) {
+		trace_rcu_grace_period(TPS("rcu_preempt"),
+				       __this_cpu_read(rcu_preempt_data.gpnum),
+				       TPS("cpuqs"));
+		__this_cpu_write(rcu_preempt_data.passed_quiesce, 1);
+		barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
+		current->rcu_read_unlock_special.b.need_qs = false;
+	}
 }
 
 /*
@@ -256,7 +258,7 @@
 	 * grace period, then the fact that the task has been enqueued
 	 * means that we continue to block the current grace period.
 	 */
-	rcu_preempt_qs(cpu);
+	rcu_preempt_qs();
 }
 
 /*
@@ -352,7 +354,7 @@
 	 */
 	special = t->rcu_read_unlock_special;
 	if (special.b.need_qs) {
-		rcu_preempt_qs(smp_processor_id());
+		rcu_preempt_qs();
 		if (!t->rcu_read_unlock_special.s) {
 			local_irq_restore(flags);
 			return;
@@ -651,11 +653,12 @@
 	struct task_struct *t = current;
 
 	if (t->rcu_read_lock_nesting == 0) {
-		rcu_preempt_qs(cpu);
+		rcu_preempt_qs();
 		return;
 	}
 	if (t->rcu_read_lock_nesting > 0 &&
-	    per_cpu(rcu_preempt_data, cpu).qs_pending)
+	    per_cpu(rcu_preempt_data, cpu).qs_pending &&
+	    !per_cpu(rcu_preempt_data, cpu).passed_quiesce)
 		t->rcu_read_unlock_special.b.need_qs = true;
 }
 