rcu: Provide RCU CPU stall warnings for tiny RCU
Paul E. McKenney [Fri, 19 Oct 2012 19:49:17 +0000 (12:49 -0700)]
Tiny RCU has historically omitted RCU CPU stall warnings in order to
reduce memory requirements, however, lack of these warnings caused
Thomas Gleixner some debugging pain recently.  Therefore, this commit
adds RCU CPU stall warnings to tiny RCU if RCU_TRACE=y.  This keeps
the memory footprint small, while still enabling CPU stall warnings
in kernels built to enable them.

Updated to include Josh Triplett's suggested use of RCU_STALL_COMMON
config variable to simplify #if expressions.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>

init/Kconfig
kernel/rcu.h
kernel/rcupdate.c
kernel/rcutiny.c
kernel/rcutiny_plugin.h
kernel/rcutree.c
kernel/rcutree.h
lib/Kconfig.debug

index 7d30240..a5e90e1 100644 (file)
@@ -486,6 +486,14 @@ config PREEMPT_RCU
          This option enables preemptible-RCU code that is common between
          the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
 
+config RCU_STALL_COMMON
+       def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE )
+       help
+         This option enables RCU CPU stall code that is common between
+         the TINY and TREE variants of RCU.  The purpose is to allow
+         the tiny variants to disable RCU CPU stall warnings, while
+         making these warnings mandatory for the tree variants.
+
 config CONTEXT_TRACKING
        bool
 
index 20dfba5..7f8e759 100644 (file)
@@ -111,4 +111,11 @@ static inline bool __rcu_reclaim(char *rn, struct rcu_head *head)
 
 extern int rcu_expedited;
 
+#ifdef CONFIG_RCU_STALL_COMMON
+
+extern int rcu_cpu_stall_suppress;
+int rcu_jiffies_till_stall_check(void);
+
+#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
 #endif /* __LINUX_RCU_H */
index a2cf761..076730d 100644 (file)
@@ -412,3 +412,54 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
 #else
 #define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
 #endif
+
+#ifdef CONFIG_RCU_STALL_COMMON
+
+#ifdef CONFIG_PROVE_RCU
+#define RCU_STALL_DELAY_DELTA         (5 * HZ)
+#else
+#define RCU_STALL_DELAY_DELTA         0
+#endif
+
+int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
+int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
+
+module_param(rcu_cpu_stall_suppress, int, 0644);
+module_param(rcu_cpu_stall_timeout, int, 0644);
+
+int rcu_jiffies_till_stall_check(void)
+{
+       int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
+
+       /*
+        * Limit check must be consistent with the Kconfig limits
+        * for CONFIG_RCU_CPU_STALL_TIMEOUT.
+        */
+       if (till_stall_check < 3) {
+               ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
+               till_stall_check = 3;
+       } else if (till_stall_check > 300) {
+               ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
+               till_stall_check = 300;
+       }
+       return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
+}
+
+static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
+{
+       rcu_cpu_stall_suppress = 1;
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block rcu_panic_block = {
+       .notifier_call = rcu_panic,
+};
+
+static int __init check_cpu_stall_init(void)
+{
+       atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
+       return 0;
+}
+early_initcall(check_cpu_stall_init);
+
+#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
index e7dce58..b899df3 100644 (file)
@@ -51,10 +51,10 @@ static void __call_rcu(struct rcu_head *head,
                       void (*func)(struct rcu_head *rcu),
                       struct rcu_ctrlblk *rcp);
 
-#include "rcutiny_plugin.h"
-
 static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 
+#include "rcutiny_plugin.h"
+
 /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
 static void rcu_idle_enter_common(long long newval)
 {
@@ -205,6 +205,7 @@ int rcu_is_cpu_rrupt_from_idle(void)
  */
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
+       reset_cpu_stall_ticks(rcp);
        if (rcp->rcucblist != NULL &&
            rcp->donetail != rcp->curtail) {
                rcp->donetail = rcp->curtail;
@@ -251,6 +252,7 @@ void rcu_bh_qs(int cpu)
  */
 void rcu_check_callbacks(int cpu, int user)
 {
+       check_cpu_stalls();
        if (user || rcu_is_cpu_rrupt_from_idle())
                rcu_sched_qs(cpu);
        else if (!in_softirq())
index f85016a..8a23300 100644 (file)
@@ -33,6 +33,9 @@ struct rcu_ctrlblk {
        struct rcu_head **donetail;     /* ->next pointer of last "done" CB. */
        struct rcu_head **curtail;      /* ->next pointer of last CB. */
        RCU_TRACE(long qlen);           /* Number of pending CBs. */
+       RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
+       RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
+       RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
        RCU_TRACE(char *name);          /* Name of RCU type. */
 };
 
@@ -54,6 +57,51 @@ int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+#ifdef CONFIG_RCU_TRACE
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       unsigned long j;
+       unsigned long js;
+
+       if (rcu_cpu_stall_suppress)
+               return;
+       rcp->ticks_this_gp++;
+       j = jiffies;
+       js = rcp->jiffies_stall;
+       if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
+               pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
+                      rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
+                      jiffies - rcp->gp_start, rcp->qlen);
+               dump_stack();
+       }
+       if (*rcp->curtail && ULONG_CMP_GE(j, js))
+               rcp->jiffies_stall = jiffies +
+                       3 * rcu_jiffies_till_stall_check() + 3;
+       else if (ULONG_CMP_GE(j, js))
+               rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+}
+
+static void check_cpu_stall_preempt(void);
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
+static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
+{
+#ifdef CONFIG_RCU_TRACE
+       rcp->ticks_this_gp = 0;
+       rcp->gp_start = jiffies;
+       rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+#endif /* #ifdef CONFIG_RCU_TRACE */
+}
+
+static void check_cpu_stalls(void)
+{
+       RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
+       RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
+       RCU_TRACE(check_cpu_stall_preempt());
+}
+
 #ifdef CONFIG_TINY_PREEMPT_RCU
 
 #include <linux/delay.h>
@@ -448,6 +496,7 @@ static void rcu_preempt_start_gp(void)
                /* Official start of GP. */
                rcu_preempt_ctrlblk.gpnum++;
                RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
+               reset_cpu_stall_ticks(&rcu_preempt_ctrlblk.rcb);
 
                /* Any blocked RCU readers block new GP. */
                if (rcu_preempt_blocked_readers_any())
@@ -1054,4 +1103,11 @@ MODULE_AUTHOR("Paul E. McKenney");
 MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
 MODULE_LICENSE("GPL");
 
+static void check_cpu_stall_preempt(void)
+{
+#ifdef CONFIG_TINY_PREEMPT_RCU
+       check_cpu_stall(&rcu_preempt_ctrlblk.rcb);
+#endif /* #ifdef CONFIG_TINY_PREEMPT_RCU */
+}
+
 #endif /* #ifdef CONFIG_RCU_TRACE */
index e441b77..d069430 100644 (file)
@@ -217,12 +217,6 @@ module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
 module_param(qlowmark, long, 0444);
 
-int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
-int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
-
-module_param(rcu_cpu_stall_suppress, int, 0644);
-module_param(rcu_cpu_stall_timeout, int, 0644);
-
 static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
 static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
 
@@ -793,28 +787,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
        return 0;
 }
 
-static int jiffies_till_stall_check(void)
-{
-       int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
-
-       /*
-        * Limit check must be consistent with the Kconfig limits
-        * for CONFIG_RCU_CPU_STALL_TIMEOUT.
-        */
-       if (till_stall_check < 3) {
-               ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
-               till_stall_check = 3;
-       } else if (till_stall_check > 300) {
-               ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
-               till_stall_check = 300;
-       }
-       return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
-}
-
 static void record_gp_stall_check_time(struct rcu_state *rsp)
 {
        rsp->gp_start = jiffies;
-       rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
+       rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
 }
 
 /*
@@ -857,7 +833,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
                return;
        }
-       rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3;
+       rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
        /*
@@ -935,7 +911,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
        raw_spin_lock_irqsave(&rnp->lock, flags);
        if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
                rsp->jiffies_stall = jiffies +
-                                    3 * jiffies_till_stall_check() + 3;
+                                    3 * rcu_jiffies_till_stall_check() + 3;
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
        set_need_resched();  /* kick ourselves to get things going. */
@@ -966,12 +942,6 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
        }
 }
 
-static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
-{
-       rcu_cpu_stall_suppress = 1;
-       return NOTIFY_DONE;
-}
-
 /**
  * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
  *
@@ -989,15 +959,6 @@ void rcu_cpu_stall_reset(void)
                rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
 }
 
-static struct notifier_block rcu_panic_block = {
-       .notifier_call = rcu_panic,
-};
-
-static void __init check_cpu_stall_init(void)
-{
-       atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
-}
-
 /*
  * Update CPU-local rcu_data state to record the newly noticed grace period.
  * This is used both when we started the grace period and when we notice
@@ -3074,7 +3035,6 @@ void __init rcu_init(void)
        cpu_notifier(rcu_cpu_notify, 0);
        for_each_online_cpu(cpu)
                rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
-       check_cpu_stall_init();
 }
 
 #include "rcutree_plugin.h"
index 4b69291..db9bec8 100644 (file)
@@ -343,11 +343,6 @@ struct rcu_data {
 
 #define RCU_JIFFIES_TILL_FORCE_QS       3      /* for rsp->jiffies_force_qs */
 
-#ifdef CONFIG_PROVE_RCU
-#define RCU_STALL_DELAY_DELTA         (5 * HZ)
-#else
-#define RCU_STALL_DELAY_DELTA         0
-#endif
 #define RCU_STALL_RAT_DELAY            2       /* Allow other CPUs time */
                                                /*  to take at least one */
                                                /*  scheduling clock irq */
index 3a35309..f7329b3 100644 (file)
@@ -970,7 +970,7 @@ config RCU_TORTURE_TEST_RUNNABLE
 
 config RCU_CPU_STALL_TIMEOUT
        int "RCU CPU stall timeout in seconds"
-       depends on TREE_RCU || TREE_PREEMPT_RCU
+       depends on RCU_STALL_COMMON
        range 3 300
        default 21
        help