diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 24f4316..8a23300 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
  */
 
 #include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 
 /* Global control variables for rcupdate callback mechanism. */
 struct rcu_ctrlblk {
        struct rcu_head *rcucblist;     /* List of pending callbacks (CBs). */
        struct rcu_head **donetail;     /* ->next pointer of last "done" CB. */
        struct rcu_head **curtail;      /* ->next pointer of last CB. */
+       RCU_TRACE(long qlen);           /* Number of pending CBs. */
+       RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
+       RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
+       RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
+       RCU_TRACE(char *name);          /* Name of RCU type. */
 };
 
 /* Definition for rcupdate control block. */
 static struct rcu_ctrlblk rcu_sched_ctrlblk = {
        .donetail       = &rcu_sched_ctrlblk.rcucblist,
        .curtail        = &rcu_sched_ctrlblk.rcucblist,
+       RCU_TRACE(.name = "rcu_sched")
 };
 
 static struct rcu_ctrlblk rcu_bh_ctrlblk = {
        .donetail       = &rcu_bh_ctrlblk.rcucblist,
        .curtail        = &rcu_bh_ctrlblk.rcucblist,
+       RCU_TRACE(.name = "rcu_bh")
 };
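
For context: every RCU_TRACE() use above compiles away when CONFIG_RCU_TRACE is not set, so the tracing fields and the .name initializers cost nothing in non-tracing builds. The helper comes from rcupdate.h and is essentially:

    #ifdef CONFIG_RCU_TRACE
    #define RCU_TRACE(stmt) stmt
    #else /* #ifdef CONFIG_RCU_TRACE */
    #define RCU_TRACE(stmt)
    #endif /* #else #ifdef CONFIG_RCU_TRACE */
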
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -47,6 +57,51 @@ int rcu_scheduler_active __read_mostly;
 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+#ifdef CONFIG_RCU_TRACE
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+       unsigned long j;
+       unsigned long js;
+
+       if (rcu_cpu_stall_suppress)
+               return;
+       rcp->ticks_this_gp++;
+       j = jiffies;
+       js = rcp->jiffies_stall;
+       if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
+               pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
+                      rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
+                      jiffies - rcp->gp_start, rcp->qlen);
+               dump_stack();
+       }
+       if (*rcp->curtail && ULONG_CMP_GE(j, js))
+               rcp->jiffies_stall = jiffies +
+                       3 * rcu_jiffies_till_stall_check() + 3;
+       else if (ULONG_CMP_GE(j, js))
+               rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+}
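
The stall check compares jiffies values with ULONG_CMP_GE() rather than a plain >= so the test stays correct when the jiffies counter wraps; the helper (from rcupdate.h) is essentially:

    #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
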
+
+static void check_cpu_stall_preempt(void);
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
+static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
+{
+#ifdef CONFIG_RCU_TRACE
+       rcp->ticks_this_gp = 0;
+       rcp->gp_start = jiffies;
+       rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
+#endif /* #ifdef CONFIG_RCU_TRACE */
+}
+
+static void check_cpu_stalls(void)
+{
+       RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
+       RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
+       RCU_TRACE(check_cpu_stall_preempt());
+}
+
 #ifdef CONFIG_TINY_PREEMPT_RCU
 
 #include <linux/delay.h>
@@ -90,8 +145,31 @@ struct rcu_preempt_ctrlblk {
        u8 gpcpu;               /* Last grace period blocked by the CPU. */
        u8 completed;           /* Last grace period completed. */
                                /*  If all three are equal, RCU is idle. */
-       s8 boosted_this_gp;     /* Has boosting already happened? */
+#ifdef CONFIG_RCU_BOOST
        unsigned long boost_time; /* When to start boosting (jiffies) */
+#endif /* #ifdef CONFIG_RCU_BOOST */
+#ifdef CONFIG_RCU_TRACE
+       unsigned long n_grace_periods;
+#ifdef CONFIG_RCU_BOOST
+       unsigned long n_tasks_boosted;
+                               /* Total number of tasks boosted. */
+       unsigned long n_exp_boosts;
+                               /* Number of tasks boosted for expedited GP. */
+       unsigned long n_normal_boosts;
+                               /* Number of tasks boosted for normal GP. */
+       unsigned long n_balk_blkd_tasks;
+                               /* Refused to boost: no blocked tasks. */
+       unsigned long n_balk_exp_gp_tasks;
+                               /* Refused to boost: nothing blocking GP. */
+       unsigned long n_balk_boost_tasks;
+                               /* Refused to boost: already boosting. */
+       unsigned long n_balk_notyet;
+                               /* Refused to boost: not yet time. */
+       unsigned long n_balk_nos;
+                               /* Refused to boost: not sure why, though. */
+                               /*  This can happen due to race conditions. */
+#endif /* #ifdef CONFIG_RCU_BOOST */
+#endif /* #ifdef CONFIG_RCU_TRACE */
 };
 
 static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
@@ -99,6 +177,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
        .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
        .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
        .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
+       RCU_TRACE(.rcb.name = "rcu_preempt")
 };
 
 static int rcu_preempted_readers_exp(void);
@@ -115,6 +194,16 @@ static int rcu_cpu_blocking_cur_gp(void)
 /*
  * Check for a running RCU reader.  Because there is only one CPU,
  * there can be but one running RCU reader at a time.  ;-)
+ *
+ * Returns zero if there are no running readers.  Returns a positive
+ * number if there is at least one reader within its RCU read-side
+ * critical section.  Returns a negative number if an outermost reader
+ * is in the midst of exiting from its RCU read-side critical section.
  */
 static int rcu_preempt_running_reader(void)
 {
@@ -170,10 +259,58 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t)
        return np;
 }
 
+#ifdef CONFIG_RCU_TRACE
+
+#ifdef CONFIG_RCU_BOOST
+static void rcu_initiate_boost_trace(void);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+/*
+ * Dump additional statistics for TINY_PREEMPT_RCU.
+ */
+static void show_tiny_preempt_stats(struct seq_file *m)
+{
+       seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n",
+                  rcu_preempt_ctrlblk.rcb.qlen,
+                  rcu_preempt_ctrlblk.n_grace_periods,
+                  rcu_preempt_ctrlblk.gpnum,
+                  rcu_preempt_ctrlblk.gpcpu,
+                  rcu_preempt_ctrlblk.completed,
+                  "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)],
+                  "N."[!rcu_preempt_ctrlblk.gp_tasks],
+                  "E."[!rcu_preempt_ctrlblk.exp_tasks]);
+#ifdef CONFIG_RCU_BOOST
+       seq_printf(m, "%sttb=%c ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
+                  "             ",
+                  "B."[!rcu_preempt_ctrlblk.boost_tasks],
+                  rcu_preempt_ctrlblk.n_tasks_boosted,
+                  rcu_preempt_ctrlblk.n_exp_boosts,
+                  rcu_preempt_ctrlblk.n_normal_boosts,
+                  (int)(jiffies & 0xffff),
+                  (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
+       seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu ny=%lu nos=%lu\n",
+                  "             balk",
+                  rcu_preempt_ctrlblk.n_balk_blkd_tasks,
+                  rcu_preempt_ctrlblk.n_balk_exp_gp_tasks,
+                  rcu_preempt_ctrlblk.n_balk_boost_tasks,
+                  rcu_preempt_ctrlblk.n_balk_notyet,
+                  rcu_preempt_ctrlblk.n_balk_nos);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+}
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
 #ifdef CONFIG_RCU_BOOST
 
 #include "rtmutex_common.h"
 
+#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
+
+/* Controls for rcu_kthread() kthread. */
+static struct task_struct *rcu_kthread_task;
+static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
+static unsigned long have_rcu_kthread_work;
+
 /*
  * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
  * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
@@ -182,23 +319,58 @@ static int rcu_boost(void)
 {
        unsigned long flags;
        struct rt_mutex mtx;
-       struct list_head *np;
        struct task_struct *t;
+       struct list_head *tb;
 
-       if (rcu_preempt_ctrlblk.boost_tasks == NULL)
+       if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
+           rcu_preempt_ctrlblk.exp_tasks == NULL)
                return 0;  /* Nothing to boost. */
-       raw_local_irq_save(flags);
-       rcu_preempt_ctrlblk.boosted_this_gp++;
-       t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
-                        rcu_node_entry);
-       np = rcu_next_node_entry(t);
+
+       local_irq_save(flags);
+
+       /*
+        * Recheck with irqs disabled: all tasks in need of boosting
+        * might exit their RCU read-side critical sections on their own
+        * if we are preempted just before disabling irqs.
+        */
+       if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
+           rcu_preempt_ctrlblk.exp_tasks == NULL) {
+               local_irq_restore(flags);
+               return 0;
+       }
+
+       /*
+        * Preferentially boost tasks blocking expedited grace periods.
+        * This cannot starve the normal grace periods because a second
+        * expedited grace period must boost all blocked tasks, including
+        * those blocking the pre-existing normal grace period.
+        */
+       if (rcu_preempt_ctrlblk.exp_tasks != NULL) {
+               tb = rcu_preempt_ctrlblk.exp_tasks;
+               RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
+       } else {
+               tb = rcu_preempt_ctrlblk.boost_tasks;
+               RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
+       }
+       RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
+
+       /*
+        * We boost task t by manufacturing an rt_mutex that appears to
+        * be held by task t.  We leave a pointer to that rt_mutex where
+        * task t can find it, and task t will release the mutex when it
+        * exits its outermost RCU read-side critical section.  Then
+        * simply acquiring this artificial rt_mutex will boost task
+        * t's priority.  (Thanks to tglx for suggesting this approach!)
+        */
+       t = container_of(tb, struct task_struct, rcu_node_entry);
        rt_mutex_init_proxy_locked(&mtx, t);
        t->rcu_boost_mutex = &mtx;
-       t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
-       raw_local_irq_restore(flags);
+       local_irq_restore(flags);
        rt_mutex_lock(&mtx);
-       rt_mutex_unlock(&mtx);
-       return rcu_preempt_ctrlblk.boost_tasks != NULL;
+       rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
+
+       return ACCESS_ONCE(rcu_preempt_ctrlblk.boost_tasks) != NULL ||
+              ACCESS_ONCE(rcu_preempt_ctrlblk.exp_tasks) != NULL;
 }
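
The comment above describes the proxy rt_mutex trick; pieced together from this patch (illustrative timeline, not literal code), the boost/deboost handshake looks like:

    /*
     *   rcu_kthread (SCHED_FIFO, RCU_BOOST_PRIO)    preempted reader task t
     *   ----------------------------------------    -----------------------------
     *   rt_mutex_init_proxy_locked(&mtx, t);        ...blocked inside an RCU
     *   t->rcu_boost_mutex = &mtx;                     read-side critical section...
     *   rt_mutex_lock(&mtx);    <- PI boosts t
     *                                                rcu_read_unlock()
     *                                                  rcu_read_unlock_special(t)
     *                                                    rbmp = t->rcu_boost_mutex;
     *                                                    t->rcu_boost_mutex = NULL;
     *                                                    rt_mutex_unlock(rbmp);  <- deboost
     *   rt_mutex_unlock(&mtx);  (keep lockdep happy)
     */
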
 
 /*
@@ -206,36 +378,33 @@ static int rcu_boost(void)
  * the current grace period, and, if so, tell the rcu_kthread_task to
  * start boosting them.  If there is an expedited boost in progress,
  * we wait for it to complete.
+ *
+ * If there are no blocked readers blocking the current grace period,
+ * return 0 to let the caller know, otherwise return 1.  Note that this
+ * return value is independent of whether or not boosting was done.
  */
-static void rcu_initiate_boost(void)
+static int rcu_initiate_boost(void)
 {
-       if (rcu_preempt_ctrlblk.gp_tasks != NULL &&
-           rcu_preempt_ctrlblk.boost_tasks == NULL &&
-           rcu_preempt_ctrlblk.boosted_this_gp == 0 &&
-           ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
-               rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
-               invoke_rcu_kthread();
+       if (!rcu_preempt_blocked_readers_cgp() &&
+           rcu_preempt_ctrlblk.exp_tasks == NULL) {
+               RCU_TRACE(rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++);
+               return 0;
        }
-}
-
-/*
- * Initiate boosting for an expedited grace period.
- */
-static void rcu_initiate_expedited_boost(void)
-{
-       unsigned long flags;
-
-       raw_local_irq_save(flags);
-       if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
-               rcu_preempt_ctrlblk.boost_tasks =
-                       rcu_preempt_ctrlblk.blkd_tasks.next;
-               rcu_preempt_ctrlblk.boosted_this_gp = -1;
-               invoke_rcu_kthread();
+       if (rcu_preempt_ctrlblk.exp_tasks != NULL ||
+           (rcu_preempt_ctrlblk.gp_tasks != NULL &&
+            rcu_preempt_ctrlblk.boost_tasks == NULL &&
+            ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))) {
+               if (rcu_preempt_ctrlblk.exp_tasks == NULL)
+                       rcu_preempt_ctrlblk.boost_tasks =
+                               rcu_preempt_ctrlblk.gp_tasks;
+               invoke_rcu_callbacks();
+       } else {
+               RCU_TRACE(rcu_initiate_boost_trace());
        }
-       raw_local_irq_restore(flags);
+       return 1;
 }
 
-#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000);
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
 
 /*
  * Do priority-boost accounting for the start of a new grace period.
@@ -243,32 +412,18 @@ static void rcu_initiate_expedited_boost(void)
 static void rcu_preempt_boost_start_gp(void)
 {
        rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
-       if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
-               rcu_preempt_ctrlblk.boosted_this_gp = 0;
 }
 
 #else /* #ifdef CONFIG_RCU_BOOST */
 
 /*
- * If there is no RCU priority boosting, we don't boost.
+ * If there is no RCU priority boosting, we don't initiate boosting,
+ * but we do indicate whether there are blocked readers blocking the
+ * current grace period.
  */
-static int rcu_boost(void)
-{
-       return 0;
-}
-
-/*
- * If there is no RCU priority boosting, we don't initiate boosting.
- */
-static void rcu_initiate_boost(void)
-{
-}
-
-/*
- * If there is no RCU priority boosting, we don't initiate expedited boosting.
- */
-static void rcu_initiate_expedited_boost(void)
+static int rcu_initiate_boost(void)
 {
+       return rcu_preempt_blocked_readers_cgp();
 }
 
 /*
@@ -308,13 +463,14 @@ static void rcu_preempt_cpu_qs(void)
        current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 
        /* If there is no GP then there is nothing more to do.  */
-       if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
+       if (!rcu_preempt_gp_in_progress())
                return;
-       /* If there are blocked readers, go check up on boosting. */
-       if (rcu_preempt_blocked_readers_cgp()) {
-               rcu_initiate_boost();
+       /*
+        * Check up on boosting.  If there are readers blocking the
+        * current grace period, leave.
+        */
+       if (rcu_initiate_boost())
                return;
-       }
 
        /* Advance callbacks. */
        rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
@@ -327,7 +483,7 @@ static void rcu_preempt_cpu_qs(void)
 
        /* If there are done callbacks, cause them to be invoked. */
        if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
-               invoke_rcu_kthread();
+               invoke_rcu_callbacks();
 }
 
 /*
@@ -339,6 +495,8 @@ static void rcu_preempt_start_gp(void)
 
                /* Official start of GP. */
                rcu_preempt_ctrlblk.gpnum++;
+               RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
+               reset_cpu_stall_ticks(&rcu_preempt_ctrlblk.rcb);
 
                /* Any blocked RCU readers block new GP. */
                if (rcu_preempt_blocked_readers_any())
@@ -376,7 +534,7 @@ void rcu_preempt_note_context_switch(void)
        unsigned long flags;
 
        local_irq_save(flags); /* must exclude scheduler_tick(). */
-       if (rcu_preempt_running_reader() &&
+       if (rcu_preempt_running_reader() > 0 &&
            (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 
                /* Possibly blocking in an RCU read-side critical section. */
@@ -395,6 +553,13 @@ void rcu_preempt_note_context_switch(void)
                list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
                if (rcu_cpu_blocking_cur_gp())
                        rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
+       } else if (rcu_preempt_running_reader() < 0 &&
+                  t->rcu_read_unlock_special) {
+               /*
+                * Complete exit from RCU read-side critical section on
+                * behalf of preempted instance of __rcu_read_unlock().
+                */
+               rcu_read_unlock_special(t);
        }
 
        /*
@@ -411,28 +576,19 @@ void rcu_preempt_note_context_switch(void)
 }
 
 /*
- * Tiny-preemptible RCU implementation for rcu_read_lock().
- * Just increment ->rcu_read_lock_nesting, shared state will be updated
- * if we block.
- */
-void __rcu_read_lock(void)
-{
-       current->rcu_read_lock_nesting++;
-       barrier();  /* needed if we ever invoke rcu_read_lock in rcutiny.c */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_lock);
-
-/*
  * Handle special cases during rcu_read_unlock(), such as needing to
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
-static void rcu_read_unlock_special(struct task_struct *t)
+void rcu_read_unlock_special(struct task_struct *t)
 {
        int empty;
        int empty_exp;
        unsigned long flags;
        struct list_head *np;
+#ifdef CONFIG_RCU_BOOST
+       struct rt_mutex *rbmp = NULL;
+#endif /* #ifdef CONFIG_RCU_BOOST */
        int special;
 
        /*
@@ -453,7 +609,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
                rcu_preempt_cpu_qs();
 
        /* Hardware IRQ handlers cannot block. */
-       if (in_irq()) {
+       if (in_irq() || in_serving_softirq()) {
                local_irq_restore(flags);
                return;
        }
@@ -469,7 +625,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
                empty = !rcu_preempt_blocked_readers_cgp();
                empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
                np = rcu_next_node_entry(t);
-               list_del(&t->rcu_node_entry);
+               list_del_init(&t->rcu_node_entry);
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
                        rcu_preempt_ctrlblk.gp_tasks = np;
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
@@ -478,7 +634,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
                        rcu_preempt_ctrlblk.boost_tasks = np;
 #endif /* #ifdef CONFIG_RCU_BOOST */
-               INIT_LIST_HEAD(&t->rcu_node_entry);
 
                /*
                 * If this was the last task on the current list, and if
@@ -499,39 +654,16 @@ static void rcu_read_unlock_special(struct task_struct *t)
        }
 #ifdef CONFIG_RCU_BOOST
        /* Unboost self if was boosted. */
-       if (special & RCU_READ_UNLOCK_BOOSTED) {
-               t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
-               rt_mutex_unlock(t->rcu_boost_mutex);
+       if (t->rcu_boost_mutex != NULL) {
+               rbmp = t->rcu_boost_mutex;
                t->rcu_boost_mutex = NULL;
+               rt_mutex_unlock(rbmp);
        }
 #endif /* #ifdef CONFIG_RCU_BOOST */
        local_irq_restore(flags);
 }
 
 /*
- * Tiny-preemptible RCU implementation for rcu_read_unlock().
- * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
- * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
- * invoke rcu_read_unlock_special() to clean up after a context switch
- * in an RCU read-side critical section and other special cases.
- */
-void __rcu_read_unlock(void)
-{
-       struct task_struct *t = current;
-
-       barrier();  /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
-       --t->rcu_read_lock_nesting;
-       barrier();  /* decrement before load of ->rcu_read_unlock_special */
-       if (t->rcu_read_lock_nesting == 0 &&
-           unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
-               rcu_read_unlock_special(t);
-#ifdef CONFIG_PROVE_LOCKING
-       WARN_ON_ONCE(t->rcu_read_lock_nesting < 0);
-#endif /* #ifdef CONFIG_PROVE_LOCKING */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_unlock);
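
These definitions are removed because the read-side lock/unlock primitives are now shared with the preemptible tree-RCU implementation and live outside this file. The shared __rcu_read_unlock() temporarily drops ->rcu_read_lock_nesting to a large negative value while it handles special work, which is why rcu_preempt_running_reader() can now return a negative result (see the > 0 and < 0 checks added earlier). A rough sketch of that outermost-unlock path, assuming it matches the common code of this era:

    void __rcu_read_unlock(void)
    {
    	struct task_struct *t = current;

    	if (t->rcu_read_lock_nesting != 1) {
    		--t->rcu_read_lock_nesting;           /* not the outermost unlock */
    	} else {
    		t->rcu_read_lock_nesting = INT_MIN;   /* mark "exiting outermost" */
    		barrier();
    		if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
    			rcu_read_unlock_special(t);
    		barrier();
    		t->rcu_read_lock_nesting = 0;
    	}
    }
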
-
-/*
  * Check for a quiescent state from the current CPU.  When a task blocks,
  * the task is recorded in the rcu_preempt_ctrlblk structure, which is
  * checked elsewhere.  This is called from the scheduling-clock interrupt.
@@ -548,10 +680,10 @@ static void rcu_preempt_check_callbacks(void)
                rcu_preempt_cpu_qs();
        if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
            rcu_preempt_ctrlblk.rcb.donetail)
-               invoke_rcu_kthread();
+               invoke_rcu_callbacks();
        if (rcu_preempt_gp_in_progress() &&
            rcu_cpu_blocking_cur_gp() &&
-           rcu_preempt_running_reader())
+           rcu_preempt_running_reader() > 0)
                t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 }
 
@@ -574,7 +706,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
  */
 static void rcu_preempt_process_callbacks(void)
 {
-       rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
+       __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
 }
 
 /*
@@ -591,25 +723,12 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
        local_irq_save(flags);
        *rcu_preempt_ctrlblk.nexttail = head;
        rcu_preempt_ctrlblk.nexttail = &head->next;
+       RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++);
        rcu_preempt_start_gp();  /* checks to see if GP needed. */
        local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
-void rcu_barrier(void)
-{
-       struct rcu_synchronize rcu;
-
-       init_rcu_head_on_stack(&rcu.head);
-       init_completion(&rcu.completion);
-       /* Will wake me after RCU finished. */
-       call_rcu(&rcu.head, wakeme_after_rcu);
-       /* Wait for it. */
-       wait_for_completion(&rcu.completion);
-       destroy_rcu_head_on_stack(&rcu.head);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
 /*
  * synchronize_rcu - wait until a grace period has elapsed.
  *
@@ -621,6 +740,11 @@ EXPORT_SYMBOL_GPL(rcu_barrier);
  */
 void synchronize_rcu(void)
 {
+       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+                          !lock_is_held(&rcu_lock_map) &&
+                          !lock_is_held(&rcu_sched_lock_map),
+                          "Illegal synchronize_rcu() in RCU read-side critical section");
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        if (!rcu_scheduler_active)
                return;
@@ -631,7 +755,10 @@ void synchronize_rcu(void)
                return;
 
        /* Once we get past the fastpath checks, same code as rcu_barrier(). */
-       rcu_barrier();
+       if (rcu_expedited)
+               synchronize_rcu_expedited();
+       else
+               rcu_barrier();
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
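
The rcu_lockdep_assert() added above flags a self-deadlock: synchronize_rcu() waits for all pre-existing readers, so calling it from inside a read-side critical section can never complete. A minimal example of the rejected pattern (illustrative only):

    rcu_read_lock();
    synchronize_rcu();      /* waits for every reader, including this one */
    rcu_read_unlock();
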
 
@@ -702,13 +829,16 @@ void synchronize_rcu_expedited(void)
        rpcp->exp_tasks = rpcp->blkd_tasks.next;
        if (rpcp->exp_tasks == &rpcp->blkd_tasks)
                rpcp->exp_tasks = NULL;
-       local_irq_restore(flags);
 
        /* Wait for tail of ->blkd_tasks list to drain. */
-       if (rcu_preempted_readers_exp())
-               rcu_initiate_expedited_boost();
+       if (!rcu_preempted_readers_exp()) {
+               local_irq_restore(flags);
+       } else {
+               rcu_initiate_boost();
+               local_irq_restore(flags);
                wait_event(sync_rcu_preempt_exp_wq,
                           !rcu_preempted_readers_exp());
+       }
 
        /* Clean up and exit. */
        barrier(); /* ensure expedited GP seen before counter increment. */
@@ -724,38 +854,23 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  */
 int rcu_preempt_needs_cpu(void)
 {
-       if (!rcu_preempt_running_reader())
-               rcu_preempt_cpu_qs();
        return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
 }
 
-/*
- * Check for a task exiting while in a preemptible -RCU read-side
- * critical section, clean up if so.  No need to issue warnings,
- * as debug_check_no_locks_held() already does this if lockdep
- * is enabled.
- */
-void exit_rcu(void)
-{
-       struct task_struct *t = current;
-
-       if (t->rcu_read_lock_nesting == 0)
-               return;
-       t->rcu_read_lock_nesting = 1;
-       rcu_read_unlock();
-}
-
 #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
 
+#ifdef CONFIG_RCU_TRACE
+
 /*
- * Because preemptible RCU does not exist, it is never necessary to
- * boost preempted RCU readers.
+ * Because preemptible RCU does not exist, it is not necessary to
+ * dump out its statistics.
  */
-static int rcu_boost(void)
+static void show_tiny_preempt_stats(struct seq_file *m)
 {
-       return 0;
 }
 
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
 /*
  * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
@@ -782,6 +897,112 @@ static void rcu_preempt_process_callbacks(void)
 
 #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
 
+#ifdef CONFIG_RCU_BOOST
+
+/*
+ * Wake up rcu_kthread() to process callbacks now eligible for invocation
+ * or to boost readers.
+ */
+static void invoke_rcu_callbacks(void)
+{
+       have_rcu_kthread_work = 1;
+       if (rcu_kthread_task != NULL)
+               wake_up(&rcu_kthread_wq);
+}
+
+#ifdef CONFIG_RCU_TRACE
+
+/*
+ * Is the current CPU running the RCU-callbacks kthread?
+ * Caller must have preemption disabled.
+ */
+static bool rcu_is_callbacks_kthread(void)
+{
+       return rcu_kthread_task == current;
+}
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
+/*
+ * This kthread invokes RCU callbacks whose grace periods have
+ * elapsed.  It is awakened as needed, and takes the place of the
+ * RCU_SOFTIRQ that is used for this purpose when boosting is disabled.
+ * This is a kthread, but it is never stopped, at least not until
+ * the system goes down.
+ */
+static int rcu_kthread(void *arg)
+{
+       unsigned long work;
+       unsigned long morework;
+       unsigned long flags;
+
+       for (;;) {
+               wait_event_interruptible(rcu_kthread_wq,
+                                        have_rcu_kthread_work != 0);
+               morework = rcu_boost();
+               local_irq_save(flags);
+               work = have_rcu_kthread_work;
+               have_rcu_kthread_work = morework;
+               local_irq_restore(flags);
+               if (work)
+                       rcu_process_callbacks(NULL);
+               schedule_timeout_interruptible(1); /* Leave CPU for others. */
+       }
+
+       return 0;  /* Not reached, but needed to shut gcc up. */
+}
+
+/*
+ * Spawn the kthread that invokes RCU callbacks.
+ */
+static int __init rcu_spawn_kthreads(void)
+{
+       struct sched_param sp;
+
+       rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
+       sp.sched_priority = RCU_BOOST_PRIO;
+       sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
+       return 0;
+}
+early_initcall(rcu_spawn_kthreads);
+
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+/* Hold off callback invocation until early_initcall() time. */
+static int rcu_scheduler_fully_active __read_mostly;
+
+/*
+ * Start up softirq processing of callbacks.
+ */
+void invoke_rcu_callbacks(void)
+{
+       if (rcu_scheduler_fully_active)
+               raise_softirq(RCU_SOFTIRQ);
+}
+
+#ifdef CONFIG_RCU_TRACE
+
+/*
+ * There is no callback kthread, so this thread is never it.
+ */
+static bool rcu_is_callbacks_kthread(void)
+{
+       return false;
+}
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
+static int __init rcu_scheduler_really_started(void)
+{
+       rcu_scheduler_fully_active = 1;
+       open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+       raise_softirq(RCU_SOFTIRQ);  /* Invoke any callbacks from early boot. */
+       return 0;
+}
+early_initcall(rcu_scheduler_really_started);
+
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #include <linux/kernel_stat.h>
 
@@ -797,8 +1018,96 @@ void __init rcu_scheduler_starting(void)
 
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+#ifdef CONFIG_RCU_TRACE
+
 #ifdef CONFIG_RCU_BOOST
-#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
-#else /* #ifdef CONFIG_RCU_BOOST */
-#define RCU_BOOST_PRIO 1
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
+static void rcu_initiate_boost_trace(void)
+{
+       if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
+               rcu_preempt_ctrlblk.n_balk_blkd_tasks++;
+       else if (rcu_preempt_ctrlblk.gp_tasks == NULL &&
+                rcu_preempt_ctrlblk.exp_tasks == NULL)
+               rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++;
+       else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
+               rcu_preempt_ctrlblk.n_balk_boost_tasks++;
+       else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
+               rcu_preempt_ctrlblk.n_balk_notyet++;
+       else
+               rcu_preempt_ctrlblk.n_balk_nos++;
+}
+
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       rcp->qlen -= n;
+       local_irq_restore(flags);
+}
+
+/*
+ * Dump statistics for TINY_RCU, such as they are.
+ */
+static int show_tiny_stats(struct seq_file *m, void *unused)
+{
+       show_tiny_preempt_stats(m);
+       seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
+       seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
+       return 0;
+}
+
+static int show_tiny_stats_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_tiny_stats, NULL);
+}
+
+static const struct file_operations show_tiny_stats_fops = {
+       .owner = THIS_MODULE,
+       .open = show_tiny_stats_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static struct dentry *rcudir;
+
+static int __init rcutiny_trace_init(void)
+{
+       struct dentry *retval;
+
+       rcudir = debugfs_create_dir("rcu", NULL);
+       if (!rcudir)
+               goto free_out;
+       retval = debugfs_create_file("rcudata", 0444, rcudir,
+                                    NULL, &show_tiny_stats_fops);
+       if (!retval)
+               goto free_out;
+       return 0;
+free_out:
+       debugfs_remove_recursive(rcudir);
+       return 1;
+}
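
With CONFIG_RCU_TRACE enabled, the statistics become readable through debugfs. Based on the seq_printf() formats above (boost lines appear only with CONFIG_RCU_BOOST), the output looks roughly like this; the values shown are made up for illustration:

    # cat /sys/kernel/debug/rcu/rcudata
    rcu_preempt: qlen=0 gp=2911 g23/p23/c23 tasks=...
    rcu_sched: qlen: 0
    rcu_bh: qlen: 0
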
+
+static void __exit rcutiny_trace_cleanup(void)
+{
+       debugfs_remove_recursive(rcudir);
+}
+
+module_init(rcutiny_trace_init);
+module_exit(rcutiny_trace_cleanup);
+
+MODULE_AUTHOR("Paul E. McKenney");
+MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
+MODULE_LICENSE("GPL");
+
+static void check_cpu_stall_preempt(void)
+{
+#ifdef CONFIG_TINY_PREEMPT_RCU
+       check_cpu_stall(&rcu_preempt_ctrlblk.rcb);
+#endif /* #ifdef CONFIG_TINY_PREEMPT_RCU */
+}
+
+#endif /* #ifdef CONFIG_RCU_TRACE */