rcu: priority boosting for TINY_PREEMPT_RCU
[linux-2.6.git] / kernel / rcutiny_plugin.h
index 95f9239..24f4316 100644 (file)
 
 #include <linux/kthread.h>
 
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+       struct rcu_head *rcucblist;     /* List of pending callbacks (CBs). */
+       struct rcu_head **donetail;     /* ->next pointer of last "done" CB. */
+       struct rcu_head **curtail;      /* ->next pointer of last CB. */
+};
+
+/* Definition for rcupdate control block. */
+static struct rcu_ctrlblk rcu_sched_ctrlblk = {
+       .donetail       = &rcu_sched_ctrlblk.rcucblist,
+       .curtail        = &rcu_sched_ctrlblk.rcucblist,
+};
+
+static struct rcu_ctrlblk rcu_bh_ctrlblk = {
+       .donetail       = &rcu_bh_ctrlblk.rcucblist,
+       .curtail        = &rcu_bh_ctrlblk.rcucblist,
+};
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
 #ifdef CONFIG_TINY_PREEMPT_RCU
 
 #include <linux/delay.h>
@@ -48,17 +71,27 @@ struct rcu_preempt_ctrlblk {
        struct list_head *gp_tasks;
                                /* Pointer to the first task blocking the */
                                /*  current grace period, or NULL if there */
-                               /*  is not such task. */
+                               /*  is no such task. */
        struct list_head *exp_tasks;
                                /* Pointer to first task blocking the */
                                /*  current expedited grace period, or NULL */
                                /*  if there is no such task.  If there */
                                /*  is no current expedited grace period, */
                                /*  then there cannot be any such task. */
+#ifdef CONFIG_RCU_BOOST
+       struct list_head *boost_tasks;
+                               /* Pointer to first task that needs to be */
+                               /*  priority-boosted, or NULL if no priority */
+                               /*  boosting is needed.  If there is no */
+                               /*  current or expedited grace period, there */
+                               /*  can be no such task. */
+#endif /* #ifdef CONFIG_RCU_BOOST */
        u8 gpnum;               /* Current grace period. */
        u8 gpcpu;               /* Last grace period blocked by the CPU. */
        u8 completed;           /* Last grace period completed. */
                                /*  If all three are equal, RCU is idle. */
+       s8 boosted_this_gp;     /* Has boosting already happened? */
+       unsigned long boost_time; /* When to start boosting (jiffies) */
 };
 
 static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
@@ -124,6 +157,130 @@ static int rcu_preempt_gp_in_progress(void)
 }
 
 /*
+ * Advance a ->blkd_tasks-list pointer to the next entry, instead
+ * returning NULL if at the end of the list.
+ */
+static struct list_head *rcu_next_node_entry(struct task_struct *t)
+{
+       struct list_head *np;
+
+       np = t->rcu_node_entry.next;
+       if (np == &rcu_preempt_ctrlblk.blkd_tasks)
+               np = NULL;
+       return np;
+}
+
+#ifdef CONFIG_RCU_BOOST
+
+#include "rtmutex_common.h"
+
+/*
+ * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
+ * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
+ */
+static int rcu_boost(void)
+{
+       unsigned long flags;
+       struct rt_mutex mtx;
+       struct list_head *np;
+       struct task_struct *t;
+
+       if (rcu_preempt_ctrlblk.boost_tasks == NULL)
+               return 0;  /* Nothing to boost. */
+       raw_local_irq_save(flags);
+       rcu_preempt_ctrlblk.boosted_this_gp++;
+       t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
+                        rcu_node_entry);
+       np = rcu_next_node_entry(t);
+       rt_mutex_init_proxy_locked(&mtx, t);
+       t->rcu_boost_mutex = &mtx;
+       t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+       raw_local_irq_restore(flags);
+       rt_mutex_lock(&mtx);
+       rt_mutex_unlock(&mtx);
+       return rcu_preempt_ctrlblk.boost_tasks != NULL;
+}
+
+/*
+ * Check to see if it is now time to start boosting RCU readers blocking
+ * the current grace period, and, if so, tell the rcu_kthread_task to
+ * start boosting them.  If there is an expedited boost in progress,
+ * we wait for it to complete.
+ */
+static void rcu_initiate_boost(void)
+{
+       if (rcu_preempt_ctrlblk.gp_tasks != NULL &&
+           rcu_preempt_ctrlblk.boost_tasks == NULL &&
+           rcu_preempt_ctrlblk.boosted_this_gp == 0 &&
+           ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
+               rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
+               invoke_rcu_kthread();
+       }
+}
+
+/*
+ * Initiate boosting for an expedited grace period.
+ */
+static void rcu_initiate_expedited_boost(void)
+{
+       unsigned long flags;
+
+       raw_local_irq_save(flags);
+       if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
+               rcu_preempt_ctrlblk.boost_tasks =
+                       rcu_preempt_ctrlblk.blkd_tasks.next;
+               rcu_preempt_ctrlblk.boosted_this_gp = -1;
+               invoke_rcu_kthread();
+       }
+       raw_local_irq_restore(flags);
+}
+
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000);
+
+/*
+ * Do priority-boost accounting for the start of a new grace period.
+ */
+static void rcu_preempt_boost_start_gp(void)
+{
+       rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
+       if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
+               rcu_preempt_ctrlblk.boosted_this_gp = 0;
+}
+
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+/*
+ * If there is no RCU priority boosting, we don't boost.
+ */
+static int rcu_boost(void)
+{
+       return 0;
+}
+
+/*
+ * If there is no RCU priority boosting, we don't initiate boosting.
+ */
+static void rcu_initiate_boost(void)
+{
+}
+
+/*
+ * If there is no RCU priority boosting, we don't initiate expedited boosting.
+ */
+static void rcu_initiate_expedited_boost(void)
+{
+}
+
+/*
+ * If there is no RCU priority boosting, nothing to do at grace-period start.
+ */
+static void rcu_preempt_boost_start_gp(void)
+{
+}
+
+#endif /* else #ifdef CONFIG_RCU_BOOST */
+
+/*
  * Record a preemptible-RCU quiescent state for the specified CPU.  Note
  * that this just means that the task currently running on the CPU is
  * in a quiescent state.  There might be any number of tasks blocked
@@ -150,12 +307,14 @@ static void rcu_preempt_cpu_qs(void)
        rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
        current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 
-       /*
-        * If there is no GP, or if blocked readers are still blocking GP,
-        * then there is nothing more to do.
-        */
+       /* If there is no GP then there is nothing more to do.  */
        if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
                return;
+       /* If there are blocked readers, go check up on boosting. */
+       if (rcu_preempt_blocked_readers_cgp()) {
+               rcu_initiate_boost();
+               return;
+       }
 
        /* Advance callbacks. */
        rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
@@ -168,7 +327,7 @@ static void rcu_preempt_cpu_qs(void)
 
        /* If there are done callbacks, cause them to be invoked. */
        if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
-               invoke_rcu_cbs();
+               invoke_rcu_kthread();
 }
 
 /*
@@ -186,6 +345,9 @@ static void rcu_preempt_start_gp(void)
                        rcu_preempt_ctrlblk.gp_tasks =
                                rcu_preempt_ctrlblk.blkd_tasks.next;
 
+               /* Set up for RCU priority boosting. */
+               rcu_preempt_boost_start_gp();
+
                /* If there is no running reader, CPU is done with GP. */
                if (!rcu_preempt_running_reader())
                        rcu_preempt_cpu_qs();
@@ -306,14 +468,16 @@ static void rcu_read_unlock_special(struct task_struct *t)
                 */
                empty = !rcu_preempt_blocked_readers_cgp();
                empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
-               np = t->rcu_node_entry.next;
-               if (np == &rcu_preempt_ctrlblk.blkd_tasks)
-                       np = NULL;
+               np = rcu_next_node_entry(t);
                list_del(&t->rcu_node_entry);
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
                        rcu_preempt_ctrlblk.gp_tasks = np;
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
                        rcu_preempt_ctrlblk.exp_tasks = np;
+#ifdef CONFIG_RCU_BOOST
+               if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
+                       rcu_preempt_ctrlblk.boost_tasks = np;
+#endif /* #ifdef CONFIG_RCU_BOOST */
                INIT_LIST_HEAD(&t->rcu_node_entry);
 
                /*
@@ -333,6 +497,14 @@ static void rcu_read_unlock_special(struct task_struct *t)
                if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
                        rcu_report_exp_done();
        }
+#ifdef CONFIG_RCU_BOOST
+       /* Unboost self if was boosted. */
+       if (special & RCU_READ_UNLOCK_BOOSTED) {
+               t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
+               rt_mutex_unlock(t->rcu_boost_mutex);
+               t->rcu_boost_mutex = NULL;
+       }
+#endif /* #ifdef CONFIG_RCU_BOOST */
        local_irq_restore(flags);
 }
 
@@ -376,7 +548,7 @@ static void rcu_preempt_check_callbacks(void)
                rcu_preempt_cpu_qs();
        if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
            rcu_preempt_ctrlblk.rcb.donetail)
-               invoke_rcu_cbs();
+               invoke_rcu_kthread();
        if (rcu_preempt_gp_in_progress() &&
            rcu_cpu_blocking_cur_gp() &&
            rcu_preempt_running_reader())
@@ -534,6 +706,7 @@ void synchronize_rcu_expedited(void)
 
        /* Wait for tail of ->blkd_tasks list to drain. */
        if (rcu_preempted_readers_exp())
+               rcu_initiate_expedited_boost();
                wait_event(sync_rcu_preempt_exp_wq,
                           !rcu_preempted_readers_exp());
 
@@ -575,6 +748,15 @@ void exit_rcu(void)
 #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
 
 /*
+ * Because preemptible RCU does not exist, it is never necessary to
+ * boost preempted RCU readers.
+ */
+static int rcu_boost(void)
+{
+       return 0;
+}
+
+/*
  * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
  */
@@ -614,3 +796,9 @@ void __init rcu_scheduler_starting(void)
 }
 
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+#ifdef CONFIG_RCU_BOOST
+#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
+#else /* #ifdef CONFIG_RCU_BOOST */
+#define RCU_BOOST_PRIO 1
+#endif /* #else #ifdef CONFIG_RCU_BOOST */