ftrace: Add self-tests for multiple function trace users
[linux-2.6.git] / kernel / rcutree.h
index 7cc830a..e8f057e 100644 (file)
 /*
  * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
  * In theory, it should be possible to add more levels straightforwardly.
- * In practice, this has not been tested, so there is probably some
- * bug somewhere.
+ * In practice, this did work well going from three levels to four.
+ * Of course, your mileage may vary.
  */
-#define MAX_RCU_LVLS 3
-#define RCU_FANOUT           (CONFIG_RCU_FANOUT)
-#define RCU_FANOUT_SQ        (RCU_FANOUT * RCU_FANOUT)
-#define RCU_FANOUT_CUBE              (RCU_FANOUT_SQ * RCU_FANOUT)
+#define MAX_RCU_LVLS 4
+#if CONFIG_RCU_FANOUT > 16
+#define RCU_FANOUT_LEAF       16
+#else /* #if CONFIG_RCU_FANOUT > 16 */
+#define RCU_FANOUT_LEAF       (CONFIG_RCU_FANOUT)
+#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
+#define RCU_FANOUT_1         (RCU_FANOUT_LEAF)
+#define RCU_FANOUT_2         (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
+#define RCU_FANOUT_3         (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
+#define RCU_FANOUT_4         (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
 
-#if NR_CPUS <= RCU_FANOUT
+#if NR_CPUS <= RCU_FANOUT_1
 #  define NUM_RCU_LVLS       1
 #  define NUM_RCU_LVL_0              1
 #  define NUM_RCU_LVL_1              (NR_CPUS)
 #  define NUM_RCU_LVL_2              0
 #  define NUM_RCU_LVL_3              0
-#elif NR_CPUS <= RCU_FANOUT_SQ
+#  define NUM_RCU_LVL_4              0
+#elif NR_CPUS <= RCU_FANOUT_2
 #  define NUM_RCU_LVLS       2
 #  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT)
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
 #  define NUM_RCU_LVL_2              (NR_CPUS)
 #  define NUM_RCU_LVL_3              0
-#elif NR_CPUS <= RCU_FANOUT_CUBE
+#  define NUM_RCU_LVL_4              0
+#elif NR_CPUS <= RCU_FANOUT_3
 #  define NUM_RCU_LVLS       3
 #  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ)
-#  define NUM_RCU_LVL_2              (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT))
-#  define NUM_RCU_LVL_3              NR_CPUS
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_LVL_3              (NR_CPUS)
+#  define NUM_RCU_LVL_4              0
+#elif NR_CPUS <= RCU_FANOUT_4
+#  define NUM_RCU_LVLS       4
+#  define NUM_RCU_LVL_0              1
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_3              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_LVL_4              (NR_CPUS)
 #else
 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
-#endif /* #if (NR_CPUS) <= RCU_FANOUT */
+#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
 
-#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
 
 /*
@@ -79,19 +95,67 @@ struct rcu_dynticks {
  * Definition for node within the RCU grace-period-detection hierarchy.
  */
 struct rcu_node {
-       spinlock_t lock;
+       raw_spinlock_t lock;    /* Root rcu_node's lock protects some */
+                               /*  rcu_state fields as well as following. */
+       unsigned long gpnum;    /* Current grace period for this node. */
+                               /*  This will either be equal to or one */
+                               /*  behind the root rcu_node's gpnum. */
+       unsigned long completed; /* Last GP completed for this node. */
+                               /*  This will either be equal to or one */
+                               /*  behind the root rcu_node's gpnum. */
        unsigned long qsmask;   /* CPUs or groups that need to switch in */
                                /*  order for current grace period to proceed.*/
+                               /*  In leaf rcu_node, each bit corresponds to */
+                               /*  an rcu_data structure, otherwise, each */
+                               /*  bit corresponds to a child rcu_node */
+                               /*  structure. */
+       unsigned long expmask;  /* Groups that have ->blocked_tasks[] */
+                               /*  elements that need to drain to allow the */
+                               /*  current expedited grace period to */
+                               /*  complete (only for TREE_PREEMPT_RCU). */
        unsigned long qsmaskinit;
-                               /* Per-GP initialization for qsmask. */
+                               /* Per-GP initial value for qsmask & expmask. */
        unsigned long grpmask;  /* Mask to apply to parent qsmask. */
+                               /*  Only one bit will be set in this mask. */
        int     grplo;          /* lowest-numbered CPU or group here. */
        int     grphi;          /* highest-numbered CPU or group here. */
        u8      grpnum;         /* CPU/group number for next level up. */
        u8      level;          /* root is at level 0. */
        struct rcu_node *parent;
+       struct list_head blocked_tasks[4];
+                               /* Tasks blocked in RCU read-side critsect. */
+                               /*  Grace period number (->gpnum) x blocked */
+                               /*  by tasks on the (x & 0x1) element of the */
+                               /*  blocked_tasks[] array. */
 } ____cacheline_internodealigned_in_smp;
 
+/*
+ * Do a full breadth-first scan of the rcu_node structures for the
+ * specified rcu_state structure.
+ */
+#define rcu_for_each_node_breadth_first(rsp, rnp) \
+       for ((rnp) = &(rsp)->node[0]; \
+            (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+
+/*
+ * Do a breadth-first scan of the non-leaf rcu_node structures for the
+ * specified rcu_state structure.  Note that if there is a singleton
+ * rcu_node tree with but one rcu_node structure, this loop is a no-op.
+ */
+#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
+       for ((rnp) = &(rsp)->node[0]; \
+            (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
+
+/*
+ * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
+ * structure.  Note that if there is a singleton rcu_node tree with but
+ * one rcu_node structure, this loop -will- visit the rcu_node structure.
+ * It is still a leaf node, even if it is also the root node.
+ */
+#define rcu_for_each_leaf_node(rsp, rnp) \
+       for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
+            (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+
 /* Index values for nxttail array in struct rcu_data. */
 #define RCU_DONE_TAIL          0       /* Also RCU_WAIT head. */
 #define RCU_WAIT_TAIL          1       /* Also RCU_NEXT_READY head. */
@@ -102,15 +166,16 @@ struct rcu_node {
 /* Per-CPU data for read-copy update. */
 struct rcu_data {
        /* 1) quiescent-state and grace-period handling : */
-       long            completed;      /* Track rsp->completed gp number */
+       unsigned long   completed;      /* Track rsp->completed gp number */
                                        /*  in order to detect GP end. */
-       long            gpnum;          /* Highest gp number that this CPU */
+       unsigned long   gpnum;          /* Highest gp number that this CPU */
                                        /*  is aware of having started. */
-       long            passed_quiesc_completed;
+       unsigned long   passed_quiesc_completed;
                                        /* Value of completed at time of qs. */
        bool            passed_quiesc;  /* User-mode/idle loop etc. */
        bool            qs_pending;     /* Core waits for quiesc state. */
        bool            beenonline;     /* CPU online at least once. */
+       bool            preemptable;    /* Preemptable RCU? */
        struct rcu_node *mynode;        /* This CPU's leaf of hierarchy */
        unsigned long grpmask;          /* Mask to apply to leaf qsmask. */
 
@@ -120,23 +185,33 @@ struct rcu_data {
         * Any of the partitions might be empty, in which case the
         * pointer to that partition will be equal to the pointer for
         * the following partition.  When the list is empty, all of
-        * the nxttail elements point to nxtlist, which is NULL.
+        * the nxttail elements point to the ->nxtlist pointer itself,
+        * which in that case is NULL.
         *
-        * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]):
-        *      Entries that might have arrived after current GP ended
-        * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
-        *      Entries known to have arrived before current GP ended
-        * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
-        *      Entries that batch # <= ->completed - 1: waiting for current GP
         * [nxtlist, *nxttail[RCU_DONE_TAIL]):
         *      Entries that batch # <= ->completed
         *      The grace period for these entries has completed, and
         *      the other grace-period-completed entries may be moved
         *      here temporarily in rcu_process_callbacks().
+        * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
+        *      Entries that batch # <= ->completed - 1: waiting for current GP
+        * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
+        *      Entries known to have arrived before current GP ended
+        * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
+        *      Entries that might have arrived after current GP ended
+        *      Note that the value of *nxttail[RCU_NEXT_TAIL] will
+        *      always be NULL, as this is the end of the list.
         */
        struct rcu_head *nxtlist;
        struct rcu_head **nxttail[RCU_NEXT_SIZE];
-       long            qlen;           /* # of queued callbacks */
+       long            qlen;           /* # of queued callbacks */
+       long            qlen_last_fqs_check;
+                                       /* qlen at last check for QS forcing */
+       unsigned long   n_cbs_invoked;  /* count of RCU cbs invoked. */
+       unsigned long   n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
+       unsigned long   n_cbs_adopted;  /* RCU cbs adopted from dying CPU */
+       unsigned long   n_force_qs_snap;
+                                       /* did other CPU force QS recently? */
        long            blimit;         /* Upper limit on a processed batch */
 
 #ifdef CONFIG_NO_HZ
@@ -154,22 +229,24 @@ struct rcu_data {
        unsigned long resched_ipi;      /* Sent a resched IPI. */
 
        /* 5) __rcu_pending() statistics. */
-       long n_rcu_pending;             /* rcu_pending() calls since boot. */
-       long n_rp_qs_pending;
-       long n_rp_cb_ready;
-       long n_rp_cpu_needs_gp;
-       long n_rp_gp_completed;
-       long n_rp_gp_started;
-       long n_rp_need_fqs;
-       long n_rp_need_nothing;
+       unsigned long n_rcu_pending;    /* rcu_pending() calls since boot. */
+       unsigned long n_rp_qs_pending;
+       unsigned long n_rp_report_qs;
+       unsigned long n_rp_cb_ready;
+       unsigned long n_rp_cpu_needs_gp;
+       unsigned long n_rp_gp_completed;
+       unsigned long n_rp_gp_started;
+       unsigned long n_rp_need_fqs;
+       unsigned long n_rp_need_nothing;
 
        int cpu;
 };
 
 /* Values for signaled field in struct rcu_state. */
-#define RCU_GP_INIT            0       /* Grace period being initialized. */
-#define RCU_SAVE_DYNTICK       1       /* Need to scan dyntick state. */
-#define RCU_FORCE_QS           2       /* Need to force quiescent state. */
+#define RCU_GP_IDLE            0       /* No grace period in progress. */
+#define RCU_GP_INIT            1       /* Grace period being initialized. */
+#define RCU_SAVE_DYNTICK       2       /* Need to scan dyntick state. */
+#define RCU_FORCE_QS           3       /* Need to force quiescent state. */
 #ifdef CONFIG_NO_HZ
 #define RCU_SIGNAL_INIT                RCU_SAVE_DYNTICK
 #else /* #ifdef CONFIG_NO_HZ */
@@ -178,12 +255,28 @@ struct rcu_data {
 
 #define RCU_JIFFIES_TILL_FORCE_QS       3      /* for rsp->jiffies_force_qs */
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-#define RCU_SECONDS_TILL_STALL_CHECK   (10 * HZ)  /* for rsp->jiffies_stall */
-#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ)  /* for rsp->jiffies_stall */
-#define RCU_STALL_RAT_DELAY            2         /* Allow other CPUs time */
-                                                 /*  to take at least one */
-                                                 /*  scheduling clock irq */
-                                                 /*  before ratting on them. */
+
+#ifdef CONFIG_PROVE_RCU
+#define RCU_STALL_DELAY_DELTA         (5 * HZ)
+#else
+#define RCU_STALL_DELAY_DELTA         0
+#endif
+
+#define RCU_SECONDS_TILL_STALL_CHECK   (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \
+                                       RCU_STALL_DELAY_DELTA)
+                                               /* for rsp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30)
+                                               /* for rsp->jiffies_stall */
+#define RCU_STALL_RAT_DELAY            2       /* Allow other CPUs time */
+                                               /*  to take at least one */
+                                               /*  scheduling clock irq */
+                                               /*  before ratting on them. */
+
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE
+#define RCU_CPU_STALL_SUPPRESS_INIT 0
+#else
+#define RCU_CPU_STALL_SUPPRESS_INIT 1
+#endif
 
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
@@ -202,17 +295,27 @@ struct rcu_state {
        struct rcu_node *level[NUM_RCU_LVLS];   /* Hierarchy levels. */
        u32 levelcnt[MAX_RCU_LVLS + 1];         /* # nodes in each level. */
        u8 levelspread[NUM_RCU_LVLS];           /* kids/node in each level. */
-       struct rcu_data *rda[NR_CPUS];          /* array of rdp pointers. */
+       struct rcu_data __percpu *rda;          /* pointer of percu rcu_data. */
 
        /* The following fields are guarded by the root rcu_node's lock. */
 
        u8      signaled ____cacheline_internodealigned_in_smp;
                                                /* Force QS state. */
-       long    gpnum;                          /* Current gp number. */
-       long    completed;                      /* # of last completed gp. */
-       spinlock_t onofflock;                   /* exclude on/offline and */
+       u8      fqs_active;                     /* force_quiescent_state() */
+                                               /*  is running. */
+       u8      fqs_need_gp;                    /* A CPU was prevented from */
+                                               /*  starting a new grace */
+                                               /*  period because */
+                                               /*  force_quiescent_state() */
+                                               /*  was running. */
+       unsigned long gpnum;                    /* Current gp number. */
+       unsigned long completed;                /* # of last completed gp. */
+
+       /* End of fields guarded by root rcu_node's lock. */
+
+       raw_spinlock_t onofflock;               /* exclude on/offline and */
                                                /*  starting new GP. */
-       spinlock_t fqslock;                     /* Only one task forcing */
+       raw_spinlock_t fqslock;                 /* Only one task forcing */
                                                /*  quiescent states. */
        unsigned long jiffies_force_qs;         /* Time at which to invoke */
                                                /*  force_quiescent_state(). */
@@ -228,21 +331,64 @@ struct rcu_state {
        unsigned long jiffies_stall;            /* Time at which to check */
                                                /*  for CPU stalls. */
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
-#ifdef CONFIG_NO_HZ
-       long dynticks_completed;                /* Value of completed @ snap. */
-#endif /* #ifdef CONFIG_NO_HZ */
+       char *name;                             /* Name of structure. */
 };
 
-#ifdef RCU_TREE_NONCORE
+/* Return values for rcu_preempt_offline_tasks(). */
+
+#define RCU_OFL_TASKS_NORM_GP  0x1             /* Tasks blocking normal */
+                                               /*  GP were moved to root. */
+#define RCU_OFL_TASKS_EXP_GP   0x2             /* Tasks blocking expedited */
+                                               /*  GP were moved to root. */
 
 /*
  * RCU implementation internal declarations:
  */
-extern struct rcu_state rcu_state;
-DECLARE_PER_CPU(struct rcu_data, rcu_data);
+extern struct rcu_state rcu_sched_state;
+DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
 
 extern struct rcu_state rcu_bh_state;
 DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
 
-#endif /* #ifdef RCU_TREE_NONCORE */
+#ifdef CONFIG_TREE_PREEMPT_RCU
+extern struct rcu_state rcu_preempt_state;
+DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+#ifndef RCU_TREE_NONCORE
+
+/* Forward declarations for rcutree_plugin.h */
+static void rcu_bootup_announce(void);
+long rcu_batches_completed(void);
+static void rcu_preempt_note_context_switch(int cpu);
+static int rcu_preempted_readers(struct rcu_node *rnp);
+#ifdef CONFIG_HOTPLUG_CPU
+static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
+                                     unsigned long flags);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+static void rcu_print_detail_task_stall(struct rcu_state *rsp);
+static void rcu_print_task_stall(struct rcu_node *rnp);
+static void rcu_preempt_stall_reset(void);
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
+#ifdef CONFIG_HOTPLUG_CPU
+static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
+                                    struct rcu_node *rnp,
+                                    struct rcu_data *rdp);
+static void rcu_preempt_offline_cpu(int cpu);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+static void rcu_preempt_check_callbacks(int cpu);
+static void rcu_preempt_process_callbacks(void);
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
+#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
+static int rcu_preempt_pending(int cpu);
+static int rcu_preempt_needs_cpu(int cpu);
+static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
+static void rcu_preempt_send_cbs_to_online(void);
+static void __init __rcu_init_preempt(void);
+static void rcu_needs_cpu_flush(void);
 
+#endif /* #ifndef RCU_TREE_NONCORE */