hrtimer: prevent migration of per CPU hrtimers
Thomas Gleixner [Mon, 29 Sep 2008 13:47:42 +0000 (15:47 +0200)]
Impact: per CPU hrtimers can be migrated from a dead CPU

The hrtimer code has no knowledge about per CPU timers, but we need to
prevent the migration of such timers and warn when such a timer is
active at migration time.

Explicitly mark the timers as per CPU and use a more understandable
mode descriptor for the interrupt-safe unlocked callback mode, which
is used by hrtimer_sleeper and the scheduler code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

include/linux/hrtimer.h
kernel/hrtimer.c
kernel/sched.c
kernel/time/tick-sched.c
kernel/trace/trace_sysprof.c

index bdd88df..2f245fe 100644 (file)
@@ -47,14 +47,22 @@ enum hrtimer_restart {
  *     HRTIMER_CB_IRQSAFE:             Callback may run in hardirq context
  *     HRTIMER_CB_IRQSAFE_NO_RESTART:  Callback may run in hardirq context and
  *                                     does not restart the timer
- *     HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:  Callback must run in hardirq context
- *                                     Special mode for tick emultation
+ *     HRTIMER_CB_IRQSAFE_PERCPU:      Callback must run in hardirq context
+ *                                     Special mode for tick emulation and
+ *                                     scheduler timer. Such timers are per
+ *                                     cpu and not allowed to be migrated on
+ *                                     cpu unplug.
+ *     HRTIMER_CB_IRQSAFE_UNLOCKED:    Callback should run in hardirq context
+ *                                     with timer->base lock unlocked
+ *                                     used for timers which call wakeup to
+ *                                     avoid lock order problems with rq->lock
  */
 enum hrtimer_cb_mode {
        HRTIMER_CB_SOFTIRQ,
        HRTIMER_CB_IRQSAFE,
        HRTIMER_CB_IRQSAFE_NO_RESTART,
-       HRTIMER_CB_IRQSAFE_NO_SOFTIRQ,
+       HRTIMER_CB_IRQSAFE_PERCPU,
+       HRTIMER_CB_IRQSAFE_UNLOCKED,
 };
 
 /*
index ace723d..cdec83e 100644 (file)
@@ -672,13 +672,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
                         */
                        BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
                        return 1;
-               case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
+               case HRTIMER_CB_IRQSAFE_PERCPU:
+               case HRTIMER_CB_IRQSAFE_UNLOCKED:
                        /*
                         * This is solely for the sched tick emulation with
                         * dynamic tick support to ensure that we do not
                         * restart the tick right on the edge and end up with
                         * the tick timer in the softirq ! The calling site
-                        * takes care of this.
+                        * takes care of this. Also used for hrtimer sleeper !
                         */
                        debug_hrtimer_deactivate(timer);
                        return 1;
@@ -1245,7 +1246,8 @@ static void __run_hrtimer(struct hrtimer *timer)
        timer_stats_account_hrtimer(timer);
 
        fn = timer->function;
-       if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
+       if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
+           timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
                /*
                 * Used for scheduler timers, avoid lock inversion with
                 * rq->lock and tasklist_lock.
@@ -1452,7 +1454,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
        sl->timer.function = hrtimer_wakeup;
        sl->task = task;
 #ifdef CONFIG_HIGH_RES_TIMERS
-       sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+       sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 #endif
 }
 
@@ -1592,7 +1594,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 #ifdef CONFIG_HOTPLUG_CPU
 
 static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
-                               struct hrtimer_clock_base *new_base)
+                               struct hrtimer_clock_base *new_base, int dcpu)
 {
        struct hrtimer *timer;
        struct rb_node *node;
@@ -1604,6 +1606,18 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
                debug_hrtimer_deactivate(timer);
 
                /*
+                * Should not happen. Per CPU timers should be
+                * canceled _before_ the migration code is called
+                */
+               if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
+                       __remove_hrtimer(timer, old_base,
+                                        HRTIMER_STATE_INACTIVE, 0);
+                       WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
+                            timer, timer->function, dcpu);
+                       continue;
+               }
+
+               /*
                 * Mark it as STATE_MIGRATE not INACTIVE otherwise the
                 * timer could be seen as !active and just vanish away
                 * under us on another CPU
@@ -1619,12 +1633,11 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
                /*
                 * Happens with high res enabled when the timer was
                 * already expired and the callback mode is
-                * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ
-                * (hrtimer_sleeper). The enqueue code does not move
-                * them to the soft irq pending list for
-                * performance/latency reasons, but in the migration
-                * state, we need to do that otherwise we end up with
-                * a stale timer.
+                * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
+                * enqueue code does not move them to the soft irq
+                * pending list for performance/latency reasons, but
+                * in the migration state, we need to do that
+                * otherwise we end up with a stale timer.
                 */
                if (timer->state == HRTIMER_STATE_MIGRATE) {
                        timer->state = HRTIMER_STATE_PENDING;
@@ -1682,7 +1695,7 @@ static void migrate_hrtimers(int cpu)
 
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                if (migrate_hrtimer_list(&old_base->clock_base[i],
-                                        &new_base->clock_base[i]))
+                                        &new_base->clock_base[i], cpu))
                        raise = 1;
        }
 
index 13dd2db..ad1962d 100644 (file)
@@ -201,7 +201,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
        hrtimer_init(&rt_b->rt_period_timer,
                        CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        rt_b->rt_period_timer.function = sched_rt_period_timer;
-       rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+       rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 }
 
 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
@@ -1119,7 +1119,7 @@ static void init_rq_hrtick(struct rq *rq)
 
        hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        rq->hrtick_timer.function = hrtick;
-       rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+       rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 }
 #else
 static inline void hrtick_clear(struct rq *rq)
index 39019b3..cb02324 100644 (file)
@@ -625,7 +625,7 @@ void tick_setup_sched_timer(void)
         */
        hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        ts->sched_timer.function = tick_sched_timer;
-       ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+       ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
        /* Get the next period (per cpu) */
        ts->sched_timer.expires = tick_init_jiffy_update();
index bb948e5..db58fb6 100644 (file)
@@ -202,7 +202,7 @@ static void start_stack_timer(int cpu)
 
        hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        hrtimer->function = stack_trace_timer_fn;
-       hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+       hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
        hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
 }