misc: nct1008: avoid reading bogus temperature
[linux-3.10.git] / kernel / timer.c
index d53ce66..15bc1b4 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *  linux/kernel/timer.c
  *
- *  Kernel internal timers, basic process system calls
+ *  Kernel internal timers
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  *
@@ -20,7 +20,7 @@
  */
 
 #include <linux/kernel_stat.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
@@ -39,7 +39,9 @@
 #include <linux/kallsyms.h>
 #include <linux/irq_work.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
 #include <linux/slab.h>
+#include <linux/compat.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -63,6 +65,7 @@ EXPORT_SYMBOL(jiffies_64);
 #define TVR_SIZE (1 << TVR_BITS)
 #define TVN_MASK (TVN_SIZE - 1)
 #define TVR_MASK (TVR_SIZE - 1)
+#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
 
 struct tvec {
        struct list_head vec[TVN_SIZE];
@@ -77,6 +80,7 @@ struct tvec_base {
        struct timer_list *running_timer;
        unsigned long timer_jiffies;
        unsigned long next_timer;
+       unsigned long active_timers;
        struct tvec_root tv1;
        struct tvec tv2;
        struct tvec tv3;
@@ -91,24 +95,25 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 /* Functions below help us manage 'deferrable' flag */
 static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
 {
-       return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
+       return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE);
 }
 
-static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
+static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
 {
-       return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
+       return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE);
 }
 
-static inline void timer_set_deferrable(struct timer_list *timer)
+static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
 {
-       timer->base = TBASE_MAKE_DEFERRED(timer->base);
+       return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK));
 }
 
 static inline void
 timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
 {
-       timer->base = (struct tvec_base *)((unsigned long)(new_base) |
-                                     tbase_get_deferrable(timer->base));
+       unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK;
+
+       timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
 }
 
 static unsigned long round_jiffies_common(unsigned long j, int cpu,
@@ -144,9 +149,11 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu,
        /* now that we have rounded, subtract the extra skew again */
        j -= cpu * 3;
 
-       if (j <= jiffies) /* rounding ate our timeout entirely; */
-               return original;
-       return j;
+       /*
+        * Make sure j is still in the future. Otherwise return the
+        * unmodified value.
+        */
+       return time_is_after_jiffies(j) ? j : original;
 }
 
 /**
@@ -330,7 +337,8 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
 }
 EXPORT_SYMBOL_GPL(set_timer_slack);
 
-static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
+static void
+__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
        unsigned long expires = timer->expires;
        unsigned long idx = expires - base->timer_jiffies;
@@ -356,11 +364,12 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
                vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
        } else {
                int i;
-               /* If the timeout is larger than 0xffffffff on 64-bit
-                * architectures then we use the maximum timeout:
+               /* If the timeout is larger than MAX_TVAL (on 64-bit
+                * architectures or with CONFIG_BASE_SMALL=1) then we
+                * use the maximum timeout.
                 */
-               if (idx > 0xffffffffUL) {
-                       idx = 0xffffffffUL;
+               if (idx > MAX_TVAL) {
+                       idx = MAX_TVAL;
                        expires = idx + base->timer_jiffies;
                }
                i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
@@ -372,6 +381,19 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
        list_add_tail(&timer->entry, vec);
 }
 
+static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
+{
+       __internal_add_timer(base, timer);
+       /*
+        * Update base->active_timers and base->next_timer
+        */
+       if (!tbase_get_deferrable(timer->base)) {
+               if (time_before(timer->expires, base->next_timer))
+                       base->next_timer = timer->expires;
+               base->active_timers++;
+       }
+}
+
 #ifdef CONFIG_TIMER_STATS
 void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
 {
@@ -404,6 +426,11 @@ static void timer_stats_account_timer(struct timer_list *timer) {}
 
 static struct debug_obj_descr timer_debug_descr;
 
+static void *timer_debug_hint(void *addr)
+{
+       return ((struct timer_list *) addr)->function;
+}
+
 /*
  * fixup_init is called when:
  * - an active object is initialized
@@ -422,6 +449,12 @@ static int timer_fixup_init(void *addr, enum debug_obj_state state)
        }
 }
 
+/* Stub timer callback for improperly used timers. */
+static void stub_timer(unsigned long data)
+{
+       WARN_ON(1);
+}
+
 /*
  * fixup_activate is called when:
  * - an active object is activated
@@ -445,7 +478,8 @@ static int timer_fixup_activate(void *addr, enum debug_obj_state state)
                        debug_object_activate(timer, &timer_debug_descr);
                        return 0;
                } else {
-                       WARN_ON_ONCE(1);
+                       setup_timer(timer, stub_timer, 0);
+                       return 1;
                }
                return 0;
 
@@ -475,11 +509,40 @@ static int timer_fixup_free(void *addr, enum debug_obj_state state)
        }
 }
 
+/*
+ * fixup_assert_init is called when:
+ * - an untracked/uninit-ed object is found
+ */
+static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
+{
+       struct timer_list *timer = addr;
+
+       switch (state) {
+       case ODEBUG_STATE_NOTAVAILABLE:
+               if (timer->entry.prev == TIMER_ENTRY_STATIC) {
+                       /*
+                        * This is not really a fixup. The timer was
+                        * statically initialized. We just make sure that it
+                        * is tracked in the object tracker.
+                        */
+                       debug_object_init(timer, &timer_debug_descr);
+                       return 0;
+               } else {
+                       setup_timer(timer, stub_timer, 0);
+                       return 1;
+               }
+       default:
+               return 0;
+       }
+}
+
 static struct debug_obj_descr timer_debug_descr = {
-       .name           = "timer_list",
-       .fixup_init     = timer_fixup_init,
-       .fixup_activate = timer_fixup_activate,
-       .fixup_free     = timer_fixup_free,
+       .name                   = "timer_list",
+       .debug_hint             = timer_debug_hint,
+       .fixup_init             = timer_fixup_init,
+       .fixup_activate         = timer_fixup_activate,
+       .fixup_free             = timer_fixup_free,
+       .fixup_assert_init      = timer_fixup_assert_init,
 };
 
 static inline void debug_timer_init(struct timer_list *timer)
@@ -502,16 +565,19 @@ static inline void debug_timer_free(struct timer_list *timer)
        debug_object_free(timer, &timer_debug_descr);
 }
 
-static void __init_timer(struct timer_list *timer,
-                        const char *name,
-                        struct lock_class_key *key);
+static inline void debug_timer_assert_init(struct timer_list *timer)
+{
+       debug_object_assert_init(timer, &timer_debug_descr);
+}
 
-void init_timer_on_stack_key(struct timer_list *timer,
-                            const char *name,
-                            struct lock_class_key *key)
+static void do_init_timer(struct timer_list *timer, unsigned int flags,
+                         const char *name, struct lock_class_key *key);
+
+void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags,
+                            const char *name, struct lock_class_key *key)
 {
        debug_object_init_on_stack(timer, &timer_debug_descr);
-       __init_timer(timer, name, key);
+       do_init_timer(timer, flags, name, key);
 }
 EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
 
@@ -525,6 +591,7 @@ EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
 static inline void debug_timer_init(struct timer_list *timer) { }
 static inline void debug_timer_activate(struct timer_list *timer) { }
 static inline void debug_timer_deactivate(struct timer_list *timer) { }
+static inline void debug_timer_assert_init(struct timer_list *timer) { }
 #endif
 
 static inline void debug_init(struct timer_list *timer)
@@ -546,12 +613,18 @@ static inline void debug_deactivate(struct timer_list *timer)
        trace_timer_cancel(timer);
 }
 
-static void __init_timer(struct timer_list *timer,
-                        const char *name,
-                        struct lock_class_key *key)
+static inline void debug_assert_init(struct timer_list *timer)
+{
+       debug_timer_assert_init(timer);
+}
+
+static void do_init_timer(struct timer_list *timer, unsigned int flags,
+                         const char *name, struct lock_class_key *key)
 {
+       struct tvec_base *base = __raw_get_cpu_var(tvec_bases);
+
        timer->entry.next = NULL;
-       timer->base = __raw_get_cpu_var(tvec_bases);
+       timer->base = (void *)((unsigned long)base | flags);
        timer->slack = -1;
 #ifdef CONFIG_TIMER_STATS
        timer->start_site = NULL;
@@ -561,22 +634,10 @@ static void __init_timer(struct timer_list *timer,
        lockdep_init_map(&timer->lockdep_map, name, key, 0);
 }
 
-void setup_deferrable_timer_on_stack_key(struct timer_list *timer,
-                                        const char *name,
-                                        struct lock_class_key *key,
-                                        void (*function)(unsigned long),
-                                        unsigned long data)
-{
-       timer->function = function;
-       timer->data = data;
-       init_timer_on_stack_key(timer, name, key);
-       timer_set_deferrable(timer);
-}
-EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
-
 /**
  * init_timer_key - initialize a timer
  * @timer: the timer to be initialized
+ * @flags: timer flags
  * @name: name of the timer
  * @key: lockdep class key of the fake lock used for tracking timer
  *       sync lock dependencies
@@ -584,26 +645,15 @@ EXPORT_SYMBOL_GPL(setup_deferrable_timer_on_stack_key);
  * init_timer_key() must be done to a timer prior calling *any* of the
  * other timer functions.
  */
-void init_timer_key(struct timer_list *timer,
-                   const char *name,
-                   struct lock_class_key *key)
+void init_timer_key(struct timer_list *timer, unsigned int flags,
+                   const char *name, struct lock_class_key *key)
 {
        debug_init(timer);
-       __init_timer(timer, name, key);
+       do_init_timer(timer, flags, name, key);
 }
 EXPORT_SYMBOL(init_timer_key);
 
-void init_timer_deferrable_key(struct timer_list *timer,
-                              const char *name,
-                              struct lock_class_key *key)
-{
-       init_timer_key(timer, name, key);
-       timer_set_deferrable(timer);
-}
-EXPORT_SYMBOL(init_timer_deferrable_key);
-
-static inline void detach_timer(struct timer_list *timer,
-                               int clear_pending)
+static inline void detach_timer(struct timer_list *timer, bool clear_pending)
 {
        struct list_head *entry = &timer->entry;
 
@@ -615,6 +665,29 @@ static inline void detach_timer(struct timer_list *timer,
        entry->prev = LIST_POISON2;
 }
 
+static inline void
+detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
+{
+       detach_timer(timer, true);
+       if (!tbase_get_deferrable(timer->base))
+               base->active_timers--;
+}
+
+static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
+                            bool clear_pending)
+{
+       if (!timer_pending(timer))
+               return 0;
+
+       detach_timer(timer, clear_pending);
+       if (!tbase_get_deferrable(timer->base)) {
+               base->active_timers--;
+               if (timer->expires == base->next_timer)
+                       base->next_timer = base->timer_jiffies;
+       }
+       return 1;
+}
+
 /*
  * We are using hashed locking: holding per_cpu(tvec_bases).lock
  * means that all timers which are tied to this base via timer->base are
@@ -660,22 +733,15 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 
        base = lock_timer_base(timer, &flags);
 
-       if (timer_pending(timer)) {
-               detach_timer(timer, 0);
-               if (timer->expires == base->next_timer &&
-                   !tbase_get_deferrable(timer->base))
-                       base->next_timer = base->timer_jiffies;
-               ret = 1;
-       } else {
-               if (pending_only)
-                       goto out_unlock;
-       }
+       ret = detach_if_pending(timer, base, false);
+       if (!ret && pending_only)
+               goto out_unlock;
 
        debug_activate(timer, expires);
 
        cpu = smp_processor_id();
 
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
        if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
                cpu = get_nohz_timer_target();
 #endif
@@ -700,9 +766,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
        }
 
        timer->expires = expires;
-       if (time_before(timer->expires, base->next_timer) &&
-           !tbase_get_deferrable(timer->base))
-               base->next_timer = timer->expires;
        internal_add_timer(base, timer);
 
 out_unlock:
@@ -743,16 +806,15 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
        unsigned long expires_limit, mask;
        int bit;
 
-       expires_limit = expires;
-
        if (timer->slack >= 0) {
                expires_limit = expires + timer->slack;
        } else {
-               unsigned long now = jiffies;
+               long delta = expires - jiffies;
+
+               if (delta < 256)
+                       return expires;
 
-               /* No slack, if already expired else auto slack 0.4% */
-               if (time_after(expires, now))
-                       expires_limit = expires + (expires - now)/256;
+               expires_limit = expires + delta / 256;
        }
        mask = expires ^ expires_limit;
        if (mask == 0)
@@ -789,6 +851,8 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
  */
 int mod_timer(struct timer_list *timer, unsigned long expires)
 {
+       expires = apply_slack(timer, expires);
+
        /*
         * This is a common optimization triggered by the
         * networking code - if the timer is re-modified
@@ -797,8 +861,6 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
        if (timer_pending(timer) && timer->expires == expires)
                return 1;
 
-       expires = apply_slack(timer, expires);
-
        return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
@@ -810,7 +872,13 @@ EXPORT_SYMBOL(mod_timer);
  *
  * mod_timer_pinned() is a way to update the expire field of an
  * active timer (if the timer is inactive it will be activated)
- * and not allow the timer to be migrated to a different CPU.
+ * and to ensure that the timer is scheduled on the current CPU.
+ *
+ * Note that this does not prevent the timer from being migrated
+ * when the current CPU goes offline.  If this is a problem for
+ * you, use CPU-hotplug notifiers to handle it correctly, for
+ * example, cancelling the timer when the corresponding CPU goes
+ * offline.
  *
  * mod_timer_pinned(timer, expires) is equivalent to:
  *
@@ -863,19 +931,16 @@ void add_timer_on(struct timer_list *timer, int cpu)
        spin_lock_irqsave(&base->lock, flags);
        timer_set_base(timer, base);
        debug_activate(timer, timer->expires);
-       if (time_before(timer->expires, base->next_timer) &&
-           !tbase_get_deferrable(timer->base))
-               base->next_timer = timer->expires;
        internal_add_timer(base, timer);
        /*
-        * Check whether the other CPU is idle and needs to be
-        * triggered to reevaluate the timer wheel when nohz is
-        * active. We are protected against the other CPU fiddling
+        * Check whether the other CPU is in dynticks mode and needs
+        * to be triggered to reevaluate the timer wheel.
+        * We are protected against the other CPU fiddling
         * with the timer by holding the timer base lock. This also
-        * makes sure that a CPU on the way to idle can not evaluate
-        * the timer wheel.
+        * makes sure that a CPU on the way to stop its tick can not
+        * evaluate the timer wheel.
         */
-       wake_up_idle_cpu(cpu);
+       wake_up_nohz_cpu(cpu);
        spin_unlock_irqrestore(&base->lock, flags);
 }
 EXPORT_SYMBOL_GPL(add_timer_on);
@@ -897,16 +962,12 @@ int del_timer(struct timer_list *timer)
        unsigned long flags;
        int ret = 0;
 
+       debug_assert_init(timer);
+
        timer_stats_timer_clear_start_info(timer);
        if (timer_pending(timer)) {
                base = lock_timer_base(timer, &flags);
-               if (timer_pending(timer)) {
-                       detach_timer(timer, 1);
-                       if (timer->expires == base->next_timer &&
-                           !tbase_get_deferrable(timer->base))
-                               base->next_timer = base->timer_jiffies;
-                       ret = 1;
-               }
+               ret = detach_if_pending(timer, base, true);
                spin_unlock_irqrestore(&base->lock, flags);
        }
 
@@ -927,21 +988,14 @@ int try_to_del_timer_sync(struct timer_list *timer)
        unsigned long flags;
        int ret = -1;
 
-       base = lock_timer_base(timer, &flags);
+       debug_assert_init(timer);
 
-       if (base->running_timer == timer)
-               goto out;
+       base = lock_timer_base(timer, &flags);
 
-       timer_stats_timer_clear_start_info(timer);
-       ret = 0;
-       if (timer_pending(timer)) {
-               detach_timer(timer, 1);
-               if (timer->expires == base->next_timer &&
-                   !tbase_get_deferrable(timer->base))
-                       base->next_timer = base->timer_jiffies;
-               ret = 1;
+       if (base->running_timer != timer) {
+               timer_stats_timer_clear_start_info(timer);
+               ret = detach_if_pending(timer, base, true);
        }
-out:
        spin_unlock_irqrestore(&base->lock, flags);
 
        return ret;
@@ -959,10 +1013,29 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
  *
  * Synchronization rules: Callers must prevent restarting of the timer,
  * otherwise this function is meaningless. It must not be called from
- * hardirq contexts. The caller must not hold locks which would prevent
- * completion of the timer's handler. The timer's handler must not call
- * add_timer_on(). Upon exit the timer is not queued and the handler is
- * not running on any CPU.
+ * interrupt contexts unless the timer is an irqsafe one. The caller must
+ * not hold locks which would prevent completion of the timer's
+ * handler. The timer's handler must not call add_timer_on(). Upon exit the
+ * timer is not queued and the handler is not running on any CPU.
+ *
+ * Note: For !irqsafe timers, you must not hold locks that are held in
+ *   interrupt context while calling this function. Even if the lock has
+ *   nothing to do with the timer in question.  Here's why:
+ *
+ *    CPU0                             CPU1
+ *    ----                             ----
+ *                                   <SOFTIRQ>
+ *                                   call_timer_fn();
+ *                                     base->running_timer = mytimer;
+ *  spin_lock_irq(somelock);
+ *                                     <IRQ>
+ *                                        spin_lock(somelock);
+ *  del_timer_sync(mytimer);
+ *   while (base->running_timer == mytimer);
+ *
+ * Now del_timer_sync() will never return and never release somelock.
+ * The interrupt on the other CPU is waiting to grab somelock but
+ * it has interrupted the softirq that CPU0 is waiting to finish.
  *
  * The function returns whether it has deactivated a pending timer or not.
  */
@@ -971,18 +1044,20 @@ int del_timer_sync(struct timer_list *timer)
 #ifdef CONFIG_LOCKDEP
        unsigned long flags;
 
-       raw_local_irq_save(flags);
-       local_bh_disable();
+       /*
+        * If lockdep gives a backtrace here, please refer to
+        * the synchronization rules above.
+        */
+       local_irq_save(flags);
        lock_map_acquire(&timer->lockdep_map);
        lock_map_release(&timer->lockdep_map);
-       _local_bh_enable();
-       raw_local_irq_restore(flags);
+       local_irq_restore(flags);
 #endif
        /*
         * don't use it in hardirq context, because it
         * could lead to deadlock.
         */
-       WARN_ON(in_irq());
+       WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
        for (;;) {
                int ret = try_to_del_timer_sync(timer);
                if (ret >= 0)
@@ -1007,7 +1082,8 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
         */
        list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
                BUG_ON(tbase_get_base(timer->base) != base);
-               internal_add_timer(base, timer);
+               /* No accounting while moving them */
+               __internal_add_timer(base, timer);
        }
 
        return index;
@@ -1026,7 +1102,9 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
         * warnings as well as problems when looking into
         * timer->lockdep_map, make a copy and use that here.
         */
-       struct lockdep_map lockdep_map = timer->lockdep_map;
+       struct lockdep_map lockdep_map;
+
+       lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
 #endif
        /*
         * Couple the lock chain with the lock chain at
@@ -1086,26 +1164,34 @@ static inline void __run_timers(struct tvec_base *base)
                while (!list_empty(head)) {
                        void (*fn)(unsigned long);
                        unsigned long data;
+                       bool irqsafe;
 
                        timer = list_first_entry(head, struct timer_list,entry);
                        fn = timer->function;
                        data = timer->data;
+                       irqsafe = tbase_get_irqsafe(timer->base);
 
                        timer_stats_account_timer(timer);
 
                        base->running_timer = timer;
-                       detach_timer(timer, 1);
-
-                       spin_unlock_irq(&base->lock);
-                       call_timer_fn(timer, fn, data);
-                       spin_lock_irq(&base->lock);
+                       detach_expired_timer(timer, base);
+
+                       if (irqsafe) {
+                               spin_unlock(&base->lock);
+                               call_timer_fn(timer, fn, data);
+                               spin_lock(&base->lock);
+                       } else {
+                               spin_unlock_irq(&base->lock);
+                               call_timer_fn(timer, fn, data);
+                               spin_lock_irq(&base->lock);
+                       }
                }
        }
        base->running_timer = NULL;
        spin_unlock_irq(&base->lock);
 }
 
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * Find out when the next timer event is due to happen. This
  * is used on S/390 to stop all activity when a CPU is idle.
@@ -1232,18 +1318,21 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
 unsigned long get_next_timer_interrupt(unsigned long now)
 {
        struct tvec_base *base = __this_cpu_read(tvec_bases);
-       unsigned long expires;
+       unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
 
        /*
         * Pretend that there is no timer pending if the cpu is offline.
         * Possible pending timers will be migrated later to an active cpu.
         */
        if (cpu_is_offline(smp_processor_id()))
-               return now + NEXT_TIMER_MAX_DELTA;
+               return expires;
+
        spin_lock(&base->lock);
-       if (time_before_eq(base->next_timer, base->timer_jiffies))
-               base->next_timer = __next_timer_interrupt(base);
-       expires = base->next_timer;
+       if (base->active_timers) {
+               if (time_before_eq(base->next_timer, base->timer_jiffies))
+                       base->next_timer = __next_timer_interrupt(base);
+               expires = base->next_timer;
+       }
        spin_unlock(&base->lock);
 
        if (time_before_eq(expires, now))
@@ -1266,7 +1355,6 @@ void update_process_times(int user_tick)
        account_process_tick(p, user_tick);
        run_local_timers();
        rcu_check_callbacks(cpu, user_tick);
-       printk_tick();
 #ifdef CONFIG_IRQ_WORK
        if (in_irq())
                irq_work_run();
@@ -1297,19 +1385,6 @@ void run_local_timers(void)
        raise_softirq(TIMER_SOFTIRQ);
 }
 
-/*
- * The 64-bit jiffies value is not atomic - you MUST NOT read it
- * without sampling the sequence number in xtime_lock.
- * jiffies is defined in the linker script...
- */
-
-void do_timer(unsigned long ticks)
-{
-       jiffies_64 += ticks;
-       update_wall_time();
-       calc_global_load(ticks);
-}
-
 #ifdef __ARCH_WANT_SYS_ALARM
 
 /*
@@ -1323,70 +1398,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds)
 
 #endif
 
-#ifndef __alpha__
-
-/*
- * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
- * should be moved into arch/i386 instead?
- */
-
-/**
- * sys_getpid - return the thread group id of the current process
- *
- * Note, despite the name, this returns the tgid not the pid.  The tgid and
- * the pid are identical unless CLONE_THREAD was specified on clone() in
- * which case the tgid is the same in all threads of the same group.
- *
- * This is SMP safe as current->tgid does not change.
- */
-SYSCALL_DEFINE0(getpid)
-{
-       return task_tgid_vnr(current);
-}
-
-/*
- * Accessing ->real_parent is not SMP-safe, it could
- * change from under us. However, we can use a stale
- * value of ->real_parent under rcu_read_lock(), see
- * release_task()->call_rcu(delayed_put_task_struct).
- */
-SYSCALL_DEFINE0(getppid)
-{
-       int pid;
-
-       rcu_read_lock();
-       pid = task_tgid_vnr(current->real_parent);
-       rcu_read_unlock();
-
-       return pid;
-}
-
-SYSCALL_DEFINE0(getuid)
-{
-       /* Only we change this so SMP safe */
-       return current_uid();
-}
-
-SYSCALL_DEFINE0(geteuid)
-{
-       /* Only we change this so SMP safe */
-       return current_euid();
-}
-
-SYSCALL_DEFINE0(getgid)
-{
-       /* Only we change this so SMP safe */
-       return current_gid();
-}
-
-SYSCALL_DEFINE0(getegid)
-{
-       /* Only we change this so SMP safe */
-       return  current_egid();
-}
-
-#endif
-
 static void process_timeout(unsigned long __data)
 {
        wake_up_process((struct task_struct *)__data);
@@ -1494,91 +1505,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
 }
 EXPORT_SYMBOL(schedule_timeout_uninterruptible);
 
-/* Thread ID - the internal kernel "pid" */
-SYSCALL_DEFINE0(gettid)
-{
-       return task_pid_vnr(current);
-}
-
-/**
- * do_sysinfo - fill in sysinfo struct
- * @info: pointer to buffer to fill
- */
-int do_sysinfo(struct sysinfo *info)
-{
-       unsigned long mem_total, sav_total;
-       unsigned int mem_unit, bitcount;
-       struct timespec tp;
-
-       memset(info, 0, sizeof(struct sysinfo));
-
-       ktime_get_ts(&tp);
-       monotonic_to_bootbased(&tp);
-       info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
-
-       get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
-
-       info->procs = nr_threads;
-
-       si_meminfo(info);
-       si_swapinfo(info);
-
-       /*
-        * If the sum of all the available memory (i.e. ram + swap)
-        * is less than can be stored in a 32 bit unsigned long then
-        * we can be binary compatible with 2.2.x kernels.  If not,
-        * well, in that case 2.2.x was broken anyways...
-        *
-        *  -Erik Andersen <andersee@debian.org>
-        */
-
-       mem_total = info->totalram + info->totalswap;
-       if (mem_total < info->totalram || mem_total < info->totalswap)
-               goto out;
-       bitcount = 0;
-       mem_unit = info->mem_unit;
-       while (mem_unit > 1) {
-               bitcount++;
-               mem_unit >>= 1;
-               sav_total = mem_total;
-               mem_total <<= 1;
-               if (mem_total < sav_total)
-                       goto out;
-       }
-
-       /*
-        * If mem_total did not overflow, multiply all memory values by
-        * info->mem_unit and set it to 1.  This leaves things compatible
-        * with 2.2.x, and also retains compatibility with earlier 2.4.x
-        * kernels...
-        */
-
-       info->mem_unit = 1;
-       info->totalram <<= bitcount;
-       info->freeram <<= bitcount;
-       info->sharedram <<= bitcount;
-       info->bufferram <<= bitcount;
-       info->totalswap <<= bitcount;
-       info->freeswap <<= bitcount;
-       info->totalhigh <<= bitcount;
-       info->freehigh <<= bitcount;
-
-out:
-       return 0;
-}
-
-SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
-{
-       struct sysinfo val;
-
-       do_sysinfo(&val);
-
-       if (copy_to_user(info, &val, sizeof(struct sysinfo)))
-               return -EFAULT;
-
-       return 0;
-}
-
 static int __cpuinit init_timers_cpu(int cpu)
 {
        int j;
@@ -1615,12 +1541,12 @@ static int __cpuinit init_timers_cpu(int cpu)
                        boot_done = 1;
                        base = &boot_tvec_bases;
                }
+               spin_lock_init(&base->lock);
                tvec_base_done[cpu] = 1;
        } else {
                base = per_cpu(tvec_bases, cpu);
        }
 
-       spin_lock_init(&base->lock);
 
        for (j = 0; j < TVN_SIZE; j++) {
                INIT_LIST_HEAD(base->tv5.vec + j);
@@ -1633,6 +1559,7 @@ static int __cpuinit init_timers_cpu(int cpu)
 
        base->timer_jiffies = jiffies;
        base->next_timer = base->timer_jiffies;
+       base->active_timers = 0;
        return 0;
 }
 
@@ -1643,11 +1570,9 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
 
        while (!list_empty(head)) {
                timer = list_first_entry(head, struct timer_list, entry);
-               detach_timer(timer, 0);
+               /* We ignore the accounting on the dying cpu */
+               detach_timer(timer, false);
                timer_set_base(timer, new_base);
-               if (time_before(timer->expires, new_base->next_timer) &&
-                   !tbase_get_deferrable(timer->base))
-                       new_base->next_timer = timer->expires;
                internal_add_timer(new_base, timer);
        }
 }
@@ -1717,9 +1642,13 @@ static struct notifier_block __cpuinitdata timers_nb = {
 
 void __init init_timers(void)
 {
-       int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
-                               (void *)(long)smp_processor_id());
+       int err;
+
+       /* ensure there are enough low bits for flags in timer->base pointer */
+       BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
 
+       err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
+                              (void *)(long)smp_processor_id());
        init_timer_stats();
 
        BUG_ON(err != NOTIFY_OK);