Merge branch 'timers-for-linus-urgent' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6.git] / kernel / time / timekeeping.c
index 2a6d3e3e2c3eceedcbcf42c98439acb656e0dede..af4135f058254b0607fc2594f2c52ae6355e76b1 100644 (file)
@@ -165,13 +165,6 @@ struct timespec raw_time;
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
-static struct timespec xtime_cache __attribute__ ((aligned (16)));
-void update_xtime_cache(u64 nsec)
-{
-       xtime_cache = xtime;
-       timespec_add_ns(&xtime_cache, nsec);
-}
-
 /* must hold xtime_lock */
 void timekeeping_leap_insert(int leapsecond)
 {
 /* must hold xtime_lock */
 void timekeeping_leap_insert(int leapsecond)
 {
@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv)
 
        xtime = *tv;
 
 
        xtime = *tv;
 
-       update_xtime_cache(0);
-
        timekeeper.ntp_error = 0;
        ntp_clear();
 
        timekeeper.ntp_error = 0;
        ntp_clear();
 
@@ -487,6 +478,17 @@ int timekeeping_valid_for_hres(void)
        return ret;
 }
 
        return ret;
 }
 
+/**
+ * timekeeping_max_deferment - Returns max time the clocksource can be deferred
+ *
+ * Returns the max_idle_ns value of the clocksource currently installed in
+ * the timekeeper (timekeeper.clock). No locking is done here.
+ *
+ * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
+ * ensure that the clocksource does not change!
+ */
+u64 timekeeping_max_deferment(void)
+{
+       return timekeeper.clock->max_idle_ns;
+}
+
 /**
  * read_persistent_clock -  Return time from the persistent clock.
  *
 /**
  * read_persistent_clock -  Return time from the persistent clock.
  *
@@ -548,7 +550,6 @@ void __init timekeeping_init(void)
        }
        set_normalized_timespec(&wall_to_monotonic,
                                -boot.tv_sec, -boot.tv_nsec);
        }
        set_normalized_timespec(&wall_to_monotonic,
                                -boot.tv_sec, -boot.tv_nsec);
-       update_xtime_cache(0);
        total_sleep_time.tv_sec = 0;
        total_sleep_time.tv_nsec = 0;
        write_sequnlock_irqrestore(&xtime_lock, flags);
        total_sleep_time.tv_sec = 0;
        total_sleep_time.tv_nsec = 0;
        write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -582,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
                wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
                total_sleep_time = timespec_add_safe(total_sleep_time, ts);
        }
                wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
                total_sleep_time = timespec_add_safe(total_sleep_time, ts);
        }
-       update_xtime_cache(0);
        /* re-base the last cycle value */
        timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
        timekeeper.ntp_error = 0;
        /* re-base the last cycle value */
        timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
        timekeeper.ntp_error = 0;
@@ -722,6 +722,49 @@ static void timekeeping_adjust(s64 offset)
                                timekeeper.ntp_error_shift;
 }
 
                                timekeeper.ntp_error_shift;
 }
 
+/**
+ * logarithmic_accumulation - shifted accumulation of cycles
+ * @offset:    cycles that have not been accumulated yet
+ * @shift:     log2 of the number of cycle_intervals to consume in this step
+ *
+ * This function accumulates a shifted interval of cycles into
+ * a shifted interval of nanoseconds. Allows for an O(log) accumulation
+ * loop.
+ *
+ * If @offset is at least (timekeeper.cycle_interval << @shift), that chunk
+ * is subtracted from @offset and folded into the clocksource's cycle_last,
+ * timekeeper.xtime_nsec / xtime.tv_sec (calling second_overflow() once per
+ * whole second carried), raw_time and timekeeper.ntp_error. Otherwise
+ * nothing is modified and @offset is returned unchanged.
+ *
+ * NOTE(review): the raw_time.tv_nsec sum is formed before it is normalised;
+ * whether it can overflow for large shifts depends on field widths that are
+ * not visible here - confirm on 32-bit builds.
+ *
+ * Returns the unconsumed cycles.
+ */
+static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
+{
+       u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
+
+       /* If the offset is smaller than a shifted interval, do nothing */
+       if (offset < timekeeper.cycle_interval<<shift)
+               return offset;
+
+       /* Accumulate one shifted interval */
+       offset -= timekeeper.cycle_interval << shift;
+       timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift;
+
+       timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
+       while (timekeeper.xtime_nsec >= nsecps) {
+               timekeeper.xtime_nsec -= nsecps;
+               xtime.tv_sec++;
+               second_overflow();
+       }
+
+       /* Accumulate into raw time, carrying whole seconds into tv_sec */
+       raw_time.tv_nsec += timekeeper.raw_interval << shift;;
+       while (raw_time.tv_nsec >= NSEC_PER_SEC) {
+               raw_time.tv_nsec -= NSEC_PER_SEC;
+               raw_time.tv_sec++;
+       }
+
+       /* Accumulate error between NTP and clock interval */
+       timekeeper.ntp_error += tick_length << shift;
+       timekeeper.ntp_error -= timekeeper.xtime_interval <<
+                               (timekeeper.ntp_error_shift + shift);
+
+       return offset;
+}
+
 /**
  * update_wall_time - Uses the current clocksource to increment the wall time
  *
 /**
  * update_wall_time - Uses the current clocksource to increment the wall time
  *
@@ -731,7 +774,7 @@ void update_wall_time(void)
 {
        struct clocksource *clock;
        cycle_t offset;
 {
        struct clocksource *clock;
        cycle_t offset;
-       u64 nsecs;
+       int shift = 0, maxshift;
 
        /* Make sure we're fully resumed: */
        if (unlikely(timekeeping_suspended))
 
        /* Make sure we're fully resumed: */
        if (unlikely(timekeeping_suspended))
@@ -745,33 +788,22 @@ void update_wall_time(void)
 #endif
        timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
 
 #endif
        timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
 
-       /* normally this loop will run just once, however in the
-        * case of lost or late ticks, it will accumulate correctly.
+       /*
+        * With NO_HZ we may have to accumulate many cycle_intervals
+        * (think "ticks") worth of time at once. To do this efficiently,
+        * we calculate the largest doubling multiple of cycle_intervals
+        * that is smaller than the offset. We then accumulate that
+        * chunk in one go, and then try to consume the next smaller
+        * doubled multiple.
         */
         */
+       shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
+       shift = max(0, shift);
+       /* Bound shift to one less than what overflows tick_length */
+       maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1;
+       shift = min(shift, maxshift);
        while (offset >= timekeeper.cycle_interval) {
        while (offset >= timekeeper.cycle_interval) {
-               u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
-
-               /* accumulate one interval */
-               offset -= timekeeper.cycle_interval;
-               clock->cycle_last += timekeeper.cycle_interval;
-
-               timekeeper.xtime_nsec += timekeeper.xtime_interval;
-               if (timekeeper.xtime_nsec >= nsecps) {
-                       timekeeper.xtime_nsec -= nsecps;
-                       xtime.tv_sec++;
-                       second_overflow();
-               }
-
-               raw_time.tv_nsec += timekeeper.raw_interval;
-               if (raw_time.tv_nsec >= NSEC_PER_SEC) {
-                       raw_time.tv_nsec -= NSEC_PER_SEC;
-                       raw_time.tv_sec++;
-               }
-
-               /* accumulate error between NTP and clock interval */
-               timekeeper.ntp_error += tick_length;
-               timekeeper.ntp_error -= timekeeper.xtime_interval <<
-                                       timekeeper.ntp_error_shift;
+               offset = logarithmic_accumulation(offset, shift);
+               shift--;
        }
 
        /* correct the clock when NTP error is too big */
        }
 
        /* correct the clock when NTP error is too big */
@@ -807,9 +839,6 @@ void update_wall_time(void)
        timekeeper.ntp_error += timekeeper.xtime_nsec <<
                                timekeeper.ntp_error_shift;
 
        timekeeper.ntp_error += timekeeper.xtime_nsec <<
                                timekeeper.ntp_error_shift;
 
-       nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
-       update_xtime_cache(nsecs);
-
        /* check to see if there is a new clocksource to use */
        update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
 }
        /* check to see if there is a new clocksource to use */
        update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
 }
@@ -846,13 +875,13 @@ void monotonic_to_bootbased(struct timespec *ts)
 
 unsigned long get_seconds(void)
 {
 
 unsigned long get_seconds(void)
 {
-       return xtime_cache.tv_sec;
+       return xtime.tv_sec;
 }
 EXPORT_SYMBOL(get_seconds);
 
 struct timespec __current_kernel_time(void)
 {
 }
 EXPORT_SYMBOL(get_seconds);
 
 struct timespec __current_kernel_time(void)
 {
-       return xtime_cache;
+       return xtime;
 }
 
 struct timespec current_kernel_time(void)
 }
 
 struct timespec current_kernel_time(void)
@@ -862,8 +891,7 @@ struct timespec current_kernel_time(void)
 
        do {
                seq = read_seqbegin(&xtime_lock);
 
        do {
                seq = read_seqbegin(&xtime_lock);
-
-               now = xtime_cache;
+               now = xtime;
        } while (read_seqretry(&xtime_lock, seq));
 
        return now;
        } while (read_seqretry(&xtime_lock, seq));
 
        return now;
@@ -877,8 +905,7 @@ struct timespec get_monotonic_coarse(void)
 
        do {
                seq = read_seqbegin(&xtime_lock);
 
        do {
                seq = read_seqbegin(&xtime_lock);
-
-               now = xtime_cache;
+               now = xtime;
                mono = wall_to_monotonic;
        } while (read_seqretry(&xtime_lock, seq));
 
                mono = wall_to_monotonic;
        } while (read_seqretry(&xtime_lock, seq));