Merge remote branch 'tip/x86/tsc' into fortglx/2.6.38/tip/x86/tsc
[linux-2.6.git] / arch / x86 / kernel / tsc.c
index 648fb269e5d1e800e1cdb9dffc466d167122c985..bb64beb301d982a7653af75b45c4195d0daecd91 100644 (file)
@@ -17,6 +17,8 @@
 #include <asm/time.h>
 #include <asm/delay.h>
 #include <asm/hypervisor.h>
+#include <asm/nmi.h>
+#include <asm/x86_init.h>
 
 unsigned int __read_mostly cpu_khz;    /* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -48,7 +50,7 @@ u64 native_sched_clock(void)
         *   unstable. We do this because unlike Time Of Day,
         *   the scheduler clock tolerates small errors and it's
         *   very important for it to be as fast as the platform
-        *   can achive it. )
+        *   can achieve it. )
         */
        if (unlikely(tsc_disabled)) {
                /* No locking but a rare wrong value is not a big deal: */
@@ -102,10 +104,14 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
+static int no_sched_irq_time;
+
 static int __init tsc_setup(char *str)
 {
        if (!strcmp(str, "reliable"))
                tsc_clocksource_reliable = 1;
+       if (!strncmp(str, "noirqtime", 9))
+               no_sched_irq_time = 1;
        return 1;
 }
 
@@ -400,15 +406,9 @@ unsigned long native_calibrate_tsc(void)
 {
        u64 tsc1, tsc2, delta, ref1, ref2;
        unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
-       unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz;
+       unsigned long flags, latch, ms, fast_calibrate;
        int hpet = is_hpet_enabled(), i, loopmin;
 
-       hv_tsc_khz = get_hypervisor_tsc_freq();
-       if (hv_tsc_khz) {
-               printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
-               return hv_tsc_khz;
-       }
-
        local_irq_save(flags);
        fast_calibrate = quick_pit_calibrate();
        local_irq_restore(flags);
@@ -566,7 +566,7 @@ int recalibrate_cpu_khz(void)
        unsigned long cpu_khz_old = cpu_khz;
 
        if (cpu_has_tsc) {
-               tsc_khz = calibrate_tsc();
+               tsc_khz = x86_platform.calibrate_tsc();
                cpu_khz = tsc_khz;
                cpu_data(0).loops_per_jiffy =
                        cpufreq_scale(cpu_data(0).loops_per_jiffy,
@@ -630,6 +630,44 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
        local_irq_restore(flags);
 }
 
+static unsigned long long cyc2ns_suspend;
+
+void save_sched_clock_state(void)
+{
+       if (!sched_clock_stable)
+               return;
+
+       cyc2ns_suspend = sched_clock();
+}
+
+/*
+ * Even on processors with invariant TSC, TSC gets reset in some of the
+ * ACPI system sleep states. And on some systems the BIOS seems to reinit
+ * TSC to an arbitrary value (still sync'd across CPUs) during resume from
+ * such sleep states. To cope with this, recompute the cyc2ns_offset for
+ * each cpu so that sched_clock() continues from the point where it was
+ * left off during suspend.
+ */
+void restore_sched_clock_state(void)
+{
+       unsigned long long offset;
+       unsigned long flags;
+       int cpu;
+
+       if (!sched_clock_stable)
+               return;
+
+       local_irq_save(flags);
+
+       __get_cpu_var(cyc2ns_offset) = 0;
+       offset = cyc2ns_suspend - sched_clock();
+
+       for_each_possible_cpu(cpu)
+               per_cpu(cyc2ns_offset, cpu) = offset;
+
+       local_irq_restore(flags);
+}
+
 #ifdef CONFIG_CPU_FREQ
 
 /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
@@ -670,7 +708,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
        if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
                        (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
                        (val == CPUFREQ_RESUMECHANGE)) {
-               *lpj =  cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+               *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
 
                tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
                if (!(freq->flags & CPUFREQ_CONST_LOOPS))
@@ -744,12 +782,17 @@ static cycle_t __vsyscall_fn vread_tsc(void)
 }
 #endif
 
+static void resume_tsc(struct clocksource *cs)
+{
+       clocksource_tsc.cycle_last = 0;
+}
+
 static struct clocksource clocksource_tsc = {
        .name                   = "tsc",
        .rating                 = 300,
        .read                   = read_tsc,
+       .resume                 = resume_tsc,
        .mask                   = CLOCKSOURCE_MASK(64),
-       .shift                  = 22,
        .flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
                                  CLOCK_SOURCE_MUST_VERIFY,
 #ifdef CONFIG_X86_64
@@ -761,12 +804,16 @@ void mark_tsc_unstable(char *reason)
 {
        if (!tsc_unstable) {
                tsc_unstable = 1;
-               printk("Marking TSC unstable due to %s\n", reason);
+               sched_clock_stable = 0;
+               disable_sched_clock_irqtime();
+               printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
                /* Change only the rating, when not registered */
                if (clocksource_tsc.mult)
-                       clocksource_change_rating(&clocksource_tsc, 0);
-               else
+                       clocksource_mark_unstable(&clocksource_tsc);
+               else {
+                       clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
                        clocksource_tsc.rating = 0;
+               }
        }
 }
 
@@ -801,7 +848,7 @@ static void __init check_system_tsc_reliable(void)
        unsigned long res_low, res_high;
 
        rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
-       /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */
+       /* Geode_LX - the OLPC CPU has a very reliable TSC */
        if (res_low & RTSC_SUSP)
                tsc_clocksource_reliable = 1;
 #endif
@@ -843,8 +890,6 @@ __cpuinit int unsynchronized_tsc(void)
 
 static void __init init_tsc_clocksource(void)
 {
-       clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
-                       clocksource_tsc.shift);
        if (tsc_clocksource_reliable)
                clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
        /* lower the rating if we already know its unstable: */
@@ -852,7 +897,7 @@ static void __init init_tsc_clocksource(void)
                clocksource_tsc.rating = 0;
                clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
        }
-       clocksource_register(&clocksource_tsc);
+       clocksource_register_khz(&clocksource_tsc, tsc_khz);
 }
 
 void __init tsc_init(void)
@@ -860,10 +905,12 @@ void __init tsc_init(void)
        u64 lpj;
        int cpu;
 
+       x86_init.timers.tsc_pre_init();
+
        if (!cpu_has_tsc)
                return;
 
-       tsc_khz = calibrate_tsc();
+       tsc_khz = x86_platform.calibrate_tsc();
        cpu_khz = tsc_khz;
 
        if (!tsc_khz) {
@@ -871,12 +918,6 @@ void __init tsc_init(void)
                return;
        }
 
-#ifdef CONFIG_X86_64
-       if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
-                       (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
-               cpu_khz = calibrate_cpu();
-#endif
-
        printk("Detected %lu.%03lu MHz processor.\n",
                        (unsigned long)cpu_khz / 1000,
                        (unsigned long)cpu_khz % 1000);
@@ -896,6 +937,9 @@ void __init tsc_init(void)
        /* now allow native_sched_clock() to use rdtsc */
        tsc_disabled = 0;
 
+       if (!no_sched_irq_time)
+               enable_sched_clock_irqtime();
+
        lpj = ((u64)tsc_khz * 1000);
        do_div(lpj, HZ);
        lpj_fine = lpj;