8f4e4d5bc560d58138f24ab514902f18a175bd57
[linux-2.6.git] / arch / i386 / kernel / timers / timer_tsc.c
1 /*
2  * This code largely moved from arch/i386/kernel/time.c.
3  * See comments there for proper credits.
4  *
5  * 2004-06-25    Jesper Juhl
6  *      moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
7  *      failing to inline.
8  */
9
10 #include <linux/spinlock.h>
11 #include <linux/init.h>
12 #include <linux/timex.h>
13 #include <linux/errno.h>
14 #include <linux/cpufreq.h>
15 #include <linux/string.h>
16 #include <linux/jiffies.h>
17
18 #include <asm/timer.h>
19 #include <asm/io.h>
20 /* processor.h for tsc_disable flag */
21 #include <asm/processor.h>
22
23 #include "io_ports.h"
24 #include "mach_timer.h"
25
26 #include <asm/hpet.h>
27 #include <asm/i8253.h>
28
29 #ifdef CONFIG_HPET_TIMER
30 static unsigned long hpet_usec_quotient;
31 static unsigned long hpet_last;
32 static struct timer_opts timer_tsc;
33 #endif
34
35 static inline void cpufreq_delayed_get(void);
36
37 int tsc_disable __devinitdata = 0;
38
39 static int use_tsc;
40 /* Number of usecs that the last interrupt was delayed */
41 static int delay_at_last_interrupt;
42
43 static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
44 static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
45 static unsigned long long monotonic_base;
46 static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
47
48 /* convert from cycles(64bits) => nanoseconds (64bits)
49  *  basic equation:
50  *              ns = cycles / (freq / ns_per_sec)
51  *              ns = cycles * (ns_per_sec / freq)
52  *              ns = cycles * (10^9 / (cpu_mhz * 10^6))
53  *              ns = cycles * (10^3 / cpu_mhz)
54  *
55  *      Then we use scaling math (suggested by george@mvista.com) to get:
56  *              ns = cycles * (10^3 * SC / cpu_mhz) / SC
57  *              ns = cycles * cyc2ns_scale / SC
58  *
59  *      And since SC is a constant power of two, we can convert the div
60  *  into a shift.   
61  *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
62  */
/*
 * Fixed-point multiplier for the cycles -> nanoseconds conversion,
 * i.e. (10^3 / cpu_mhz) scaled up by 2^CYC2NS_SCALE_FACTOR.
 */
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

/*
 * Recompute cyc2ns_scale for a CPU running at cpu_mhz MHz.
 *
 * cpu_mhz: CPU frequency in MHz.  Callers in this file pass cpu_khz/1000,
 *          which truncates to 0 for any cpu_khz below 1000.  A zero
 *          frequency would divide by zero, so in that case the previously
 *          computed scale is left untouched.
 */
static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
{
        if (!cpu_mhz)
                return;

        cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR) / cpu_mhz;
}
70
71 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
72 {
73         return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
74 }
75
/*
 * Older of the two remembered PIT count samples used by the
 * median-of-three filter in mark_offset_tsc(); seeded in init_tsc().
 */
static int count2;

/*
 * Cached *multiplier* to convert TSC counts to microseconds:
 *      2^32 * (1 / (clocks per usec))
 * A 32x32->64 bit multiply by this value leaves the usec count in the
 * upper word.  Assigned in init_tsc(); on non-SMP kernels the cpufreq
 * notifier rescales it when the CPU frequency changes.
 */
static unsigned long fast_gettimeoffset_quotient;
84
85 static unsigned long get_offset_tsc(void)
86 {
87         register unsigned long eax, edx;
88
89         /* Read the Time Stamp Counter */
90
91         rdtsc(eax,edx);
92
93         /* .. relative to previous jiffy (32 bits is enough) */
94         eax -= last_tsc_low;    /* tsc_low delta */
95
96         /*
97          * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
98          *             = (tsc_low delta) * (usecs_per_clock)
99          *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
100          *
101          * Using a mull instead of a divl saves up to 31 clock cycles
102          * in the critical path.
103          */
104
105         __asm__("mull %2"
106                 :"=a" (eax), "=d" (edx)
107                 :"rm" (fast_gettimeoffset_quotient),
108                  "0" (eax));
109
110         /* our adjusted time offset in microseconds */
111         return delay_at_last_interrupt + edx;
112 }
113
114 static unsigned long long monotonic_clock_tsc(void)
115 {
116         unsigned long long last_offset, this_offset, base;
117         unsigned seq;
118         
119         /* atomically read monotonic base & last_offset */
120         do {
121                 seq = read_seqbegin(&monotonic_lock);
122                 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
123                 base = monotonic_base;
124         } while (read_seqretry(&monotonic_lock, seq));
125
126         /* Read the Time Stamp Counter */
127         rdtscll(this_offset);
128
129         /* return the value in ns */
130         return base + cycles_2_ns(this_offset - last_offset);
131 }
132
133 /*
134  * Scheduler clock - returns current time in nanosec units.
135  */
136 unsigned long long sched_clock(void)
137 {
138         unsigned long long this_offset;
139
140         /*
141          * In the NUMA case we dont use the TSC as they are not
142          * synchronized across all CPUs.
143          */
144 #ifndef CONFIG_NUMA
145         if (!use_tsc)
146 #endif
147                 /* no locking but a rare wrong value is not a big deal */
148                 return jiffies_64 * (1000000000 / HZ);
149
150         /* Read the Time Stamp Counter */
151         rdtscll(this_offset);
152
153         /* return the value in ns */
154         return cycles_2_ns(this_offset);
155 }
156
/*
 * Busy-wait until at least `loops` TSC cycles (low 32 bits) have elapsed.
 * rep_nop() and one TSC read always happen before the first comparison.
 */
static void delay_tsc(unsigned long loops)
{
        unsigned long start, cur;

        rdtscl(start);
        for (;;) {
                rep_nop();
                rdtscl(cur);
                if ((cur - start) >= loops)
                        break;
        }
}
168
169 #ifdef CONFIG_HPET_TIMER
170 static void mark_offset_tsc_hpet(void)
171 {
172         unsigned long long this_offset, last_offset;
173         unsigned long offset, temp, hpet_current;
174
175         write_seqlock(&monotonic_lock);
176         last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
177         /*
178          * It is important that these two operations happen almost at
179          * the same time. We do the RDTSC stuff first, since it's
180          * faster. To avoid any inconsistencies, we need interrupts
181          * disabled locally.
182          */
183         /*
184          * Interrupts are just disabled locally since the timer irq
185          * has the SA_INTERRUPT flag set. -arca
186          */
187         /* read Pentium cycle counter */
188
189         hpet_current = hpet_readl(HPET_COUNTER);
190         rdtsc(last_tsc_low, last_tsc_high);
191
192         /* lost tick compensation */
193         offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
194         if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
195                 int lost_ticks = (offset - hpet_last) / hpet_tick;
196                 jiffies_64 += lost_ticks;
197         }
198         hpet_last = hpet_current;
199
200         /* update the monotonic base value */
201         this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
202         monotonic_base += cycles_2_ns(this_offset - last_offset);
203         write_sequnlock(&monotonic_lock);
204
205         /* calculate delay_at_last_interrupt */
206         /*
207          * Time offset = (hpet delta) * ( usecs per HPET clock )
208          *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)
209          *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
210          * Where,
211          * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
212          */
213         delay_at_last_interrupt = hpet_current - offset;
214         ASM_MUL64_REG(temp, delay_at_last_interrupt,
215                         hpet_usec_quotient, delay_at_last_interrupt);
216 }
217 #endif
218
219
220 #ifdef CONFIG_CPU_FREQ
221 #include <linux/workqueue.h>
222
223 static unsigned int cpufreq_delayed_issched = 0;
224 static unsigned int cpufreq_init = 0;
225 static struct work_struct cpufreq_delayed_get_work;
226
227 static void handle_cpufreq_delayed_get(void *v)
228 {
229         unsigned int cpu;
230         for_each_online_cpu(cpu) {
231                 cpufreq_get(cpu);
232         }
233         cpufreq_delayed_issched = 0;
234 }
235
236 /* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
237  * to verify the CPU frequency the timing core thinks the CPU is running
238  * at is still correct.
239  */
240 static inline void cpufreq_delayed_get(void) 
241 {
242         if (cpufreq_init && !cpufreq_delayed_issched) {
243                 cpufreq_delayed_issched = 1;
244                 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
245                 schedule_work(&cpufreq_delayed_get_work);
246         }
247 }
248
249 /* If the CPU frequency is scaled, TSC-based delays will need a different
250  * loops_per_jiffy value to function properly.
251  */
252
/*
 * Reference values captured by time_cpufreq_notifier() on its first
 * invocation (ref_freq == 0 means "not captured yet").
 */
static unsigned int ref_freq;
static unsigned long loops_per_jiffy_ref;

#ifndef CONFIG_SMP
/* UP only: TSC quotient and cpu_khz as they were at ref_freq. */
static unsigned long fast_gettimeoffset_ref;
static unsigned int cpu_khz_ref;
#endif
260
261 static int
262 time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
263                        void *data)
264 {
265         struct cpufreq_freqs *freq = data;
266
267         if (val != CPUFREQ_RESUMECHANGE)
268                 write_seqlock_irq(&xtime_lock);
269         if (!ref_freq) {
270                 ref_freq = freq->old;
271                 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
272 #ifndef CONFIG_SMP
273                 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
274                 cpu_khz_ref = cpu_khz;
275 #endif
276         }
277
278         if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
279             (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
280             (val == CPUFREQ_RESUMECHANGE)) {
281                 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
282                         cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
283 #ifndef CONFIG_SMP
284                 if (cpu_khz)
285                         cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
286                 if (use_tsc) {
287                         if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
288                                 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
289                                 set_cyc2ns_scale(cpu_khz/1000);
290                         }
291                 }
292 #endif
293         }
294
295         if (val != CPUFREQ_RESUMECHANGE)
296                 write_sequnlock_irq(&xtime_lock);
297
298         return 0;
299 }
300
301 static struct notifier_block time_cpufreq_notifier_block = {
302         .notifier_call  = time_cpufreq_notifier
303 };
304
305
306 static int __init cpufreq_tsc(void)
307 {
308         int ret;
309         INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
310         ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
311                                         CPUFREQ_TRANSITION_NOTIFIER);
312         if (!ret)
313                 cpufreq_init = 1;
314         return ret;
315 }
316 core_initcall(cpufreq_tsc);
317
318 #else /* CONFIG_CPU_FREQ */
/* Without CONFIG_CPU_FREQ there is no frequency to re-check: no-op stub. */
static inline void cpufreq_delayed_get(void)
{
}
320 #endif 
321
322 int recalibrate_cpu_khz(void)
323 {
324 #ifndef CONFIG_SMP
325         unsigned int cpu_khz_old = cpu_khz;
326
327         if (cpu_has_tsc) {
328                 init_cpu_khz();
329                 cpu_data[0].loops_per_jiffy =
330                     cpufreq_scale(cpu_data[0].loops_per_jiffy,
331                                   cpu_khz_old,
332                                   cpu_khz);
333                 return 0;
334         } else
335                 return -ENODEV;
336 #else
337         return -ENODEV;
338 #endif
339 }
340 EXPORT_SYMBOL(recalibrate_cpu_khz);
341
342 static void mark_offset_tsc(void)
343 {
344         unsigned long lost,delay;
345         unsigned long delta = last_tsc_low;
346         int count;
347         int countmp;
348         static int count1 = 0;
349         unsigned long long this_offset, last_offset;
350         static int lost_count = 0;
351
352         write_seqlock(&monotonic_lock);
353         last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
354         /*
355          * It is important that these two operations happen almost at
356          * the same time. We do the RDTSC stuff first, since it's
357          * faster. To avoid any inconsistencies, we need interrupts
358          * disabled locally.
359          */
360
361         /*
362          * Interrupts are just disabled locally since the timer irq
363          * has the SA_INTERRUPT flag set. -arca
364          */
365
366         /* read Pentium cycle counter */
367
368         rdtsc(last_tsc_low, last_tsc_high);
369
370         spin_lock(&i8253_lock);
371         outb_p(0x00, PIT_MODE);     /* latch the count ASAP */
372
373         count = inb_p(PIT_CH0);    /* read the latched count */
374         count |= inb(PIT_CH0) << 8;
375
376         /*
377          * VIA686a test code... reset the latch if count > max + 1
378          * from timer_pit.c - cjb
379          */
380         if (count > LATCH) {
381                 outb_p(0x34, PIT_MODE);
382                 outb_p(LATCH & 0xff, PIT_CH0);
383                 outb(LATCH >> 8, PIT_CH0);
384                 count = LATCH - 1;
385         }
386
387         spin_unlock(&i8253_lock);
388
389         if (pit_latch_buggy) {
390                 /* get center value of last 3 time lutch */
391                 if ((count2 >= count && count >= count1)
392                     || (count1 >= count && count >= count2)) {
393                         count2 = count1; count1 = count;
394                 } else if ((count1 >= count2 && count2 >= count)
395                            || (count >= count2 && count2 >= count1)) {
396                         countmp = count;count = count2;
397                         count2 = count1;count1 = countmp;
398                 } else {
399                         count2 = count1; count1 = count; count = count1;
400                 }
401         }
402
403         /* lost tick compensation */
404         delta = last_tsc_low - delta;
405         {
406                 register unsigned long eax, edx;
407                 eax = delta;
408                 __asm__("mull %2"
409                 :"=a" (eax), "=d" (edx)
410                 :"rm" (fast_gettimeoffset_quotient),
411                  "0" (eax));
412                 delta = edx;
413         }
414         delta += delay_at_last_interrupt;
415         lost = delta/(1000000/HZ);
416         delay = delta%(1000000/HZ);
417         if (lost >= 2) {
418                 jiffies_64 += lost-1;
419
420                 /* sanity check to ensure we're not always losing ticks */
421                 if (lost_count++ > 100) {
422                         printk(KERN_WARNING "Losing too many ticks!\n");
423                         printk(KERN_WARNING "TSC cannot be used as a timesource.  \n");
424                         printk(KERN_WARNING "Possible reasons for this are:\n");
425                         printk(KERN_WARNING "  You're running with Speedstep,\n");
426                         printk(KERN_WARNING "  You don't have DMA enabled for your hard disk (see hdparm),\n");
427                         printk(KERN_WARNING "  Incorrect TSC synchronization on an SMP system (see dmesg).\n");
428                         printk(KERN_WARNING "Falling back to a sane timesource now.\n");
429
430                         clock_fallback();
431                 }
432                 /* ... but give the TSC a fair chance */
433                 if (lost_count > 25)
434                         cpufreq_delayed_get();
435         } else
436                 lost_count = 0;
437         /* update the monotonic base value */
438         this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
439         monotonic_base += cycles_2_ns(this_offset - last_offset);
440         write_sequnlock(&monotonic_lock);
441
442         /* calculate delay_at_last_interrupt */
443         count = ((LATCH-1) - count) * TICK_SIZE;
444         delay_at_last_interrupt = (count + LATCH/2) / LATCH;
445
446         /* catch corner case where tick rollover occured
447          * between tsc and pit reads (as noted when
448          * usec delta is > 90% # of usecs/tick)
449          */
450         if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
451                 jiffies_64++;
452 }
453
454 static int __init init_tsc(char* override)
455 {
456
457         /* check clock override */
458         if (override[0] && strncmp(override,"tsc",3)) {
459 #ifdef CONFIG_HPET_TIMER
460                 if (is_hpet_enabled()) {
461                         printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
462                 } else
463 #endif
464                 {
465                         return -ENODEV;
466                 }
467         }
468
469         /*
470          * If we have APM enabled or the CPU clock speed is variable
471          * (CPU stops clock on HLT or slows clock to save power)
472          * then the TSC timestamps may diverge by up to 1 jiffy from
473          * 'real time' but nothing will break.
474          * The most frequent case is that the CPU is "woken" from a halt
475          * state by the timer interrupt itself, so we get 0 error. In the
476          * rare cases where a driver would "wake" the CPU and request a
477          * timestamp, the maximum error is < 1 jiffy. But timestamps are
478          * still perfectly ordered.
479          * Note that the TSC counter will be reset if APM suspends
480          * to disk; this won't break the kernel, though, 'cuz we're
481          * smart.  See arch/i386/kernel/apm.c.
482          */
483         /*
484          *      Firstly we have to do a CPU check for chips with
485          *      a potentially buggy TSC. At this point we haven't run
486          *      the ident/bugs checks so we must run this hook as it
487          *      may turn off the TSC flag.
488          *
489          *      NOTE: this doesn't yet handle SMP 486 machines where only
490          *      some CPU's have a TSC. Thats never worked and nobody has
491          *      moaned if you have the only one in the world - you fix it!
492          */
493
494         count2 = LATCH; /* initialize counter for mark_offset_tsc() */
495
496         if (cpu_has_tsc) {
497                 unsigned long tsc_quotient;
498 #ifdef CONFIG_HPET_TIMER
499                 if (is_hpet_enabled() && hpet_use_timer) {
500                         unsigned long result, remain;
501                         printk("Using TSC for gettimeofday\n");
502                         tsc_quotient = calibrate_tsc_hpet(NULL);
503                         timer_tsc.mark_offset = &mark_offset_tsc_hpet;
504                         /*
505                          * Math to calculate hpet to usec multiplier
506                          * Look for the comments at get_offset_tsc_hpet()
507                          */
508                         ASM_DIV64_REG(result, remain, hpet_tick,
509                                         0, KERNEL_TICK_USEC);
510                         if (remain > (hpet_tick >> 1))
511                                 result++; /* rounding the result */
512
513                         hpet_usec_quotient = result;
514                 } else
515 #endif
516                 {
517                         tsc_quotient = calibrate_tsc();
518                 }
519
520                 if (tsc_quotient) {
521                         fast_gettimeoffset_quotient = tsc_quotient;
522                         use_tsc = 1;
523                         /*
524                          *      We could be more selective here I suspect
525                          *      and just enable this for the next intel chips ?
526                          */
527                         /* report CPU clock rate in Hz.
528                          * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
529                          * clock/second. Our precision is about 100 ppm.
530                          */
531                         {       unsigned long eax=0, edx=1000;
532                                 __asm__("divl %2"
533                                 :"=a" (cpu_khz), "=d" (edx)
534                                 :"r" (tsc_quotient),
535                                 "0" (eax), "1" (edx));
536                                 printk("Detected %u.%03u MHz processor.\n",
537                                         cpu_khz / 1000, cpu_khz % 1000);
538                         }
539                         set_cyc2ns_scale(cpu_khz/1000);
540                         return 0;
541                 }
542         }
543         return -ENODEV;
544 }
545
546 #ifndef CONFIG_X86_TSC
547 /* disable flag for tsc.  Takes effect by clearing the TSC cpu flag
548  * in cpu/common.c */
549 static int __init tsc_setup(char *str)
550 {
551         tsc_disable = 1;
552         return 1;
553 }
554 #else
555 static int __init tsc_setup(char *str)
556 {
557         printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
558                                 "cannot disable TSC.\n");
559         return 1;
560 }
561 #endif
562 __setup("notsc", tsc_setup);
563
564
565
566 /************************************************************/
567
568 /* tsc timer_opts struct */
569 static struct timer_opts timer_tsc = {
570         .name = "tsc",
571         .mark_offset = mark_offset_tsc, 
572         .get_offset = get_offset_tsc,
573         .monotonic_clock = monotonic_clock_tsc,
574         .delay = delay_tsc,
575         .read_timer = read_timer_tsc,
576 };
577
578 struct init_timer_opts __initdata timer_tsc_init = {
579         .init = init_tsc,
580         .opts = &timer_tsc,
581 };