Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/cmarinas...
[linux-2.6.git] / kernel / time / tick-broadcast.c
1 /*
2  * linux/kernel/time/tick-broadcast.c
3  *
4  * This file contains functions which emulate a local clock-event
5  * device via a broadcast event source.
6  *
7  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
8  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
9  * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
10  *
11  * This code is licenced under the GPL version 2. For details see
12  * kernel-base/COPYING.
13  */
14 #include <linux/cpu.h>
15 #include <linux/err.h>
16 #include <linux/hrtimer.h>
17 #include <linux/interrupt.h>
18 #include <linux/percpu.h>
19 #include <linux/profile.h>
20 #include <linux/sched.h>
21
22 #include "tick-internal.h"
23
24 /*
25  * Broadcast support for broken x86 hardware, where the local apic
26  * timer stops in C3 state.
27  */
28
29 static struct tick_device tick_broadcast_device;
30 /* FIXME: Use cpumask_var_t. */
31 static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
32 static DECLARE_BITMAP(tmpmask, NR_CPUS);
33 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
34 static int tick_broadcast_force;
35
36 #ifdef CONFIG_TICK_ONESHOT
37 static void tick_broadcast_clear_oneshot(int cpu);
38 #else
39 static inline void tick_broadcast_clear_oneshot(int cpu) { }
40 #endif
41
42 /*
43  * Debugging: see timer_list.c
44  */
45 struct tick_device *tick_get_broadcast_device(void)
46 {
47         return &tick_broadcast_device;
48 }
49
50 struct cpumask *tick_get_broadcast_mask(void)
51 {
52         return to_cpumask(tick_broadcast_mask);
53 }
54
/*
 * Start the device in periodic mode.
 *
 * A NULL @bc (no broadcast device registered) is silently ignored.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
        if (!bc)
                return;

        /* Second argument: set the device up with broadcast enabled */
        tick_setup_periodic(bc, 1);
}
63
64 /*
65  * Check, if the device can be utilized as broadcast device:
66  */
67 int tick_check_broadcast_device(struct clock_event_device *dev)
68 {
69         if ((tick_broadcast_device.evtdev &&
70              tick_broadcast_device.evtdev->rating >= dev->rating) ||
71              (dev->features & CLOCK_EVT_FEAT_C3STOP))
72                 return 0;
73
74         clockevents_exchange_device(NULL, dev);
75         tick_broadcast_device.evtdev = dev;
76         if (!cpumask_empty(tick_get_broadcast_mask()))
77                 tick_broadcast_start_periodic(dev);
78         return 1;
79 }
80
81 /*
82  * Check, if the device is the broadcast device
83  */
84 int tick_is_broadcast_device(struct clock_event_device *dev)
85 {
86         return (dev && tick_broadcast_device.evtdev == dev);
87 }
88
89 /*
90  * Check, if the device is disfunctional and a place holder, which
91  * needs to be handled by the broadcast device.
92  */
93 int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
94 {
95         unsigned long flags;
96         int ret = 0;
97
98         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
99
100         /*
101          * Devices might be registered with both periodic and oneshot
102          * mode disabled. This signals, that the device needs to be
103          * operated from the broadcast device and is a placeholder for
104          * the cpu local device.
105          */
106         if (!tick_device_is_functional(dev)) {
107                 dev->event_handler = tick_handle_periodic;
108                 cpumask_set_cpu(cpu, tick_get_broadcast_mask());
109                 tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
110                 ret = 1;
111         } else {
112                 /*
113                  * When the new device is not affected by the stop
114                  * feature and the cpu is marked in the broadcast mask
115                  * then clear the broadcast bit.
116                  */
117                 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
118                         int cpu = smp_processor_id();
119
120                         cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
121                         tick_broadcast_clear_oneshot(cpu);
122                 }
123         }
124         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
125         return ret;
126 }
127
128 /*
129  * Broadcast the event to the cpus, which are set in the mask (mangled).
130  */
131 static void tick_do_broadcast(struct cpumask *mask)
132 {
133         int cpu = smp_processor_id();
134         struct tick_device *td;
135
136         /*
137          * Check, if the current cpu is in the mask
138          */
139         if (cpumask_test_cpu(cpu, mask)) {
140                 cpumask_clear_cpu(cpu, mask);
141                 td = &per_cpu(tick_cpu_device, cpu);
142                 td->evtdev->event_handler(td->evtdev);
143         }
144
145         if (!cpumask_empty(mask)) {
146                 /*
147                  * It might be necessary to actually check whether the devices
148                  * have different broadcast functions. For now, just use the
149                  * one of the first device. This works as long as we have this
150                  * misfeature only on x86 (lapic)
151                  */
152                 td = &per_cpu(tick_cpu_device, cpumask_first(mask));
153                 td->evtdev->broadcast(mask);
154         }
155 }
156
157 /*
158  * Periodic broadcast:
159  * - invoke the broadcast handlers
160  */
161 static void tick_do_periodic_broadcast(void)
162 {
163         raw_spin_lock(&tick_broadcast_lock);
164
165         cpumask_and(to_cpumask(tmpmask),
166                     cpu_online_mask, tick_get_broadcast_mask());
167         tick_do_broadcast(to_cpumask(tmpmask));
168
169         raw_spin_unlock(&tick_broadcast_lock);
170 }
171
172 /*
173  * Event handler for periodic broadcast ticks
174  */
175 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
176 {
177         ktime_t next;
178
179         tick_do_periodic_broadcast();
180
181         /*
182          * The device is in periodic mode. No reprogramming necessary:
183          */
184         if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
185                 return;
186
187         /*
188          * Setup the next period for devices, which do not have
189          * periodic mode. We read dev->next_event first and add to it
190          * when the event already expired. clockevents_program_event()
191          * sets dev->next_event only when the event is really
192          * programmed to the device.
193          */
194         for (next = dev->next_event; ;) {
195                 next = ktime_add(next, tick_period);
196
197                 if (!clockevents_program_event(dev, next, ktime_get()))
198                         return;
199                 tick_do_periodic_broadcast();
200         }
201 }
202
203 /*
204  * Powerstate information: The system enters/leaves a state, where
205  * affected devices might stop
206  */
207 static void tick_do_broadcast_on_off(unsigned long *reason)
208 {
209         struct clock_event_device *bc, *dev;
210         struct tick_device *td;
211         unsigned long flags;
212         int cpu, bc_stopped;
213
214         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
215
216         cpu = smp_processor_id();
217         td = &per_cpu(tick_cpu_device, cpu);
218         dev = td->evtdev;
219         bc = tick_broadcast_device.evtdev;
220
221         /*
222          * Is the device not affected by the powerstate ?
223          */
224         if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
225                 goto out;
226
227         if (!tick_device_is_functional(dev))
228                 goto out;
229
230         bc_stopped = cpumask_empty(tick_get_broadcast_mask());
231
232         switch (*reason) {
233         case CLOCK_EVT_NOTIFY_BROADCAST_ON:
234         case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
235                 if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
236                         cpumask_set_cpu(cpu, tick_get_broadcast_mask());
237                         if (tick_broadcast_device.mode ==
238                             TICKDEV_MODE_PERIODIC)
239                                 clockevents_shutdown(dev);
240                 }
241                 if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
242                         tick_broadcast_force = 1;
243                 break;
244         case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
245                 if (!tick_broadcast_force &&
246                     cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
247                         cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
248                         if (tick_broadcast_device.mode ==
249                             TICKDEV_MODE_PERIODIC)
250                                 tick_setup_periodic(dev, 0);
251                 }
252                 break;
253         }
254
255         if (cpumask_empty(tick_get_broadcast_mask())) {
256                 if (!bc_stopped)
257                         clockevents_shutdown(bc);
258         } else if (bc_stopped) {
259                 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
260                         tick_broadcast_start_periodic(bc);
261                 else
262                         tick_broadcast_setup_oneshot(bc);
263         }
264 out:
265         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
266 }
267
268 /*
269  * Powerstate information: The system enters/leaves a state, where
270  * affected devices might stop.
271  */
272 void tick_broadcast_on_off(unsigned long reason, int *oncpu)
273 {
274         if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
275                 printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
276                        "offline CPU #%d\n", *oncpu);
277         else
278                 tick_do_broadcast_on_off(&reason);
279 }
280
281 /*
282  * Set the periodic handler depending on broadcast on/off
283  */
284 void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
285 {
286         if (!broadcast)
287                 dev->event_handler = tick_handle_periodic;
288         else
289                 dev->event_handler = tick_handle_periodic_broadcast;
290 }
291
292 /*
293  * Remove a CPU from broadcasting
294  */
295 void tick_shutdown_broadcast(unsigned int *cpup)
296 {
297         struct clock_event_device *bc;
298         unsigned long flags;
299         unsigned int cpu = *cpup;
300
301         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
302
303         bc = tick_broadcast_device.evtdev;
304         cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
305
306         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
307                 if (bc && cpumask_empty(tick_get_broadcast_mask()))
308                         clockevents_shutdown(bc);
309         }
310
311         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
312 }
313
314 void tick_suspend_broadcast(void)
315 {
316         struct clock_event_device *bc;
317         unsigned long flags;
318
319         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
320
321         bc = tick_broadcast_device.evtdev;
322         if (bc)
323                 clockevents_shutdown(bc);
324
325         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
326 }
327
328 int tick_resume_broadcast(void)
329 {
330         struct clock_event_device *bc;
331         unsigned long flags;
332         int broadcast = 0;
333
334         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
335
336         bc = tick_broadcast_device.evtdev;
337
338         if (bc) {
339                 clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
340
341                 switch (tick_broadcast_device.mode) {
342                 case TICKDEV_MODE_PERIODIC:
343                         if (!cpumask_empty(tick_get_broadcast_mask()))
344                                 tick_broadcast_start_periodic(bc);
345                         broadcast = cpumask_test_cpu(smp_processor_id(),
346                                                      tick_get_broadcast_mask());
347                         break;
348                 case TICKDEV_MODE_ONESHOT:
349                         broadcast = tick_resume_broadcast_oneshot(bc);
350                         break;
351                 }
352         }
353         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
354
355         return broadcast;
356 }
357
358
359 #ifdef CONFIG_TICK_ONESHOT
360
361 /* FIXME: use cpumask_var_t. */
362 static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS);
363
364 /*
365  * Exposed for debugging: see timer_list.c
366  */
367 struct cpumask *tick_get_broadcast_oneshot_mask(void)
368 {
369         return to_cpumask(tick_broadcast_oneshot_mask);
370 }
371
372 static int tick_broadcast_set_event(ktime_t expires, int force)
373 {
374         struct clock_event_device *bc = tick_broadcast_device.evtdev;
375
376         return tick_dev_program_event(bc, expires, force);
377 }
378
379 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
380 {
381         clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
382         return 0;
383 }
384
385 /*
386  * Called from irq_enter() when idle was interrupted to reenable the
387  * per cpu device.
388  */
389 void tick_check_oneshot_broadcast(int cpu)
390 {
391         if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
392                 struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
393
394                 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
395         }
396 }
397
398 /*
399  * Handle oneshot mode broadcasting
400  */
401 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
402 {
403         struct tick_device *td;
404         ktime_t now, next_event;
405         int cpu;
406
407         raw_spin_lock(&tick_broadcast_lock);
408 again:
409         dev->next_event.tv64 = KTIME_MAX;
410         next_event.tv64 = KTIME_MAX;
411         cpumask_clear(to_cpumask(tmpmask));
412         now = ktime_get();
413         /* Find all expired events */
414         for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) {
415                 td = &per_cpu(tick_cpu_device, cpu);
416                 if (td->evtdev->next_event.tv64 <= now.tv64)
417                         cpumask_set_cpu(cpu, to_cpumask(tmpmask));
418                 else if (td->evtdev->next_event.tv64 < next_event.tv64)
419                         next_event.tv64 = td->evtdev->next_event.tv64;
420         }
421
422         /*
423          * Wakeup the cpus which have an expired event.
424          */
425         tick_do_broadcast(to_cpumask(tmpmask));
426
427         /*
428          * Two reasons for reprogram:
429          *
430          * - The global event did not expire any CPU local
431          * events. This happens in dyntick mode, as the maximum PIT
432          * delta is quite small.
433          *
434          * - There are pending events on sleeping CPUs which were not
435          * in the event mask
436          */
437         if (next_event.tv64 != KTIME_MAX) {
438                 /*
439                  * Rearm the broadcast device. If event expired,
440                  * repeat the above
441                  */
442                 if (tick_broadcast_set_event(next_event, 0))
443                         goto again;
444         }
445         raw_spin_unlock(&tick_broadcast_lock);
446 }
447
448 /*
449  * Powerstate information: The system enters/leaves a state, where
450  * affected devices might stop
451  */
452 void tick_broadcast_oneshot_control(unsigned long reason)
453 {
454         struct clock_event_device *bc, *dev;
455         struct tick_device *td;
456         unsigned long flags;
457         int cpu;
458
459         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
460
461         /*
462          * Periodic mode does not care about the enter/exit of power
463          * states
464          */
465         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
466                 goto out;
467
468         bc = tick_broadcast_device.evtdev;
469         cpu = smp_processor_id();
470         td = &per_cpu(tick_cpu_device, cpu);
471         dev = td->evtdev;
472
473         if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
474                 goto out;
475
476         if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
477                 if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
478                         cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask());
479                         clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
480                         if (dev->next_event.tv64 < bc->next_event.tv64)
481                                 tick_broadcast_set_event(dev->next_event, 1);
482                 }
483         } else {
484                 if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
485                         cpumask_clear_cpu(cpu,
486                                           tick_get_broadcast_oneshot_mask());
487                         clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
488                         if (dev->next_event.tv64 != KTIME_MAX)
489                                 tick_program_event(dev->next_event, 1);
490                 }
491         }
492
493 out:
494         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
495 }
496
/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
        struct cpumask *oneshot_mask = tick_get_broadcast_oneshot_mask();

        cpumask_clear_cpu(cpu, oneshot_mask);
}
506
507 static void tick_broadcast_init_next_event(struct cpumask *mask,
508                                            ktime_t expires)
509 {
510         struct tick_device *td;
511         int cpu;
512
513         for_each_cpu(cpu, mask) {
514                 td = &per_cpu(tick_cpu_device, cpu);
515                 if (td->evtdev)
516                         td->evtdev->next_event = expires;
517         }
518 }
519
520 /**
521  * tick_broadcast_setup_oneshot - setup the broadcast device
522  */
523 void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
524 {
525         int cpu = smp_processor_id();
526
527         /* Set it up only once ! */
528         if (bc->event_handler != tick_handle_oneshot_broadcast) {
529                 int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
530
531                 bc->event_handler = tick_handle_oneshot_broadcast;
532                 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
533
534                 /* Take the do_timer update */
535                 tick_do_timer_cpu = cpu;
536
537                 /*
538                  * We must be careful here. There might be other CPUs
539                  * waiting for periodic broadcast. We need to set the
540                  * oneshot_mask bits for those and program the
541                  * broadcast device to fire.
542                  */
543                 cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask());
544                 cpumask_clear_cpu(cpu, to_cpumask(tmpmask));
545                 cpumask_or(tick_get_broadcast_oneshot_mask(),
546                            tick_get_broadcast_oneshot_mask(),
547                            to_cpumask(tmpmask));
548
549                 if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) {
550                         tick_broadcast_init_next_event(to_cpumask(tmpmask),
551                                                        tick_next_period);
552                         tick_broadcast_set_event(tick_next_period, 1);
553                 } else
554                         bc->next_event.tv64 = KTIME_MAX;
555         } else {
556                 /*
557                  * The first cpu which switches to oneshot mode sets
558                  * the bit for all other cpus which are in the general
559                  * (periodic) broadcast mask. So the bit is set and
560                  * would prevent the first broadcast enter after this
561                  * to program the bc device.
562                  */
563                 tick_broadcast_clear_oneshot(cpu);
564         }
565 }
566
567 /*
568  * Select oneshot operating mode for the broadcast device
569  */
570 void tick_broadcast_switch_to_oneshot(void)
571 {
572         struct clock_event_device *bc;
573         unsigned long flags;
574
575         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
576
577         tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
578         bc = tick_broadcast_device.evtdev;
579         if (bc)
580                 tick_broadcast_setup_oneshot(bc);
581         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
582 }
583
584
585 /*
586  * Remove a dead CPU from broadcasting
587  */
588 void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
589 {
590         unsigned long flags;
591         unsigned int cpu = *cpup;
592
593         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
594
595         /*
596          * Clear the broadcast mask flag for the dead cpu, but do not
597          * stop the broadcast device!
598          */
599         cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
600
601         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
602 }
603
604 /*
605  * Check, whether the broadcast device is in one shot mode
606  */
607 int tick_broadcast_oneshot_active(void)
608 {
609         return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
610 }
611
612 /*
613  * Check whether the broadcast device supports oneshot.
614  */
615 bool tick_broadcast_oneshot_available(void)
616 {
617         struct clock_event_device *bc = tick_broadcast_device.evtdev;
618
619         return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
620 }
621
622 #endif