nohz: Assign timekeeping duty to a CPU outside the full dynticks range
[linux-3.10.git] / kernel / time / tick-broadcast.c
1 /*
2  * linux/kernel/time/tick-broadcast.c
3  *
4  * This file contains functions which emulate a local clock-event
5  * device via a broadcast event source.
6  *
7  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
8  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
9  * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
10  *
11  * This code is licenced under the GPL version 2. For details see
12  * kernel-base/COPYING.
13  */
14 #include <linux/cpu.h>
15 #include <linux/err.h>
16 #include <linux/hrtimer.h>
17 #include <linux/interrupt.h>
18 #include <linux/percpu.h>
19 #include <linux/profile.h>
20 #include <linux/sched.h>
21 #include <linux/smp.h>
22
23 #include "tick-internal.h"
24
25 /*
26  * Broadcast support for broken x86 hardware, where the local apic
27  * timer stops in C3 state.
28  */
29
/* The tick device acting as the broadcast event source. */
static struct tick_device tick_broadcast_device;
/* FIXME: Use cpumask_var_t. */
/* CPUs whose tick is delivered through the broadcast device. */
static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
/* Scratch mask used in this file to build the set of broadcast targets. */
static DECLARE_BITMAP(tmpmask, NR_CPUS);
/* Taken by the functions in this file around broadcast mask/device updates. */
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
/*
 * Set to 1 by CLOCK_EVT_NOTIFY_BROADCAST_FORCE. While set,
 * CLOCK_EVT_NOTIFY_BROADCAST_OFF requests are ignored.
 */
static int tick_broadcast_force;
36
/*
 * tick_broadcast_clear_oneshot() is implemented further down when
 * CONFIG_TICK_ONESHOT is enabled. Without it, callers get an empty stub.
 */
#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_clear_oneshot(int cpu);
#else
static inline void tick_broadcast_clear_oneshot(int cpu) { }
#endif
42
43 /*
44  * Debugging: see timer_list.c
45  */
46 struct tick_device *tick_get_broadcast_device(void)
47 {
48         return &tick_broadcast_device;
49 }
50
51 struct cpumask *tick_get_broadcast_mask(void)
52 {
53         return to_cpumask(tick_broadcast_mask);
54 }
55
/*
 * Put the broadcast device into periodic mode.
 *
 * @bc may be NULL (no broadcast device installed), in which case
 * nothing is done.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (!bc)
		return;

	tick_setup_periodic(bc, 1);
}
64
65 /*
66  * Check, if the device can be utilized as broadcast device:
67  */
68 int tick_check_broadcast_device(struct clock_event_device *dev)
69 {
70         if ((tick_broadcast_device.evtdev &&
71              tick_broadcast_device.evtdev->rating >= dev->rating) ||
72              (dev->features & CLOCK_EVT_FEAT_C3STOP))
73                 return 0;
74
75         clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
76         tick_broadcast_device.evtdev = dev;
77         if (!cpumask_empty(tick_get_broadcast_mask()))
78                 tick_broadcast_start_periodic(dev);
79         return 1;
80 }
81
82 /*
83  * Check, if the device is the broadcast device
84  */
85 int tick_is_broadcast_device(struct clock_event_device *dev)
86 {
87         return (dev && tick_broadcast_device.evtdev == dev);
88 }
89
/*
 * Fallback broadcast function, installed when a device has no usable
 * broadcast function. It only reports the failure (once); @mask is not
 * used.
 */
static void err_broadcast(const struct cpumask *mask)
{
	(void)mask;

	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}
94
95 static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
96 {
97         if (!dev->broadcast)
98                 dev->broadcast = tick_broadcast;
99         if (!dev->broadcast) {
100                 pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
101                              dev->name);
102                 dev->broadcast = err_broadcast;
103         }
104 }
105
106 /*
107  * Check, if the device is disfunctional and a place holder, which
108  * needs to be handled by the broadcast device.
109  */
110 int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
111 {
112         unsigned long flags;
113         int ret = 0;
114
115         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
116
117         /*
118          * Devices might be registered with both periodic and oneshot
119          * mode disabled. This signals, that the device needs to be
120          * operated from the broadcast device and is a placeholder for
121          * the cpu local device.
122          */
123         if (!tick_device_is_functional(dev)) {
124                 dev->event_handler = tick_handle_periodic;
125                 tick_device_setup_broadcast_func(dev);
126                 cpumask_set_cpu(cpu, tick_get_broadcast_mask());
127                 tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
128                 ret = 1;
129         } else {
130                 /*
131                  * When the new device is not affected by the stop
132                  * feature and the cpu is marked in the broadcast mask
133                  * then clear the broadcast bit.
134                  */
135                 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
136                         int cpu = smp_processor_id();
137                         cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
138                         tick_broadcast_clear_oneshot(cpu);
139                 } else {
140                         tick_device_setup_broadcast_func(dev);
141                 }
142         }
143         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
144         return ret;
145 }
146
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
/*
 * Run the event handler of this CPU's tick device.
 *
 * Returns 0 on success, -ENODEV when this CPU has no tick device and
 * -EINVAL when the device has no event handler installed.
 */
int tick_receive_broadcast(void)
{
	struct clock_event_device *evt = this_cpu_ptr(&tick_cpu_device)->evtdev;

	if (!evt)
		return -ENODEV;
	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}
#endif
163
164 /*
165  * Broadcast the event to the cpus, which are set in the mask (mangled).
166  */
167 static void tick_do_broadcast(struct cpumask *mask)
168 {
169         int cpu = smp_processor_id();
170         struct tick_device *td;
171
172         /*
173          * Check, if the current cpu is in the mask
174          */
175         if (cpumask_test_cpu(cpu, mask)) {
176                 cpumask_clear_cpu(cpu, mask);
177                 td = &per_cpu(tick_cpu_device, cpu);
178                 td->evtdev->event_handler(td->evtdev);
179         }
180
181         if (!cpumask_empty(mask)) {
182                 /*
183                  * It might be necessary to actually check whether the devices
184                  * have different broadcast functions. For now, just use the
185                  * one of the first device. This works as long as we have this
186                  * misfeature only on x86 (lapic)
187                  */
188                 td = &per_cpu(tick_cpu_device, cpumask_first(mask));
189                 td->evtdev->broadcast(mask);
190         }
191 }
192
193 /*
194  * Periodic broadcast:
195  * - invoke the broadcast handlers
196  */
197 static void tick_do_periodic_broadcast(void)
198 {
199         raw_spin_lock(&tick_broadcast_lock);
200
201         cpumask_and(to_cpumask(tmpmask),
202                     cpu_online_mask, tick_get_broadcast_mask());
203         tick_do_broadcast(to_cpumask(tmpmask));
204
205         raw_spin_unlock(&tick_broadcast_lock);
206 }
207
208 /*
209  * Event handler for periodic broadcast ticks
210  */
211 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
212 {
213         ktime_t next;
214
215         tick_do_periodic_broadcast();
216
217         /*
218          * The device is in periodic mode. No reprogramming necessary:
219          */
220         if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
221                 return;
222
223         /*
224          * Setup the next period for devices, which do not have
225          * periodic mode. We read dev->next_event first and add to it
226          * when the event already expired. clockevents_program_event()
227          * sets dev->next_event only when the event is really
228          * programmed to the device.
229          */
230         for (next = dev->next_event; ;) {
231                 next = ktime_add(next, tick_period);
232
233                 if (!clockevents_program_event(dev, next, false))
234                         return;
235                 tick_do_periodic_broadcast();
236         }
237 }
238
239 /*
240  * Powerstate information: The system enters/leaves a state, where
241  * affected devices might stop
242  */
243 static void tick_do_broadcast_on_off(unsigned long *reason)
244 {
245         struct clock_event_device *bc, *dev;
246         struct tick_device *td;
247         unsigned long flags;
248         int cpu, bc_stopped;
249
250         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
251
252         cpu = smp_processor_id();
253         td = &per_cpu(tick_cpu_device, cpu);
254         dev = td->evtdev;
255         bc = tick_broadcast_device.evtdev;
256
257         /*
258          * Is the device not affected by the powerstate ?
259          */
260         if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
261                 goto out;
262
263         if (!tick_device_is_functional(dev))
264                 goto out;
265
266         bc_stopped = cpumask_empty(tick_get_broadcast_mask());
267
268         switch (*reason) {
269         case CLOCK_EVT_NOTIFY_BROADCAST_ON:
270         case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
271                 if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
272                         cpumask_set_cpu(cpu, tick_get_broadcast_mask());
273                         if (tick_broadcast_device.mode ==
274                             TICKDEV_MODE_PERIODIC)
275                                 clockevents_shutdown(dev);
276                 }
277                 if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
278                         tick_broadcast_force = 1;
279                 break;
280         case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
281                 if (!tick_broadcast_force &&
282                     cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
283                         cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
284                         if (tick_broadcast_device.mode ==
285                             TICKDEV_MODE_PERIODIC)
286                                 tick_setup_periodic(dev, 0);
287                 }
288                 break;
289         }
290
291         if (cpumask_empty(tick_get_broadcast_mask())) {
292                 if (!bc_stopped)
293                         clockevents_shutdown(bc);
294         } else if (bc_stopped) {
295                 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
296                         tick_broadcast_start_periodic(bc);
297                 else
298                         tick_broadcast_setup_oneshot(bc);
299         }
300 out:
301         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
302 }
303
304 /*
305  * Powerstate information: The system enters/leaves a state, where
306  * affected devices might stop.
307  */
308 void tick_broadcast_on_off(unsigned long reason, int *oncpu)
309 {
310         if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
311                 printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
312                        "offline CPU #%d\n", *oncpu);
313         else
314                 tick_do_broadcast_on_off(&reason);
315 }
316
317 /*
318  * Set the periodic handler depending on broadcast on/off
319  */
320 void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
321 {
322         if (!broadcast)
323                 dev->event_handler = tick_handle_periodic;
324         else
325                 dev->event_handler = tick_handle_periodic_broadcast;
326 }
327
328 /*
329  * Remove a CPU from broadcasting
330  */
331 void tick_shutdown_broadcast(unsigned int *cpup)
332 {
333         struct clock_event_device *bc;
334         unsigned long flags;
335         unsigned int cpu = *cpup;
336
337         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
338
339         bc = tick_broadcast_device.evtdev;
340         cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
341
342         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
343                 if (bc && cpumask_empty(tick_get_broadcast_mask()))
344                         clockevents_shutdown(bc);
345         }
346
347         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
348 }
349
350 void tick_suspend_broadcast(void)
351 {
352         struct clock_event_device *bc;
353         unsigned long flags;
354
355         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
356
357         bc = tick_broadcast_device.evtdev;
358         if (bc)
359                 clockevents_shutdown(bc);
360
361         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
362 }
363
364 int tick_resume_broadcast(void)
365 {
366         struct clock_event_device *bc;
367         unsigned long flags;
368         int broadcast = 0;
369
370         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
371
372         bc = tick_broadcast_device.evtdev;
373
374         if (bc) {
375                 clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
376
377                 switch (tick_broadcast_device.mode) {
378                 case TICKDEV_MODE_PERIODIC:
379                         if (!cpumask_empty(tick_get_broadcast_mask()))
380                                 tick_broadcast_start_periodic(bc);
381                         broadcast = cpumask_test_cpu(smp_processor_id(),
382                                                      tick_get_broadcast_mask());
383                         break;
384                 case TICKDEV_MODE_ONESHOT:
385                         if (!cpumask_empty(tick_get_broadcast_mask()))
386                                 broadcast = tick_resume_broadcast_oneshot(bc);
387                         break;
388                 }
389         }
390         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
391
392         return broadcast;
393 }
394
395
396 #ifdef CONFIG_TICK_ONESHOT
397
398 /* FIXME: use cpumask_var_t. */
399 static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS);
400
401 /*
402  * Exposed for debugging: see timer_list.c
403  */
404 struct cpumask *tick_get_broadcast_oneshot_mask(void)
405 {
406         return to_cpumask(tick_broadcast_oneshot_mask);
407 }
408
409 static int tick_broadcast_set_event(ktime_t expires, int force)
410 {
411         struct clock_event_device *bc = tick_broadcast_device.evtdev;
412
413         if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
414                 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
415
416         return clockevents_program_event(bc, expires, force);
417 }
418
419 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
420 {
421         clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
422         return 0;
423 }
424
425 /*
426  * Called from irq_enter() when idle was interrupted to reenable the
427  * per cpu device.
428  */
429 void tick_check_oneshot_broadcast(int cpu)
430 {
431         if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
432                 struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
433
434                 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
435         }
436 }
437
438 /*
439  * Handle oneshot mode broadcasting
440  */
441 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
442 {
443         struct tick_device *td;
444         ktime_t now, next_event;
445         int cpu;
446
447         raw_spin_lock(&tick_broadcast_lock);
448 again:
449         dev->next_event.tv64 = KTIME_MAX;
450         next_event.tv64 = KTIME_MAX;
451         cpumask_clear(to_cpumask(tmpmask));
452         now = ktime_get();
453         /* Find all expired events */
454         for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) {
455                 td = &per_cpu(tick_cpu_device, cpu);
456                 if (td->evtdev->next_event.tv64 <= now.tv64)
457                         cpumask_set_cpu(cpu, to_cpumask(tmpmask));
458                 else if (td->evtdev->next_event.tv64 < next_event.tv64)
459                         next_event.tv64 = td->evtdev->next_event.tv64;
460         }
461
462         /*
463          * Wakeup the cpus which have an expired event.
464          */
465         tick_do_broadcast(to_cpumask(tmpmask));
466
467         /*
468          * Two reasons for reprogram:
469          *
470          * - The global event did not expire any CPU local
471          * events. This happens in dyntick mode, as the maximum PIT
472          * delta is quite small.
473          *
474          * - There are pending events on sleeping CPUs which were not
475          * in the event mask
476          */
477         if (next_event.tv64 != KTIME_MAX) {
478                 /*
479                  * Rearm the broadcast device. If event expired,
480                  * repeat the above
481                  */
482                 if (tick_broadcast_set_event(next_event, 0))
483                         goto again;
484         }
485         raw_spin_unlock(&tick_broadcast_lock);
486 }
487
488 /*
489  * Powerstate information: The system enters/leaves a state, where
490  * affected devices might stop
491  */
492 void tick_broadcast_oneshot_control(unsigned long reason)
493 {
494         struct clock_event_device *bc, *dev;
495         struct tick_device *td;
496         unsigned long flags;
497         int cpu;
498
499         /*
500          * Periodic mode does not care about the enter/exit of power
501          * states
502          */
503         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
504                 return;
505
506         /*
507          * We are called with preemtion disabled from the depth of the
508          * idle code, so we can't be moved away.
509          */
510         cpu = smp_processor_id();
511         td = &per_cpu(tick_cpu_device, cpu);
512         dev = td->evtdev;
513
514         if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
515                 return;
516
517         bc = tick_broadcast_device.evtdev;
518
519         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
520         if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
521                 if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
522                         cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask());
523                         clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
524                         if (dev->next_event.tv64 < bc->next_event.tv64)
525                                 tick_broadcast_set_event(dev->next_event, 1);
526                 }
527         } else {
528                 if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
529                         cpumask_clear_cpu(cpu,
530                                           tick_get_broadcast_oneshot_mask());
531                         clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
532                         if (dev->next_event.tv64 != KTIME_MAX)
533                                 tick_program_event(dev->next_event, 1);
534                 }
535         }
536         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
537 }
538
/*
 * Reset the one shot broadcast for a cpu by clearing its bit in the
 * oneshot broadcast mask.
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	struct cpumask *oneshot_mask = tick_get_broadcast_oneshot_mask();

	cpumask_clear_cpu(cpu, oneshot_mask);
}
548
549 static void tick_broadcast_init_next_event(struct cpumask *mask,
550                                            ktime_t expires)
551 {
552         struct tick_device *td;
553         int cpu;
554
555         for_each_cpu(cpu, mask) {
556                 td = &per_cpu(tick_cpu_device, cpu);
557                 if (td->evtdev)
558                         td->evtdev->next_event = expires;
559         }
560 }
561
562 /**
563  * tick_broadcast_setup_oneshot - setup the broadcast device
564  */
565 void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
566 {
567         int cpu = smp_processor_id();
568
569         /* Set it up only once ! */
570         if (bc->event_handler != tick_handle_oneshot_broadcast) {
571                 int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
572
573                 bc->event_handler = tick_handle_oneshot_broadcast;
574
575                 /* Take the do_timer update */
576                 if (!tick_nohz_extended_cpu(cpu))
577                         tick_do_timer_cpu = cpu;
578
579                 /*
580                  * We must be careful here. There might be other CPUs
581                  * waiting for periodic broadcast. We need to set the
582                  * oneshot_mask bits for those and program the
583                  * broadcast device to fire.
584                  */
585                 cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask());
586                 cpumask_clear_cpu(cpu, to_cpumask(tmpmask));
587                 cpumask_or(tick_get_broadcast_oneshot_mask(),
588                            tick_get_broadcast_oneshot_mask(),
589                            to_cpumask(tmpmask));
590
591                 if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) {
592                         clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
593                         tick_broadcast_init_next_event(to_cpumask(tmpmask),
594                                                        tick_next_period);
595                         tick_broadcast_set_event(tick_next_period, 1);
596                 } else
597                         bc->next_event.tv64 = KTIME_MAX;
598         } else {
599                 /*
600                  * The first cpu which switches to oneshot mode sets
601                  * the bit for all other cpus which are in the general
602                  * (periodic) broadcast mask. So the bit is set and
603                  * would prevent the first broadcast enter after this
604                  * to program the bc device.
605                  */
606                 tick_broadcast_clear_oneshot(cpu);
607         }
608 }
609
610 /*
611  * Select oneshot operating mode for the broadcast device
612  */
613 void tick_broadcast_switch_to_oneshot(void)
614 {
615         struct clock_event_device *bc;
616         unsigned long flags;
617
618         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
619
620         tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
621         bc = tick_broadcast_device.evtdev;
622         if (bc)
623                 tick_broadcast_setup_oneshot(bc);
624
625         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
626 }
627
628
629 /*
630  * Remove a dead CPU from broadcasting
631  */
632 void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
633 {
634         unsigned long flags;
635         unsigned int cpu = *cpup;
636
637         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
638
639         /*
640          * Clear the broadcast mask flag for the dead cpu, but do not
641          * stop the broadcast device!
642          */
643         cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
644
645         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
646 }
647
648 /*
649  * Check, whether the broadcast device is in one shot mode
650  */
651 int tick_broadcast_oneshot_active(void)
652 {
653         return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
654 }
655
656 /*
657  * Check whether the broadcast device supports oneshot.
658  */
659 bool tick_broadcast_oneshot_available(void)
660 {
661         struct clock_event_device *bc = tick_broadcast_device.evtdev;
662
663         return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
664 }
665
666 #endif