tick: Prevent uncontrolled switch to oneshot mode
[linux-3.10.git] / kernel / time / tick-broadcast.c
1 /*
2  * linux/kernel/time/tick-broadcast.c
3  *
4  * This file contains functions which emulate a local clock-event
5  * device via a broadcast event source.
6  *
7  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
8  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
9  * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
10  *
11  * This code is licenced under the GPL version 2. For details see
12  * kernel-base/COPYING.
13  */
14 #include <linux/cpu.h>
15 #include <linux/err.h>
16 #include <linux/hrtimer.h>
17 #include <linux/interrupt.h>
18 #include <linux/percpu.h>
19 #include <linux/profile.h>
20 #include <linux/sched.h>
21 #include <linux/smp.h>
22
23 #include "tick-internal.h"
24
25 /*
26  * Broadcast support for broken x86 hardware, where the local apic
27  * timer stops in C3 state.
28  */
29
30 static struct tick_device tick_broadcast_device;
31 static cpumask_var_t tick_broadcast_mask;
32 static cpumask_var_t tick_broadcast_on;
33 static cpumask_var_t tmpmask;
34 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
35 static int tick_broadcast_force;
36
37 #ifdef CONFIG_TICK_ONESHOT
38 static void tick_broadcast_clear_oneshot(int cpu);
39 #else
40 static inline void tick_broadcast_clear_oneshot(int cpu) { }
41 #endif
42
43 /*
44  * Debugging: see timer_list.c
45  */
46 struct tick_device *tick_get_broadcast_device(void)
47 {
48         return &tick_broadcast_device;
49 }
50
51 struct cpumask *tick_get_broadcast_mask(void)
52 {
53         return tick_broadcast_mask;
54 }
55
/*
 * Put the broadcast device @bc into periodic mode. A NULL @bc (no
 * broadcast device registered) is silently ignored.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (!bc)
		return;

	tick_setup_periodic(bc, 1);
}
64
65 /*
66  * Check, if the device can be utilized as broadcast device:
67  */
68 int tick_check_broadcast_device(struct clock_event_device *dev)
69 {
70         struct clock_event_device *cur = tick_broadcast_device.evtdev;
71
72         if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
73             (tick_broadcast_device.evtdev &&
74              tick_broadcast_device.evtdev->rating >= dev->rating) ||
75              (dev->features & CLOCK_EVT_FEAT_C3STOP))
76                 return 0;
77
78         clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
79         if (cur)
80                 cur->event_handler = clockevents_handle_noop;
81         tick_broadcast_device.evtdev = dev;
82         if (!cpumask_empty(tick_broadcast_mask))
83                 tick_broadcast_start_periodic(dev);
84         /*
85          * Inform all cpus about this. We might be in a situation
86          * where we did not switch to oneshot mode because the per cpu
87          * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
88          * of a oneshot capable broadcast device. Without that
89          * notification the systems stays stuck in periodic mode
90          * forever.
91          */
92         if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
93                 tick_clock_notify();
94         return 1;
95 }
96
97 /*
98  * Check, if the device is the broadcast device
99  */
100 int tick_is_broadcast_device(struct clock_event_device *dev)
101 {
102         return (dev && tick_broadcast_device.evtdev == dev);
103 }
104
/*
 * Fallback broadcast function, installed by
 * tick_device_setup_broadcast_func() when no real broadcast function is
 * available. It cannot deliver anything; it only reports the failure
 * once. @mask is not used.
 */
static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}
109
110 static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
111 {
112         if (!dev->broadcast)
113                 dev->broadcast = tick_broadcast;
114         if (!dev->broadcast) {
115                 pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
116                              dev->name);
117                 dev->broadcast = err_broadcast;
118         }
119 }
120
121 /*
122  * Check, if the device is disfunctional and a place holder, which
123  * needs to be handled by the broadcast device.
124  */
125 int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
126 {
127         struct clock_event_device *bc = tick_broadcast_device.evtdev;
128         unsigned long flags;
129         int ret;
130
131         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
132
133         /*
134          * Devices might be registered with both periodic and oneshot
135          * mode disabled. This signals, that the device needs to be
136          * operated from the broadcast device and is a placeholder for
137          * the cpu local device.
138          */
139         if (!tick_device_is_functional(dev)) {
140                 dev->event_handler = tick_handle_periodic;
141                 tick_device_setup_broadcast_func(dev);
142                 cpumask_set_cpu(cpu, tick_broadcast_mask);
143                 tick_broadcast_start_periodic(bc);
144                 ret = 1;
145         } else {
146                 /*
147                  * Clear the broadcast bit for this cpu if the
148                  * device is not power state affected.
149                  */
150                 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
151                         cpumask_clear_cpu(cpu, tick_broadcast_mask);
152                 else
153                         tick_device_setup_broadcast_func(dev);
154
155                 /*
156                  * Clear the broadcast bit if the CPU is not in
157                  * periodic broadcast on state.
158                  */
159                 if (!cpumask_test_cpu(cpu, tick_broadcast_on))
160                         cpumask_clear_cpu(cpu, tick_broadcast_mask);
161
162                 switch (tick_broadcast_device.mode) {
163                 case TICKDEV_MODE_ONESHOT:
164                         /*
165                          * If the system is in oneshot mode we can
166                          * unconditionally clear the oneshot mask bit,
167                          * because the CPU is running and therefore
168                          * not in an idle state which causes the power
169                          * state affected device to stop. Let the
170                          * caller initialize the device.
171                          */
172                         tick_broadcast_clear_oneshot(cpu);
173                         ret = 0;
174                         break;
175
176                 case TICKDEV_MODE_PERIODIC:
177                         /*
178                          * If the system is in periodic mode, check
179                          * whether the broadcast device can be
180                          * switched off now.
181                          */
182                         if (cpumask_empty(tick_broadcast_mask) && bc)
183                                 clockevents_shutdown(bc);
184                         /*
185                          * If we kept the cpu in the broadcast mask,
186                          * tell the caller to leave the per cpu device
187                          * in shutdown state. The periodic interrupt
188                          * is delivered by the broadcast device.
189                          */
190                         ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
191                         break;
192                 default:
193                         /* Nothing to do */
194                         ret = 0;
195                         break;
196                 }
197         }
198         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
199         return ret;
200 }
201
202 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
203 int tick_receive_broadcast(void)
204 {
205         struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
206         struct clock_event_device *evt = td->evtdev;
207
208         if (!evt)
209                 return -ENODEV;
210
211         if (!evt->event_handler)
212                 return -EINVAL;
213
214         evt->event_handler(evt);
215         return 0;
216 }
217 #endif
218
219 /*
220  * Broadcast the event to the cpus, which are set in the mask (mangled).
221  */
222 static void tick_do_broadcast(struct cpumask *mask)
223 {
224         int cpu = smp_processor_id();
225         struct tick_device *td;
226
227         /*
228          * Check, if the current cpu is in the mask
229          */
230         if (cpumask_test_cpu(cpu, mask)) {
231                 cpumask_clear_cpu(cpu, mask);
232                 td = &per_cpu(tick_cpu_device, cpu);
233                 td->evtdev->event_handler(td->evtdev);
234         }
235
236         if (!cpumask_empty(mask)) {
237                 /*
238                  * It might be necessary to actually check whether the devices
239                  * have different broadcast functions. For now, just use the
240                  * one of the first device. This works as long as we have this
241                  * misfeature only on x86 (lapic)
242                  */
243                 td = &per_cpu(tick_cpu_device, cpumask_first(mask));
244                 td->evtdev->broadcast(mask);
245         }
246 }
247
248 /*
249  * Periodic broadcast:
250  * - invoke the broadcast handlers
251  */
252 static void tick_do_periodic_broadcast(void)
253 {
254         raw_spin_lock(&tick_broadcast_lock);
255
256         cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
257         tick_do_broadcast(tmpmask);
258
259         raw_spin_unlock(&tick_broadcast_lock);
260 }
261
262 /*
263  * Event handler for periodic broadcast ticks
264  */
265 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
266 {
267         ktime_t next;
268
269         tick_do_periodic_broadcast();
270
271         /*
272          * The device is in periodic mode. No reprogramming necessary:
273          */
274         if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
275                 return;
276
277         /*
278          * Setup the next period for devices, which do not have
279          * periodic mode. We read dev->next_event first and add to it
280          * when the event already expired. clockevents_program_event()
281          * sets dev->next_event only when the event is really
282          * programmed to the device.
283          */
284         for (next = dev->next_event; ;) {
285                 next = ktime_add(next, tick_period);
286
287                 if (!clockevents_program_event(dev, next, false))
288                         return;
289                 tick_do_periodic_broadcast();
290         }
291 }
292
293 /*
294  * Powerstate information: The system enters/leaves a state, where
295  * affected devices might stop
296  */
297 static void tick_do_broadcast_on_off(unsigned long *reason)
298 {
299         struct clock_event_device *bc, *dev;
300         struct tick_device *td;
301         unsigned long flags;
302         int cpu, bc_stopped;
303
304         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
305
306         cpu = smp_processor_id();
307         td = &per_cpu(tick_cpu_device, cpu);
308         dev = td->evtdev;
309         bc = tick_broadcast_device.evtdev;
310
311         /*
312          * Is the device not affected by the powerstate ?
313          */
314         if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
315                 goto out;
316
317         if (!tick_device_is_functional(dev))
318                 goto out;
319
320         bc_stopped = cpumask_empty(tick_broadcast_mask);
321
322         switch (*reason) {
323         case CLOCK_EVT_NOTIFY_BROADCAST_ON:
324         case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
325                 cpumask_set_cpu(cpu, tick_broadcast_on);
326                 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
327                         if (tick_broadcast_device.mode ==
328                             TICKDEV_MODE_PERIODIC)
329                                 clockevents_shutdown(dev);
330                 }
331                 if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
332                         tick_broadcast_force = 1;
333                 break;
334         case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
335                 if (tick_broadcast_force)
336                         break;
337                 cpumask_clear_cpu(cpu, tick_broadcast_on);
338                 if (!tick_device_is_functional(dev))
339                         break;
340                 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
341                         if (tick_broadcast_device.mode ==
342                             TICKDEV_MODE_PERIODIC)
343                                 tick_setup_periodic(dev, 0);
344                 }
345                 break;
346         }
347
348         if (cpumask_empty(tick_broadcast_mask)) {
349                 if (!bc_stopped)
350                         clockevents_shutdown(bc);
351         } else if (bc_stopped) {
352                 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
353                         tick_broadcast_start_periodic(bc);
354                 else
355                         tick_broadcast_setup_oneshot(bc);
356         }
357 out:
358         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
359 }
360
361 /*
362  * Powerstate information: The system enters/leaves a state, where
363  * affected devices might stop.
364  */
365 void tick_broadcast_on_off(unsigned long reason, int *oncpu)
366 {
367         if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
368                 printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
369                        "offline CPU #%d\n", *oncpu);
370         else
371                 tick_do_broadcast_on_off(&reason);
372 }
373
374 /*
375  * Set the periodic handler depending on broadcast on/off
376  */
377 void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
378 {
379         if (!broadcast)
380                 dev->event_handler = tick_handle_periodic;
381         else
382                 dev->event_handler = tick_handle_periodic_broadcast;
383 }
384
385 /*
386  * Remove a CPU from broadcasting
387  */
388 void tick_shutdown_broadcast(unsigned int *cpup)
389 {
390         struct clock_event_device *bc;
391         unsigned long flags;
392         unsigned int cpu = *cpup;
393
394         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
395
396         bc = tick_broadcast_device.evtdev;
397         cpumask_clear_cpu(cpu, tick_broadcast_mask);
398         cpumask_clear_cpu(cpu, tick_broadcast_on);
399
400         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
401                 if (bc && cpumask_empty(tick_broadcast_mask))
402                         clockevents_shutdown(bc);
403         }
404
405         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
406 }
407
408 void tick_suspend_broadcast(void)
409 {
410         struct clock_event_device *bc;
411         unsigned long flags;
412
413         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
414
415         bc = tick_broadcast_device.evtdev;
416         if (bc)
417                 clockevents_shutdown(bc);
418
419         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
420 }
421
422 int tick_resume_broadcast(void)
423 {
424         struct clock_event_device *bc;
425         unsigned long flags;
426         int broadcast = 0;
427
428         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
429
430         bc = tick_broadcast_device.evtdev;
431
432         if (bc) {
433                 clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
434
435                 switch (tick_broadcast_device.mode) {
436                 case TICKDEV_MODE_PERIODIC:
437                         if (!cpumask_empty(tick_broadcast_mask))
438                                 tick_broadcast_start_periodic(bc);
439                         broadcast = cpumask_test_cpu(smp_processor_id(),
440                                                      tick_broadcast_mask);
441                         break;
442                 case TICKDEV_MODE_ONESHOT:
443                         if (!cpumask_empty(tick_broadcast_mask))
444                                 broadcast = tick_resume_broadcast_oneshot(bc);
445                         break;
446                 }
447         }
448         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
449
450         return broadcast;
451 }
452
453
454 #ifdef CONFIG_TICK_ONESHOT
455
456 static cpumask_var_t tick_broadcast_oneshot_mask;
457 static cpumask_var_t tick_broadcast_pending_mask;
458 static cpumask_var_t tick_broadcast_force_mask;
459
460 /*
461  * Exposed for debugging: see timer_list.c
462  */
463 struct cpumask *tick_get_broadcast_oneshot_mask(void)
464 {
465         return tick_broadcast_oneshot_mask;
466 }
467
468 /*
469  * Called before going idle with interrupts disabled. Checks whether a
470  * broadcast event from the other core is about to happen. We detected
471  * that in tick_broadcast_oneshot_control(). The callsite can use this
472  * to avoid a deep idle transition as we are about to get the
473  * broadcast IPI right away.
474  */
475 int tick_check_broadcast_expired(void)
476 {
477         return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
478 }
479
480 /*
481  * Set broadcast interrupt affinity
482  */
483 static void tick_broadcast_set_affinity(struct clock_event_device *bc,
484                                         const struct cpumask *cpumask)
485 {
486         if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
487                 return;
488
489         if (cpumask_equal(bc->cpumask, cpumask))
490                 return;
491
492         bc->cpumask = cpumask;
493         irq_set_affinity(bc->irq, bc->cpumask);
494 }
495
496 static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
497                                     ktime_t expires, int force)
498 {
499         int ret;
500
501         if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
502                 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
503
504         ret = clockevents_program_event(bc, expires, force);
505         if (!ret)
506                 tick_broadcast_set_affinity(bc, cpumask_of(cpu));
507         return ret;
508 }
509
510 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
511 {
512         clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
513         return 0;
514 }
515
516 /*
517  * Called from irq_enter() when idle was interrupted to reenable the
518  * per cpu device.
519  */
520 void tick_check_oneshot_broadcast(int cpu)
521 {
522         if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
523                 struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
524
525                 /*
526                  * We might be in the middle of switching over from
527                  * periodic to oneshot. If the CPU has not yet
528                  * switched over, leave the device alone.
529                  */
530                 if (td->mode == TICKDEV_MODE_ONESHOT) {
531                         clockevents_set_mode(td->evtdev,
532                                              CLOCK_EVT_MODE_ONESHOT);
533                 }
534         }
535 }
536
537 /*
538  * Handle oneshot mode broadcasting
539  */
540 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
541 {
542         struct tick_device *td;
543         ktime_t now, next_event;
544         int cpu, next_cpu = 0;
545
546         raw_spin_lock(&tick_broadcast_lock);
547 again:
548         dev->next_event.tv64 = KTIME_MAX;
549         next_event.tv64 = KTIME_MAX;
550         cpumask_clear(tmpmask);
551         now = ktime_get();
552         /* Find all expired events */
553         for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
554                 td = &per_cpu(tick_cpu_device, cpu);
555                 if (td->evtdev->next_event.tv64 <= now.tv64) {
556                         cpumask_set_cpu(cpu, tmpmask);
557                         /*
558                          * Mark the remote cpu in the pending mask, so
559                          * it can avoid reprogramming the cpu local
560                          * timer in tick_broadcast_oneshot_control().
561                          */
562                         cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
563                 } else if (td->evtdev->next_event.tv64 < next_event.tv64) {
564                         next_event.tv64 = td->evtdev->next_event.tv64;
565                         next_cpu = cpu;
566                 }
567         }
568
569         /*
570          * Remove the current cpu from the pending mask. The event is
571          * delivered immediately in tick_do_broadcast() !
572          */
573         cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
574
575         /* Take care of enforced broadcast requests */
576         cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
577         cpumask_clear(tick_broadcast_force_mask);
578
579         /*
580          * Wakeup the cpus which have an expired event.
581          */
582         tick_do_broadcast(tmpmask);
583
584         /*
585          * Two reasons for reprogram:
586          *
587          * - The global event did not expire any CPU local
588          * events. This happens in dyntick mode, as the maximum PIT
589          * delta is quite small.
590          *
591          * - There are pending events on sleeping CPUs which were not
592          * in the event mask
593          */
594         if (next_event.tv64 != KTIME_MAX) {
595                 /*
596                  * Rearm the broadcast device. If event expired,
597                  * repeat the above
598                  */
599                 if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
600                         goto again;
601         }
602         raw_spin_unlock(&tick_broadcast_lock);
603 }
604
605 /*
606  * Powerstate information: The system enters/leaves a state, where
607  * affected devices might stop
608  */
609 void tick_broadcast_oneshot_control(unsigned long reason)
610 {
611         struct clock_event_device *bc, *dev;
612         struct tick_device *td;
613         unsigned long flags;
614         ktime_t now;
615         int cpu;
616
617         /*
618          * Periodic mode does not care about the enter/exit of power
619          * states
620          */
621         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
622                 return;
623
624         /*
625          * We are called with preemtion disabled from the depth of the
626          * idle code, so we can't be moved away.
627          */
628         cpu = smp_processor_id();
629         td = &per_cpu(tick_cpu_device, cpu);
630         dev = td->evtdev;
631
632         if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
633                 return;
634
635         bc = tick_broadcast_device.evtdev;
636
637         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
638         if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
639                 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
640                         WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
641                         clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
642                         /*
643                          * We only reprogram the broadcast timer if we
644                          * did not mark ourself in the force mask and
645                          * if the cpu local event is earlier than the
646                          * broadcast event. If the current CPU is in
647                          * the force mask, then we are going to be
648                          * woken by the IPI right away.
649                          */
650                         if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
651                             dev->next_event.tv64 < bc->next_event.tv64)
652                                 tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
653                 }
654         } else {
655                 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
656                         clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
657                         /*
658                          * The cpu which was handling the broadcast
659                          * timer marked this cpu in the broadcast
660                          * pending mask and fired the broadcast
661                          * IPI. So we are going to handle the expired
662                          * event anyway via the broadcast IPI
663                          * handler. No need to reprogram the timer
664                          * with an already expired event.
665                          */
666                         if (cpumask_test_and_clear_cpu(cpu,
667                                        tick_broadcast_pending_mask))
668                                 goto out;
669
670                         /*
671                          * Bail out if there is no next event.
672                          */
673                         if (dev->next_event.tv64 == KTIME_MAX)
674                                 goto out;
675                         /*
676                          * If the pending bit is not set, then we are
677                          * either the CPU handling the broadcast
678                          * interrupt or we got woken by something else.
679                          *
680                          * We are not longer in the broadcast mask, so
681                          * if the cpu local expiry time is already
682                          * reached, we would reprogram the cpu local
683                          * timer with an already expired event.
684                          *
685                          * This can lead to a ping-pong when we return
686                          * to idle and therefor rearm the broadcast
687                          * timer before the cpu local timer was able
688                          * to fire. This happens because the forced
689                          * reprogramming makes sure that the event
690                          * will happen in the future and depending on
691                          * the min_delta setting this might be far
692                          * enough out that the ping-pong starts.
693                          *
694                          * If the cpu local next_event has expired
695                          * then we know that the broadcast timer
696                          * next_event has expired as well and
697                          * broadcast is about to be handled. So we
698                          * avoid reprogramming and enforce that the
699                          * broadcast handler, which did not run yet,
700                          * will invoke the cpu local handler.
701                          *
702                          * We cannot call the handler directly from
703                          * here, because we might be in a NOHZ phase
704                          * and we did not go through the irq_enter()
705                          * nohz fixups.
706                          */
707                         now = ktime_get();
708                         if (dev->next_event.tv64 <= now.tv64) {
709                                 cpumask_set_cpu(cpu, tick_broadcast_force_mask);
710                                 goto out;
711                         }
712                         /*
713                          * We got woken by something else. Reprogram
714                          * the cpu local timer device.
715                          */
716                         tick_program_event(dev->next_event, 1);
717                 }
718         }
719 out:
720         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
721 }
722
723 /*
724  * Reset the one shot broadcast for a cpu
725  *
726  * Called with tick_broadcast_lock held
727  */
728 static void tick_broadcast_clear_oneshot(int cpu)
729 {
730         cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
731 }
732
733 static void tick_broadcast_init_next_event(struct cpumask *mask,
734                                            ktime_t expires)
735 {
736         struct tick_device *td;
737         int cpu;
738
739         for_each_cpu(cpu, mask) {
740                 td = &per_cpu(tick_cpu_device, cpu);
741                 if (td->evtdev)
742                         td->evtdev->next_event = expires;
743         }
744 }
745
746 /**
747  * tick_broadcast_setup_oneshot - setup the broadcast device
748  */
749 void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
750 {
751         int cpu = smp_processor_id();
752
753         /* Set it up only once ! */
754         if (bc->event_handler != tick_handle_oneshot_broadcast) {
755                 int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
756
757                 bc->event_handler = tick_handle_oneshot_broadcast;
758
759                 /*
760                  * We must be careful here. There might be other CPUs
761                  * waiting for periodic broadcast. We need to set the
762                  * oneshot_mask bits for those and program the
763                  * broadcast device to fire.
764                  */
765                 cpumask_copy(tmpmask, tick_broadcast_mask);
766                 cpumask_clear_cpu(cpu, tmpmask);
767                 cpumask_or(tick_broadcast_oneshot_mask,
768                            tick_broadcast_oneshot_mask, tmpmask);
769
770                 if (was_periodic && !cpumask_empty(tmpmask)) {
771                         clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
772                         tick_broadcast_init_next_event(tmpmask,
773                                                        tick_next_period);
774                         tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
775                 } else
776                         bc->next_event.tv64 = KTIME_MAX;
777         } else {
778                 /*
779                  * The first cpu which switches to oneshot mode sets
780                  * the bit for all other cpus which are in the general
781                  * (periodic) broadcast mask. So the bit is set and
782                  * would prevent the first broadcast enter after this
783                  * to program the bc device.
784                  */
785                 tick_broadcast_clear_oneshot(cpu);
786         }
787 }
788
789 /*
790  * Select oneshot operating mode for the broadcast device
791  */
792 void tick_broadcast_switch_to_oneshot(void)
793 {
794         struct clock_event_device *bc;
795         unsigned long flags;
796
797         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
798
799         tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
800         bc = tick_broadcast_device.evtdev;
801         if (bc)
802                 tick_broadcast_setup_oneshot(bc);
803
804         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
805 }
806
807
808 /*
809  * Remove a dead CPU from broadcasting
810  */
811 void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
812 {
813         unsigned long flags;
814         unsigned int cpu = *cpup;
815
816         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
817
818         /*
819          * Clear the broadcast mask flag for the dead cpu, but do not
820          * stop the broadcast device!
821          */
822         cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
823
824         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
825 }
826
827 /*
828  * Check, whether the broadcast device is in one shot mode
829  */
830 int tick_broadcast_oneshot_active(void)
831 {
832         return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
833 }
834
835 /*
836  * Check whether the broadcast device supports oneshot.
837  */
838 bool tick_broadcast_oneshot_available(void)
839 {
840         struct clock_event_device *bc = tick_broadcast_device.evtdev;
841
842         return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
843 }
844
845 #endif
846
847 void __init tick_broadcast_init(void)
848 {
849         zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
850         zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
851         zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
852 #ifdef CONFIG_TICK_ONESHOT
853         zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
854         zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
855         zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
856 #endif
857 }