workqueue: implement worker states
1 /*
2  * linux/kernel/workqueue.c
3  *
4  * Generic mechanism for defining kernel helper threads for running
5  * arbitrary tasks in process context.
6  *
7  * Started by Ingo Molnar, Copyright (C) 2002
8  *
9  * Derived from the taskqueue/keventd code by:
10  *
11  *   David Woodhouse <dwmw2@infradead.org>
12  *   Andrew Morton
13  *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
14  *   Theodore Ts'o <tytso@mit.edu>
15  *
16  * Made to use alloc_percpu by Christoph Lameter.
17  */
18
19 #include <linux/module.h>
20 #include <linux/kernel.h>
21 #include <linux/sched.h>
22 #include <linux/init.h>
23 #include <linux/signal.h>
24 #include <linux/completion.h>
25 #include <linux/workqueue.h>
26 #include <linux/slab.h>
27 #include <linux/cpu.h>
28 #include <linux/notifier.h>
29 #include <linux/kthread.h>
30 #include <linux/hardirq.h>
31 #include <linux/mempolicy.h>
32 #include <linux/freezer.h>
33 #include <linux/kallsyms.h>
34 #include <linux/debug_locks.h>
35 #include <linux/lockdep.h>
36 #include <linux/idr.h>
37
38 enum {
39         /* worker flags */
40         WORKER_STARTED          = 1 << 0,       /* started */
41         WORKER_DIE              = 1 << 1,       /* die die die */
42         WORKER_IDLE             = 1 << 2,       /* is idle */
43
44         BUSY_WORKER_HASH_ORDER  = 6,            /* 64 pointers */
45         BUSY_WORKER_HASH_SIZE   = 1 << BUSY_WORKER_HASH_ORDER,
46         BUSY_WORKER_HASH_MASK   = BUSY_WORKER_HASH_SIZE - 1,
47 };
48
49 /*
50  * Structure fields follow one of the following exclusion rules.
51  *
52  * I: Set during initialization and read-only afterwards.
53  *
54  * L: gcwq->lock protected.  Access with gcwq->lock held.
55  *
56  * F: wq->flush_mutex protected.
57  *
58  * W: workqueue_lock protected.
59  */
60
61 struct global_cwq;
62 struct cpu_workqueue_struct;
63
64 struct worker {
65         /* on idle list while idle, on busy hash table while busy */
66         union {
67                 struct list_head        entry;  /* L: while idle */
68                 struct hlist_node       hentry; /* L: while busy */
69         };
70
71         struct work_struct      *current_work;  /* L: work being processed */
72         struct list_head        scheduled;      /* L: scheduled works */
73         struct task_struct      *task;          /* I: worker task */
74         struct global_cwq       *gcwq;          /* I: the associated gcwq */
75         struct cpu_workqueue_struct *cwq;       /* I: the associated cwq */
76         unsigned int            flags;          /* L: flags */
77         int                     id;             /* I: worker id */
78 };
79
80 /*
81  * Global per-cpu workqueue.
82  */
83 struct global_cwq {
84         spinlock_t              lock;           /* the gcwq lock */
85         unsigned int            cpu;            /* I: the associated cpu */
86
87         int                     nr_workers;     /* L: total number of workers */
88         int                     nr_idle;        /* L: currently idle ones */
89
90         /* workers are chained either in the idle_list or busy_hash */
91         struct list_head        idle_list;      /* L: list of idle workers */
92         struct hlist_head       busy_hash[BUSY_WORKER_HASH_SIZE];
93                                                 /* L: hash of busy workers */
94
95         struct ida              worker_ida;     /* L: for worker IDs */
96 } ____cacheline_aligned_in_smp;
97
98 /*
99  * The per-CPU workqueue (if single thread, we always use the first
100  * possible cpu).  The lower WORK_STRUCT_FLAG_BITS of
101  * work_struct->data are used for flags, so cwqs need to be
102  * aligned on a (1 << WORK_STRUCT_FLAG_BITS) byte boundary.
103  */
104 struct cpu_workqueue_struct {
105         struct global_cwq       *gcwq;          /* I: the associated gcwq */
106         struct list_head worklist;
107         struct worker           *worker;
108         struct workqueue_struct *wq;            /* I: the owning workqueue */
109         int                     work_color;     /* L: current color */
110         int                     flush_color;    /* L: flushing color */
111         int                     nr_in_flight[WORK_NR_COLORS];
112                                                 /* L: nr of in_flight works */
113         int                     nr_active;      /* L: nr of active works */
114         int                     max_active;     /* L: max active works */
115         struct list_head        delayed_works;  /* L: delayed works */
116 };
117
118 /*
119  * Structure used to wait for workqueue flush.
120  */
121 struct wq_flusher {
122         struct list_head        list;           /* F: list of flushers */
123         int                     flush_color;    /* F: flush color waiting for */
124         struct completion       done;           /* flush completion */
125 };
126
127 /*
128  * The externally visible workqueue abstraction is an array of
129  * per-CPU workqueues:
130  */
131 struct workqueue_struct {
132         unsigned int            flags;          /* I: WQ_* flags */
133         struct cpu_workqueue_struct *cpu_wq;    /* I: cwq's */
134         struct list_head        list;           /* W: list of all workqueues */
135
136         struct mutex            flush_mutex;    /* protects wq flushing */
137         int                     work_color;     /* F: current work color */
138         int                     flush_color;    /* F: current flush color */
139         atomic_t                nr_cwqs_to_flush; /* flush in progress */
140         struct wq_flusher       *first_flusher; /* F: first flusher */
141         struct list_head        flusher_queue;  /* F: flush waiters */
142         struct list_head        flusher_overflow; /* F: flush overflow list */
143
144         int                     saved_max_active; /* I: saved cwq max_active */
145         const char              *name;          /* I: workqueue name */
146 #ifdef CONFIG_LOCKDEP
147         struct lockdep_map      lockdep_map;
148 #endif
149 };
150
151 #ifdef CONFIG_DEBUG_OBJECTS_WORK
152
153 static struct debug_obj_descr work_debug_descr;
154
155 /*
156  * fixup_init is called when:
157  * - an active object is initialized
158  */
159 static int work_fixup_init(void *addr, enum debug_obj_state state)
160 {
161         struct work_struct *work = addr;
162
163         switch (state) {
164         case ODEBUG_STATE_ACTIVE:
165                 cancel_work_sync(work);
166                 debug_object_init(work, &work_debug_descr);
167                 return 1;
168         default:
169                 return 0;
170         }
171 }
172
173 /*
174  * fixup_activate is called when:
175  * - an active object is activated
176  * - an unknown object is activated (might be a statically initialized object)
177  */
178 static int work_fixup_activate(void *addr, enum debug_obj_state state)
179 {
180         struct work_struct *work = addr;
181
182         switch (state) {
183
184         case ODEBUG_STATE_NOTAVAILABLE:
185                 /*
186                  * This is not really a fixup. The work struct was
187                  * statically initialized. We just make sure that it
188                  * is tracked in the object tracker.
189                  */
190                 if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
191                         debug_object_init(work, &work_debug_descr);
192                         debug_object_activate(work, &work_debug_descr);
193                         return 0;
194                 }
195                 WARN_ON_ONCE(1);
196                 return 0;
197
198         case ODEBUG_STATE_ACTIVE:
199                 WARN_ON(1);
200                 /* fall through */
201         default:
202                 return 0;
203         }
204 }
205
206 /*
207  * fixup_free is called when:
208  * - an active object is freed
209  */
210 static int work_fixup_free(void *addr, enum debug_obj_state state)
211 {
212         struct work_struct *work = addr;
213
214         switch (state) {
215         case ODEBUG_STATE_ACTIVE:
216                 cancel_work_sync(work);
217                 debug_object_free(work, &work_debug_descr);
218                 return 1;
219         default:
220                 return 0;
221         }
222 }
223
224 static struct debug_obj_descr work_debug_descr = {
225         .name           = "work_struct",
226         .fixup_init     = work_fixup_init,
227         .fixup_activate = work_fixup_activate,
228         .fixup_free     = work_fixup_free,
229 };
230
231 static inline void debug_work_activate(struct work_struct *work)
232 {
233         debug_object_activate(work, &work_debug_descr);
234 }
235
236 static inline void debug_work_deactivate(struct work_struct *work)
237 {
238         debug_object_deactivate(work, &work_debug_descr);
239 }
240
241 void __init_work(struct work_struct *work, int onstack)
242 {
243         if (onstack)
244                 debug_object_init_on_stack(work, &work_debug_descr);
245         else
246                 debug_object_init(work, &work_debug_descr);
247 }
248 EXPORT_SYMBOL_GPL(__init_work);
249
250 void destroy_work_on_stack(struct work_struct *work)
251 {
252         debug_object_free(work, &work_debug_descr);
253 }
254 EXPORT_SYMBOL_GPL(destroy_work_on_stack);
255
256 #else
257 static inline void debug_work_activate(struct work_struct *work) { }
258 static inline void debug_work_deactivate(struct work_struct *work) { }
259 #endif
260
261 /* Serializes the accesses to the list of workqueues. */
262 static DEFINE_SPINLOCK(workqueue_lock);
263 static LIST_HEAD(workqueues);
264 static bool workqueue_freezing;         /* W: have wqs started freezing? */
265
266 static DEFINE_PER_CPU(struct global_cwq, global_cwq);
267
268 static int worker_thread(void *__worker);
269
270 static int singlethread_cpu __read_mostly;
271
272 static struct global_cwq *get_gcwq(unsigned int cpu)
273 {
274         return &per_cpu(global_cwq, cpu);
275 }
276
277 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
278                                             struct workqueue_struct *wq)
279 {
280         return per_cpu_ptr(wq->cpu_wq, cpu);
281 }
282
283 static struct cpu_workqueue_struct *target_cwq(unsigned int cpu,
284                                                struct workqueue_struct *wq)
285 {
286         if (unlikely(wq->flags & WQ_SINGLE_THREAD))
287                 cpu = singlethread_cpu;
288         return get_cwq(cpu, wq);
289 }
290
291 static unsigned int work_color_to_flags(int color)
292 {
293         return color << WORK_STRUCT_COLOR_SHIFT;
294 }
295
296 static int get_work_color(struct work_struct *work)
297 {
298         return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
299                 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
300 }
301
302 static int work_next_color(int color)
303 {
304         return (color + 1) % WORK_NR_COLORS;
305 }
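
/*
 * Illustrative sketch only: how the color helpers above fit together.
 * A color packed into work->data by work_color_to_flags() is recovered
 * by get_work_color(), and work_next_color() cycles through the
 * WORK_NR_COLORS color space.  color_roundtrip_demo() is a hypothetical
 * name used purely for illustration.
 */
#if 0
static void color_roundtrip_demo(struct work_struct *work, int color)
{
        /* store only the color bits, as work_color_to_flags() would */
        atomic_long_set(&work->data, work_color_to_flags(color));
        BUG_ON(get_work_color(work) != color);

        /* the last valid color wraps back around to 0 */
        BUG_ON(work_next_color(WORK_NR_COLORS - 1) != 0);
}
#endif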
306
307 /*
308  * Set the workqueue on which a work item is to be run
309  * - Must *only* be called if the pending flag is set
310  */
311 static inline void set_wq_data(struct work_struct *work,
312                                struct cpu_workqueue_struct *cwq,
313                                unsigned long extra_flags)
314 {
315         BUG_ON(!work_pending(work));
316
317         atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) |
318                         WORK_STRUCT_PENDING | extra_flags);
319 }
320
321 /*
322  * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
323  */
324 static inline void clear_wq_data(struct work_struct *work)
325 {
326         atomic_long_set(&work->data, work_static(work));
327 }
328
329 static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
330 {
331         return (void *)(atomic_long_read(&work->data) &
332                         WORK_STRUCT_WQ_DATA_MASK);
333 }
334
335 /**
336  * busy_worker_head - return the busy hash head for a work
337  * @gcwq: gcwq of interest
338  * @work: work to be hashed
339  *
340  * Return hash head of @gcwq for @work.
341  *
342  * CONTEXT:
343  * spin_lock_irq(gcwq->lock).
344  *
345  * RETURNS:
346  * Pointer to the hash head.
347  */
348 static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
349                                            struct work_struct *work)
350 {
351         const int base_shift = ilog2(sizeof(struct work_struct));
352         unsigned long v = (unsigned long)work;
353
354         /* simple shift and fold hash, do we need something better? */
355         v >>= base_shift;
356         v += v >> BUSY_WORKER_HASH_ORDER;
357         v &= BUSY_WORKER_HASH_MASK;
358
359         return &gcwq->busy_hash[v];
360 }
361
362 /**
363  * insert_work - insert a work into cwq
364  * @cwq: cwq @work belongs to
365  * @work: work to insert
366  * @head: insertion point
367  * @extra_flags: extra WORK_STRUCT_* flags to set
368  *
369  * Insert @work into @cwq after @head.
370  *
371  * CONTEXT:
372  * spin_lock_irq(gcwq->lock).
373  */
374 static void insert_work(struct cpu_workqueue_struct *cwq,
375                         struct work_struct *work, struct list_head *head,
376                         unsigned int extra_flags)
377 {
378         /* we own @work, set data and link */
379         set_wq_data(work, cwq, extra_flags);
380
381         /*
382          * Ensure that we get the right work->data if we see the
383          * result of list_add() below, see try_to_grab_pending().
384          */
385         smp_wmb();
386
387         list_add_tail(&work->entry, head);
388         wake_up_process(cwq->worker->task);
389 }
390
391 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
392                          struct work_struct *work)
393 {
394         struct cpu_workqueue_struct *cwq = target_cwq(cpu, wq);
395         struct global_cwq *gcwq = cwq->gcwq;
396         struct list_head *worklist;
397         unsigned long flags;
398
399         debug_work_activate(work);
400
401         spin_lock_irqsave(&gcwq->lock, flags);
402         BUG_ON(!list_empty(&work->entry));
403
404         cwq->nr_in_flight[cwq->work_color]++;
405
406         if (likely(cwq->nr_active < cwq->max_active)) {
407                 cwq->nr_active++;
408                 worklist = &cwq->worklist;
409         } else
410                 worklist = &cwq->delayed_works;
411
412         insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color));
413
414         spin_unlock_irqrestore(&gcwq->lock, flags);
415 }
416
417 /**
418  * queue_work - queue work on a workqueue
419  * @wq: workqueue to use
420  * @work: work to queue
421  *
422  * Returns 0 if @work was already on a queue, non-zero otherwise.
423  *
424  * We queue the work to the CPU on which it was submitted, but if the CPU dies
425  * it can be processed by another CPU.
426  */
427 int queue_work(struct workqueue_struct *wq, struct work_struct *work)
428 {
429         int ret;
430
431         ret = queue_work_on(get_cpu(), wq, work);
432         put_cpu();
433
434         return ret;
435 }
436 EXPORT_SYMBOL_GPL(queue_work);
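
/*
 * Usage sketch (illustration only): a driver typically pairs
 * queue_work() with a work function set up via INIT_WORK() or
 * DECLARE_WORK().  my_wq, my_work, my_work_fn and my_kick below are
 * hypothetical names; <linux/workqueue.h> is already included above.
 */
#if 0
static void my_work_fn(struct work_struct *work)
{
        /* runs later in process context on a worker thread */
}

static DECLARE_WORK(my_work, my_work_fn);

static void my_kick(struct workqueue_struct *my_wq)
{
        /* returns 0 if my_work was already pending, non-zero otherwise */
        queue_work(my_wq, &my_work);
}
#endif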
437
438 /**
439  * queue_work_on - queue work on specific cpu
440  * @cpu: CPU number to execute work on
441  * @wq: workqueue to use
442  * @work: work to queue
443  *
444  * Returns 0 if @work was already on a queue, non-zero otherwise.
445  *
446  * We queue the work to a specific CPU, the caller must ensure it
447  * can't go away.
448  */
449 int
450 queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
451 {
452         int ret = 0;
453
454         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
455                 __queue_work(cpu, wq, work);
456                 ret = 1;
457         }
458         return ret;
459 }
460 EXPORT_SYMBOL_GPL(queue_work_on);
461
462 static void delayed_work_timer_fn(unsigned long __data)
463 {
464         struct delayed_work *dwork = (struct delayed_work *)__data;
465         struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
466
467         __queue_work(smp_processor_id(), cwq->wq, &dwork->work);
468 }
469
470 /**
471  * queue_delayed_work - queue work on a workqueue after delay
472  * @wq: workqueue to use
473  * @dwork: delayable work to queue
474  * @delay: number of jiffies to wait before queueing
475  *
476  * Returns 0 if @dwork was already on a queue, non-zero otherwise.
477  */
478 int queue_delayed_work(struct workqueue_struct *wq,
479                         struct delayed_work *dwork, unsigned long delay)
480 {
481         if (delay == 0)
482                 return queue_work(wq, &dwork->work);
483
484         return queue_delayed_work_on(-1, wq, dwork, delay);
485 }
486 EXPORT_SYMBOL_GPL(queue_delayed_work);
487
488 /**
489  * queue_delayed_work_on - queue work on specific CPU after delay
490  * @cpu: CPU number to execute work on
491  * @wq: workqueue to use
492  * @dwork: work to queue
493  * @delay: number of jiffies to wait before queueing
494  *
495  * Returns 0 if @dwork was already on a queue, non-zero otherwise.
496  */
497 int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
498                         struct delayed_work *dwork, unsigned long delay)
499 {
500         int ret = 0;
501         struct timer_list *timer = &dwork->timer;
502         struct work_struct *work = &dwork->work;
503
504         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
505                 BUG_ON(timer_pending(timer));
506                 BUG_ON(!list_empty(&work->entry));
507
508                 timer_stats_timer_set_start_info(&dwork->timer);
509
510                 /* This stores cwq for the moment, for the timer_fn */
511                 set_wq_data(work, target_cwq(raw_smp_processor_id(), wq), 0);
512                 timer->expires = jiffies + delay;
513                 timer->data = (unsigned long)dwork;
514                 timer->function = delayed_work_timer_fn;
515
516                 if (unlikely(cpu >= 0))
517                         add_timer_on(timer, cpu);
518                 else
519                         add_timer(timer);
520                 ret = 1;
521         }
522         return ret;
523 }
524 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
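
/*
 * Usage sketch (illustration only): delayed works carry their own timer,
 * so a driver only supplies the delay in jiffies.  my_wq, my_dwork and
 * my_dwork_fn are hypothetical names.
 */
#if 0
static void my_dwork_fn(struct work_struct *work)
{
        struct delayed_work *dwork = container_of(work, struct delayed_work,
                                                  work);
        /* the dwork can be re-armed from here with queue_delayed_work() */
}

static DECLARE_DELAYED_WORK(my_dwork, my_dwork_fn);

static void my_kick_later(struct workqueue_struct *my_wq)
{
        /* run my_dwork_fn() roughly 100ms from now */
        queue_delayed_work(my_wq, &my_dwork, msecs_to_jiffies(100));
}
#endif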
525
526 /**
527  * worker_enter_idle - enter idle state
528  * @worker: worker which is entering idle state
529  *
530  * @worker is entering idle state.  Update stats and put @worker on
531  * the gcwq's idle_list.
532  *
533  * LOCKING:
534  * spin_lock_irq(gcwq->lock).
535  */
536 static void worker_enter_idle(struct worker *worker)
537 {
538         struct global_cwq *gcwq = worker->gcwq;
539
540         BUG_ON(worker->flags & WORKER_IDLE);
541         BUG_ON(!list_empty(&worker->entry) &&
542                (worker->hentry.next || worker->hentry.pprev));
543
544         worker->flags |= WORKER_IDLE;
545         gcwq->nr_idle++;
546
547         /* idle_list is LIFO */
548         list_add(&worker->entry, &gcwq->idle_list);
549 }
550
551 /**
552  * worker_leave_idle - leave idle state
553  * @worker: worker which is leaving idle state
554  *
555  * @worker is leaving idle state.  Update stats.
556  *
557  * LOCKING:
558  * spin_lock_irq(gcwq->lock).
559  */
560 static void worker_leave_idle(struct worker *worker)
561 {
562         struct global_cwq *gcwq = worker->gcwq;
563
564         BUG_ON(!(worker->flags & WORKER_IDLE));
565         worker->flags &= ~WORKER_IDLE;
566         gcwq->nr_idle--;
567         list_del_init(&worker->entry);
568 }
569
570 static struct worker *alloc_worker(void)
571 {
572         struct worker *worker;
573
574         worker = kzalloc(sizeof(*worker), GFP_KERNEL);
575         if (worker) {
576                 INIT_LIST_HEAD(&worker->entry);
577                 INIT_LIST_HEAD(&worker->scheduled);
578         }
579         return worker;
580 }
581
582 /**
583  * create_worker - create a new workqueue worker
584  * @cwq: cwq the new worker will belong to
585  * @bind: whether to bind the new worker's affinity to the gcwq's CPU
586  *
587  * Create a new worker which is bound to @cwq.  The returned worker
588  * can be started by calling start_worker() or destroyed using
589  * destroy_worker().
590  *
591  * CONTEXT:
592  * Might sleep.  Does GFP_KERNEL allocations.
593  *
594  * RETURNS:
595  * Pointer to the newly created worker, or %NULL on failure.
596  */
597 static struct worker *create_worker(struct cpu_workqueue_struct *cwq, bool bind)
598 {
599         struct global_cwq *gcwq = cwq->gcwq;
600         int id = -1;
601         struct worker *worker = NULL;
602
603         spin_lock_irq(&gcwq->lock);
604         while (ida_get_new(&gcwq->worker_ida, &id)) {
605                 spin_unlock_irq(&gcwq->lock);
606                 if (!ida_pre_get(&gcwq->worker_ida, GFP_KERNEL))
607                         goto fail;
608                 spin_lock_irq(&gcwq->lock);
609         }
610         spin_unlock_irq(&gcwq->lock);
611
612         worker = alloc_worker();
613         if (!worker)
614                 goto fail;
615
616         worker->gcwq = gcwq;
617         worker->cwq = cwq;
618         worker->id = id;
619
620         worker->task = kthread_create(worker_thread, worker, "kworker/%u:%d",
621                                       gcwq->cpu, id);
622         if (IS_ERR(worker->task))
623                 goto fail;
624
625         if (bind)
626                 kthread_bind(worker->task, gcwq->cpu);
627
628         return worker;
629 fail:
630         if (id >= 0) {
631                 spin_lock_irq(&gcwq->lock);
632                 ida_remove(&gcwq->worker_ida, id);
633                 spin_unlock_irq(&gcwq->lock);
634         }
635         kfree(worker);
636         return NULL;
637 }
638
639 /**
640  * start_worker - start a newly created worker
641  * @worker: worker to start
642  *
643  * Make the gcwq aware of @worker and start it.
644  *
645  * CONTEXT:
646  * spin_lock_irq(gcwq->lock).
647  */
648 static void start_worker(struct worker *worker)
649 {
650         worker->flags |= WORKER_STARTED;
651         worker->gcwq->nr_workers++;
652         worker_enter_idle(worker);
653         wake_up_process(worker->task);
654 }
655
656 /**
657  * destroy_worker - destroy a workqueue worker
658  * @worker: worker to be destroyed
659  *
660  * Destroy @worker and adjust @gcwq stats accordingly.
661  *
662  * CONTEXT:
663  * spin_lock_irq(gcwq->lock) which is released and regrabbed.
664  */
665 static void destroy_worker(struct worker *worker)
666 {
667         struct global_cwq *gcwq = worker->gcwq;
668         int id = worker->id;
669
670         /* sanity check frenzy */
671         BUG_ON(worker->current_work);
672         BUG_ON(!list_empty(&worker->scheduled));
673
674         if (worker->flags & WORKER_STARTED)
675                 gcwq->nr_workers--;
676         if (worker->flags & WORKER_IDLE)
677                 gcwq->nr_idle--;
678
679         list_del_init(&worker->entry);
680         worker->flags |= WORKER_DIE;
681
682         spin_unlock_irq(&gcwq->lock);
683
684         kthread_stop(worker->task);
685         kfree(worker);
686
687         spin_lock_irq(&gcwq->lock);
688         ida_remove(&gcwq->worker_ida, id);
689 }
690
691 /**
692  * move_linked_works - move linked works to a list
693  * @work: start of series of works to be scheduled
694  * @head: target list to append @work to
695  * @nextp: out parameter for nested worklist walking
696  *
697  * Schedule linked works starting from @work to @head.  Work series to
698  * be scheduled starts at @work and includes any consecutive work with
699  * WORK_STRUCT_LINKED set in its predecessor.
700  *
701  * If @nextp is not NULL, it's updated to point to the next work of
702  * the last scheduled work.  This allows move_linked_works() to be
703  * nested inside outer list_for_each_entry_safe().
704  *
705  * CONTEXT:
706  * spin_lock_irq(gcwq->lock).
707  */
708 static void move_linked_works(struct work_struct *work, struct list_head *head,
709                               struct work_struct **nextp)
710 {
711         struct work_struct *n;
712
713         /*
714          * Linked worklist will always end before the end of the list,
715          * use NULL for list head.
716          */
717         list_for_each_entry_safe_from(work, n, NULL, entry) {
718                 list_move_tail(&work->entry, head);
719                 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
720                         break;
721         }
722
723         /*
724          * If we're already inside safe list traversal and have moved
725          * multiple works to the scheduled queue, the next position
726          * needs to be updated.
727          */
728         if (nextp)
729                 *nextp = n;
730 }
731
732 static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
733 {
734         struct work_struct *work = list_first_entry(&cwq->delayed_works,
735                                                     struct work_struct, entry);
736
737         move_linked_works(work, &cwq->worklist, NULL);
738         cwq->nr_active++;
739 }
740
741 /**
742  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
743  * @cwq: cwq of interest
744  * @color: color of work which left the queue
745  *
746  * A work has either completed or been removed from the pending queue;
747  * decrement nr_in_flight of its cwq and handle workqueue flushing.
748  *
749  * CONTEXT:
750  * spin_lock_irq(gcwq->lock).
751  */
752 static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
753 {
754         /* ignore uncolored works */
755         if (color == WORK_NO_COLOR)
756                 return;
757
758         cwq->nr_in_flight[color]--;
759         cwq->nr_active--;
760
761         /* one down, submit a delayed one */
762         if (!list_empty(&cwq->delayed_works) &&
763             cwq->nr_active < cwq->max_active)
764                 cwq_activate_first_delayed(cwq);
765
766         /* is flush in progress and are we at the flushing tip? */
767         if (likely(cwq->flush_color != color))
768                 return;
769
770         /* are there still in-flight works? */
771         if (cwq->nr_in_flight[color])
772                 return;
773
774         /* this cwq is done, clear flush_color */
775         cwq->flush_color = -1;
776
777         /*
778          * If this was the last cwq, wake up the first flusher.  It
779          * will handle the rest.
780          */
781         if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
782                 complete(&cwq->wq->first_flusher->done);
783 }
784
785 /**
786  * process_one_work - process single work
787  * @worker: self
788  * @work: work to process
789  *
790  * Process @work.  This function contains all the logic necessary to
791  * process a single work, including synchronization against and
792  * interaction with other workers on the same cpu, queueing and
793  * flushing.  As long as the context requirement is met, any worker can
794  * call this function to process a work.
795  *
796  * CONTEXT:
797  * spin_lock_irq(gcwq->lock) which is released and regrabbed.
798  */
799 static void process_one_work(struct worker *worker, struct work_struct *work)
800 {
801         struct cpu_workqueue_struct *cwq = worker->cwq;
802         struct global_cwq *gcwq = cwq->gcwq;
803         struct hlist_head *bwh = busy_worker_head(gcwq, work);
804         work_func_t f = work->func;
805         int work_color;
806 #ifdef CONFIG_LOCKDEP
807         /*
808          * It is permissible to free the struct work_struct from
809          * inside the function that is called from it, this we need to
810          * take into account for lockdep too.  To avoid bogus "held
811          * lock freed" warnings as well as problems when looking into
812          * work->lockdep_map, make a copy and use that here.
813          */
814         struct lockdep_map lockdep_map = work->lockdep_map;
815 #endif
816         /* claim and process */
817         debug_work_deactivate(work);
818         hlist_add_head(&worker->hentry, bwh);
819         worker->current_work = work;
820         work_color = get_work_color(work);
821         list_del_init(&work->entry);
822
823         spin_unlock_irq(&gcwq->lock);
824
825         BUG_ON(get_wq_data(work) != cwq);
826         work_clear_pending(work);
827         lock_map_acquire(&cwq->wq->lockdep_map);
828         lock_map_acquire(&lockdep_map);
829         f(work);
830         lock_map_release(&lockdep_map);
831         lock_map_release(&cwq->wq->lockdep_map);
832
833         if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
834                 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
835                        "%s/0x%08x/%d\n",
836                        current->comm, preempt_count(), task_pid_nr(current));
837                 printk(KERN_ERR "    last function: ");
838                 print_symbol("%s\n", (unsigned long)f);
839                 debug_show_held_locks(current);
840                 dump_stack();
841         }
842
843         spin_lock_irq(&gcwq->lock);
844
845         /* we're done with it, release */
846         hlist_del_init(&worker->hentry);
847         worker->current_work = NULL;
848         cwq_dec_nr_in_flight(cwq, work_color);
849 }
850
851 /**
852  * process_scheduled_works - process scheduled works
853  * @worker: self
854  *
855  * Process all scheduled works.  Please note that the scheduled list
856  * may change while processing a work, so this function repeatedly
857  * fetches a work from the top and executes it.
858  *
859  * CONTEXT:
860  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
861  * multiple times.
862  */
863 static void process_scheduled_works(struct worker *worker)
864 {
865         while (!list_empty(&worker->scheduled)) {
866                 struct work_struct *work = list_first_entry(&worker->scheduled,
867                                                 struct work_struct, entry);
868                 process_one_work(worker, work);
869         }
870 }
871
872 /**
873  * worker_thread - the worker thread function
874  * @__worker: self
875  *
876  * The cwq worker thread function.
877  */
878 static int worker_thread(void *__worker)
879 {
880         struct worker *worker = __worker;
881         struct global_cwq *gcwq = worker->gcwq;
882         struct cpu_workqueue_struct *cwq = worker->cwq;
883
884 woke_up:
885         if (unlikely(!cpumask_equal(&worker->task->cpus_allowed,
886                                     get_cpu_mask(gcwq->cpu))))
887                 set_cpus_allowed_ptr(worker->task, get_cpu_mask(gcwq->cpu));
888
889         spin_lock_irq(&gcwq->lock);
890
891         /* DIE can be set only while we're idle, checking here is enough */
892         if (worker->flags & WORKER_DIE) {
893                 spin_unlock_irq(&gcwq->lock);
894                 return 0;
895         }
896
897         worker_leave_idle(worker);
898
899         /*
900          * ->scheduled list can only be filled while a worker is
901          * preparing to process a work or actually processing it.
902          * Make sure nobody diddled with it while I was sleeping.
903          */
904         BUG_ON(!list_empty(&worker->scheduled));
905
906         while (!list_empty(&cwq->worklist)) {
907                 struct work_struct *work =
908                         list_first_entry(&cwq->worklist,
909                                          struct work_struct, entry);
910
911                 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
912                         /* optimization path, not strictly necessary */
913                         process_one_work(worker, work);
914                         if (unlikely(!list_empty(&worker->scheduled)))
915                                 process_scheduled_works(worker);
916                 } else {
917                         move_linked_works(work, &worker->scheduled, NULL);
918                         process_scheduled_works(worker);
919                 }
920         }
921
922         /*
923          * gcwq->lock is held and there's no work to process, sleep.
924          * Workers are woken up only while holding gcwq->lock, so
925          * setting the current state before releasing gcwq->lock is
926          * enough to prevent losing any event.
927          */
928         worker_enter_idle(worker);
929         __set_current_state(TASK_INTERRUPTIBLE);
930         spin_unlock_irq(&gcwq->lock);
931         schedule();
932         goto woke_up;
933 }
934
935 struct wq_barrier {
936         struct work_struct      work;
937         struct completion       done;
938 };
939
940 static void wq_barrier_func(struct work_struct *work)
941 {
942         struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
943         complete(&barr->done);
944 }
945
946 /**
947  * insert_wq_barrier - insert a barrier work
948  * @cwq: cwq to insert barrier into
949  * @barr: wq_barrier to insert
950  * @target: target work to attach @barr to
951  * @worker: worker currently executing @target, NULL if @target is not executing
952  *
953  * @barr is linked to @target such that @barr is completed only after
954  * @target finishes execution.  Please note that the ordering
955  * guarantee is observed only with respect to @target and on the local
956  * cpu.
957  *
958  * Currently, a queued barrier can't be canceled.  This is because
959  * try_to_grab_pending() can't determine whether the work to be
960  * grabbed is at the head of the queue and thus can't clear the
961  * LINKED flag of the previous work, while there must be a valid next
962  * work after a work with the LINKED flag set.
963  *
964  * Note that when @worker is non-NULL, @target may be modified
965  * underneath us, so we can't reliably determine cwq from @target.
966  *
967  * CONTEXT:
968  * spin_lock_irq(gcwq->lock).
969  */
970 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
971                               struct wq_barrier *barr,
972                               struct work_struct *target, struct worker *worker)
973 {
974         struct list_head *head;
975         unsigned int linked = 0;
976
977         /*
978          * debugobject calls are safe here even with gcwq->lock locked
979          * as we know for sure that this will not trigger any of the
980          * checks and call back into the fixup functions where we
981          * might deadlock.
982          */
983         INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
984         __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
985         init_completion(&barr->done);
986
987         /*
988          * If @target is currently being executed, schedule the
989          * barrier to the worker; otherwise, put it after @target.
990          */
991         if (worker)
992                 head = worker->scheduled.next;
993         else {
994                 unsigned long *bits = work_data_bits(target);
995
996                 head = target->entry.next;
997                 /* there can already be other linked works, inherit and set */
998                 linked = *bits & WORK_STRUCT_LINKED;
999                 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
1000         }
1001
1002         debug_work_activate(&barr->work);
1003         insert_work(cwq, &barr->work, head,
1004                     work_color_to_flags(WORK_NO_COLOR) | linked);
1005 }
1006
1007 /**
1008  * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
1009  * @wq: workqueue being flushed
1010  * @flush_color: new flush color, < 0 for no-op
1011  * @work_color: new work color, < 0 for no-op
1012  *
1013  * Prepare cwqs for workqueue flushing.
1014  *
1015  * If @flush_color is non-negative, flush_color on all cwqs should be
1016  * -1.  If no cwq has in-flight works at the specified color, all
1017  * cwq->flush_color's stay at -1 and %false is returned.  If any cwq
1018  * has in-flight works, its cwq->flush_color is set to
1019  * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
1020  * wakeup logic is armed and %true is returned.
1021  *
1022  * The caller should have initialized @wq->first_flusher prior to
1023  * calling this function with non-negative @flush_color.  If
1024  * @flush_color is negative, no flush color update is done and %false
1025  * is returned.
1026  *
1027  * If @work_color is non-negative, all cwqs should have the same
1028  * work_color which is previous to @work_color and all will be
1029  * advanced to @work_color.
1030  *
1031  * CONTEXT:
1032  * mutex_lock(wq->flush_mutex).
1033  *
1034  * RETURNS:
1035  * %true if @flush_color >= 0 and there's something to flush.  %false
1036  * otherwise.
1037  */
1038 static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
1039                                       int flush_color, int work_color)
1040 {
1041         bool wait = false;
1042         unsigned int cpu;
1043
1044         if (flush_color >= 0) {
1045                 BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
1046                 atomic_set(&wq->nr_cwqs_to_flush, 1);
1047         }
1048
1049         for_each_possible_cpu(cpu) {
1050                 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1051                 struct global_cwq *gcwq = cwq->gcwq;
1052
1053                 spin_lock_irq(&gcwq->lock);
1054
1055                 if (flush_color >= 0) {
1056                         BUG_ON(cwq->flush_color != -1);
1057
1058                         if (cwq->nr_in_flight[flush_color]) {
1059                                 cwq->flush_color = flush_color;
1060                                 atomic_inc(&wq->nr_cwqs_to_flush);
1061                                 wait = true;
1062                         }
1063                 }
1064
1065                 if (work_color >= 0) {
1066                         BUG_ON(work_color != work_next_color(cwq->work_color));
1067                         cwq->work_color = work_color;
1068                 }
1069
1070                 spin_unlock_irq(&gcwq->lock);
1071         }
1072
1073         if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
1074                 complete(&wq->first_flusher->done);
1075
1076         return wait;
1077 }
1078
1079 /**
1080  * flush_workqueue - ensure that any scheduled work has run to completion.
1081  * @wq: workqueue to flush
1082  *
1083  * Forces execution of the workqueue and blocks until its completion.
1084  * This is typically used in driver shutdown handlers.
1085  *
1086  * We sleep until all works which were queued on entry have been handled,
1087  * but we are not livelocked by new incoming ones.
1088  */
1089 void flush_workqueue(struct workqueue_struct *wq)
1090 {
1091         struct wq_flusher this_flusher = {
1092                 .list = LIST_HEAD_INIT(this_flusher.list),
1093                 .flush_color = -1,
1094                 .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
1095         };
1096         int next_color;
1097
1098         lock_map_acquire(&wq->lockdep_map);
1099         lock_map_release(&wq->lockdep_map);
1100
1101         mutex_lock(&wq->flush_mutex);
1102
1103         /*
1104          * Start-to-wait phase
1105          */
1106         next_color = work_next_color(wq->work_color);
1107
1108         if (next_color != wq->flush_color) {
1109                 /*
1110                  * Color space is not full.  The current work_color
1111                  * becomes our flush_color and work_color is advanced
1112                  * by one.
1113                  */
1114                 BUG_ON(!list_empty(&wq->flusher_overflow));
1115                 this_flusher.flush_color = wq->work_color;
1116                 wq->work_color = next_color;
1117
1118                 if (!wq->first_flusher) {
1119                         /* no flush in progress, become the first flusher */
1120                         BUG_ON(wq->flush_color != this_flusher.flush_color);
1121
1122                         wq->first_flusher = &this_flusher;
1123
1124                         if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
1125                                                        wq->work_color)) {
1126                                 /* nothing to flush, done */
1127                                 wq->flush_color = next_color;
1128                                 wq->first_flusher = NULL;
1129                                 goto out_unlock;
1130                         }
1131                 } else {
1132                         /* wait in queue */
1133                         BUG_ON(wq->flush_color == this_flusher.flush_color);
1134                         list_add_tail(&this_flusher.list, &wq->flusher_queue);
1135                         flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
1136                 }
1137         } else {
1138                 /*
1139                  * Oops, color space is full, wait on overflow queue.
1140                  * The next flush completion will assign us
1141                  * flush_color and transfer to flusher_queue.
1142                  */
1143                 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
1144         }
1145
1146         mutex_unlock(&wq->flush_mutex);
1147
1148         wait_for_completion(&this_flusher.done);
1149
1150         /*
1151          * Wake-up-and-cascade phase
1152          *
1153          * First flushers are responsible for cascading flushes and
1154          * handling overflow.  Non-first flushers can simply return.
1155          */
1156         if (wq->first_flusher != &this_flusher)
1157                 return;
1158
1159         mutex_lock(&wq->flush_mutex);
1160
1161         wq->first_flusher = NULL;
1162
1163         BUG_ON(!list_empty(&this_flusher.list));
1164         BUG_ON(wq->flush_color != this_flusher.flush_color);
1165
1166         while (true) {
1167                 struct wq_flusher *next, *tmp;
1168
1169                 /* complete all the flushers sharing the current flush color */
1170                 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
1171                         if (next->flush_color != wq->flush_color)
1172                                 break;
1173                         list_del_init(&next->list);
1174                         complete(&next->done);
1175                 }
1176
1177                 BUG_ON(!list_empty(&wq->flusher_overflow) &&
1178                        wq->flush_color != work_next_color(wq->work_color));
1179
1180                 /* this flush_color is finished, advance by one */
1181                 wq->flush_color = work_next_color(wq->flush_color);
1182
1183                 /* one color has been freed, handle overflow queue */
1184                 if (!list_empty(&wq->flusher_overflow)) {
1185                         /*
1186                          * Assign the same color to all overflowed
1187                          * flushers, advance work_color and append to
1188                          * flusher_queue.  This is the start-to-wait
1189                          * phase for these overflowed flushers.
1190                          */
1191                         list_for_each_entry(tmp, &wq->flusher_overflow, list)
1192                                 tmp->flush_color = wq->work_color;
1193
1194                         wq->work_color = work_next_color(wq->work_color);
1195
1196                         list_splice_tail_init(&wq->flusher_overflow,
1197                                               &wq->flusher_queue);
1198                         flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
1199                 }
1200
1201                 if (list_empty(&wq->flusher_queue)) {
1202                         BUG_ON(wq->flush_color != wq->work_color);
1203                         break;
1204                 }
1205
1206                 /*
1207                  * Need to flush more colors.  Make the next flusher
1208                  * the new first flusher and arm cwqs.
1209                  */
1210                 BUG_ON(wq->flush_color == wq->work_color);
1211                 BUG_ON(wq->flush_color != next->flush_color);
1212
1213                 list_del_init(&next->list);
1214                 wq->first_flusher = next;
1215
1216                 if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
1217                         break;
1218
1219                 /*
1220                  * Meh... this color is already done, clear first
1221                  * flusher and repeat cascading.
1222                  */
1223                 wq->first_flusher = NULL;
1224         }
1225
1226 out_unlock:
1227         mutex_unlock(&wq->flush_mutex);
1228 }
1229 EXPORT_SYMBOL_GPL(flush_workqueue);
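
/*
 * Usage sketch (illustration only): a typical driver shutdown path stops
 * queueing new work and then flushes its workqueue so nothing still
 * references driver data.  my_shutdown() and my_wq are hypothetical.
 */
#if 0
static void my_shutdown(struct workqueue_struct *my_wq)
{
        /* caller must already have prevented further queueing */
        flush_workqueue(my_wq);         /* wait for already-queued works */
        destroy_workqueue(my_wq);       /* then it is safe to tear down */
}
#endif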
1230
1231 /**
1232  * flush_work - block until a work_struct's callback has terminated
1233  * @work: the work which is to be flushed
1234  *
1235  * Returns false if @work has already terminated.
1236  *
1237  * It is expected that, prior to calling flush_work(), the caller has
1238  * arranged for the work to not be requeued, otherwise it doesn't make
1239  * sense to use this function.
1240  */
1241 int flush_work(struct work_struct *work)
1242 {
1243         struct worker *worker = NULL;
1244         struct cpu_workqueue_struct *cwq;
1245         struct global_cwq *gcwq;
1246         struct wq_barrier barr;
1247
1248         might_sleep();
1249         cwq = get_wq_data(work);
1250         if (!cwq)
1251                 return 0;
1252         gcwq = cwq->gcwq;
1253
1254         lock_map_acquire(&cwq->wq->lockdep_map);
1255         lock_map_release(&cwq->wq->lockdep_map);
1256
1257         spin_lock_irq(&gcwq->lock);
1258         if (!list_empty(&work->entry)) {
1259                 /*
1260                  * See the comment near try_to_grab_pending()->smp_rmb().
1261                  * If it was re-queued under us we are not going to wait.
1262                  */
1263                 smp_rmb();
1264                 if (unlikely(cwq != get_wq_data(work)))
1265                         goto already_gone;
1266         } else {
1267                 if (cwq->worker && cwq->worker->current_work == work)
1268                         worker = cwq->worker;
1269                 if (!worker)
1270                         goto already_gone;
1271         }
1272
1273         insert_wq_barrier(cwq, &barr, work, worker);
1274         spin_unlock_irq(&gcwq->lock);
1275         wait_for_completion(&barr.done);
1276         destroy_work_on_stack(&barr.work);
1277         return 1;
1278 already_gone:
1279         spin_unlock_irq(&gcwq->lock);
1280         return 0;
1281 }
1282 EXPORT_SYMBOL_GPL(flush_work);
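
/*
 * Usage sketch (illustration only): flush_work() waits for one specific
 * work item rather than the whole workqueue, which is usually what a
 * driver wants before freeing the structure embedding the work.
 * my_dev and my_quiesce() are hypothetical names.
 */
#if 0
struct my_dev {
        struct work_struct      update;
        /* ... */
};

static void my_quiesce(struct my_dev *dev)
{
        /* assumes dev->update will not be re-queued after this point */
        flush_work(&dev->update);
}
#endif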
1283
1284 /*
1285  * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
1286  * so this work can't be re-armed in any way.
1287  */
1288 static int try_to_grab_pending(struct work_struct *work)
1289 {
1290         struct global_cwq *gcwq;
1291         struct cpu_workqueue_struct *cwq;
1292         int ret = -1;
1293
1294         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1295                 return 0;
1296
1297         /*
1298          * The queueing is in progress, or it is already queued. Try to
1299          * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
1300          */
1301
1302         cwq = get_wq_data(work);
1303         if (!cwq)
1304                 return ret;
1305         gcwq = cwq->gcwq;
1306
1307         spin_lock_irq(&gcwq->lock);
1308         if (!list_empty(&work->entry)) {
1309                 /*
1310                  * This work is queued, but perhaps we locked the wrong cwq.
1311                  * In that case we must see the new value after rmb(), see
1312                  * insert_work()->wmb().
1313                  */
1314                 smp_rmb();
1315                 if (cwq == get_wq_data(work)) {
1316                         debug_work_deactivate(work);
1317                         list_del_init(&work->entry);
1318                         cwq_dec_nr_in_flight(cwq, get_work_color(work));
1319                         ret = 1;
1320                 }
1321         }
1322         spin_unlock_irq(&gcwq->lock);
1323
1324         return ret;
1325 }
1326
1327 static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
1328                                 struct work_struct *work)
1329 {
1330         struct global_cwq *gcwq = cwq->gcwq;
1331         struct wq_barrier barr;
1332         struct worker *worker;
1333
1334         spin_lock_irq(&gcwq->lock);
1335
1336         worker = NULL;
1337         if (unlikely(cwq->worker && cwq->worker->current_work == work)) {
1338                 worker = cwq->worker;
1339                 insert_wq_barrier(cwq, &barr, work, worker);
1340         }
1341
1342         spin_unlock_irq(&gcwq->lock);
1343
1344         if (unlikely(worker)) {
1345                 wait_for_completion(&barr.done);
1346                 destroy_work_on_stack(&barr.work);
1347         }
1348 }
1349
1350 static void wait_on_work(struct work_struct *work)
1351 {
1352         struct cpu_workqueue_struct *cwq;
1353         struct workqueue_struct *wq;
1354         int cpu;
1355
1356         might_sleep();
1357
1358         lock_map_acquire(&work->lockdep_map);
1359         lock_map_release(&work->lockdep_map);
1360
1361         cwq = get_wq_data(work);
1362         if (!cwq)
1363                 return;
1364
1365         wq = cwq->wq;
1366
1367         for_each_possible_cpu(cpu)
1368                 wait_on_cpu_work(get_cwq(cpu, wq), work);
1369 }
1370
1371 static int __cancel_work_timer(struct work_struct *work,
1372                                 struct timer_list* timer)
1373 {
1374         int ret;
1375
1376         do {
1377                 ret = (timer && likely(del_timer(timer)));
1378                 if (!ret)
1379                         ret = try_to_grab_pending(work);
1380                 wait_on_work(work);
1381         } while (unlikely(ret < 0));
1382
1383         clear_wq_data(work);
1384         return ret;
1385 }
1386
1387 /**
1388  * cancel_work_sync - block until a work_struct's callback has terminated
1389  * @work: the work which is to be flushed
1390  *
1391  * Returns true if @work was pending.
1392  *
1393  * cancel_work_sync() will cancel the work if it is queued. If the work's
1394  * callback appears to be running, cancel_work_sync() will block until it
1395  * has completed.
1396  *
1397  * It is possible to use this function if the work re-queues itself. It can
1398  * cancel the work even if it migrates to another workqueue, however in that
1399  * case it only guarantees that work->func() has completed on the last queued
1400  * workqueue.
1401  *
1402  * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
1403  * pending, otherwise it goes into a busy-wait loop until the timer expires.
1404  *
1405  * The caller must ensure that workqueue_struct on which this work was last
1406  * queued can't be destroyed before this function returns.
1407  */
1408 int cancel_work_sync(struct work_struct *work)
1409 {
1410         return __cancel_work_timer(work, NULL);
1411 }
1412 EXPORT_SYMBOL_GPL(cancel_work_sync);
1413
1414 /**
1415  * cancel_delayed_work_sync - reliably kill off a delayed work.
1416  * @dwork: the delayed work struct
1417  *
1418  * Returns true if @dwork was pending.
1419  *
1420  * It is possible to use this function if @dwork rearms itself via queue_work()
1421  * or queue_delayed_work(). See also the comment for cancel_work_sync().
1422  */
1423 int cancel_delayed_work_sync(struct delayed_work *dwork)
1424 {
1425         return __cancel_work_timer(&dwork->work, &dwork->timer);
1426 }
1427 EXPORT_SYMBOL(cancel_delayed_work_sync);
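
/*
 * Usage sketch (illustration only): device teardown usually cancels both
 * plain and delayed works synchronously before freeing the embedding
 * object.  my_dev, its fields and my_remove() are hypothetical.
 */
#if 0
struct my_dev {
        struct work_struct      irq_work;
        struct delayed_work     poll_work;
};

static void my_remove(struct my_dev *dev)
{
        cancel_work_sync(&dev->irq_work);
        cancel_delayed_work_sync(&dev->poll_work);
        kfree(dev);                     /* now nothing can touch *dev */
}
#endif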
1428
1429 static struct workqueue_struct *keventd_wq __read_mostly;
1430
1431 /**
1432  * schedule_work - put work task in global workqueue
1433  * @work: job to be done
1434  *
1435  * Returns zero if @work was already on the kernel-global workqueue and
1436  * non-zero otherwise.
1437  *
1438  * This puts a job in the kernel-global workqueue if it was not already
1439  * queued and leaves it in the same position on the kernel-global
1440  * workqueue otherwise.
1441  */
1442 int schedule_work(struct work_struct *work)
1443 {
1444         return queue_work(keventd_wq, work);
1445 }
1446 EXPORT_SYMBOL(schedule_work);
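
/*
 * Usage sketch (illustration only): schedule_work() is the common
 * shorthand for queueing onto the kernel-global keventd workqueue.
 * my_work is assumed to have been set up with INIT_WORK()/DECLARE_WORK().
 */
#if 0
static void my_notify(struct work_struct *my_work)
{
        schedule_work(my_work);         /* 0 if it was already pending */
}
#endif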
1447
1448 /**
1449  * schedule_work_on - put work task on a specific cpu
1450  * @cpu: cpu to put the work task on
1451  * @work: job to be done
1452  *
1453  * This puts a job on a specific cpu.
1454  */
1455 int schedule_work_on(int cpu, struct work_struct *work)
1456 {
1457         return queue_work_on(cpu, keventd_wq, work);
1458 }
1459 EXPORT_SYMBOL(schedule_work_on);
1460
1461 /**
1462  * schedule_delayed_work - put work task in global workqueue after delay
1463  * @dwork: job to be done
1464  * @delay: number of jiffies to wait or 0 for immediate execution
1465  *
1466  * After waiting for a given time this puts a job in the kernel-global
1467  * workqueue.
1468  */
1469 int schedule_delayed_work(struct delayed_work *dwork,
1470                                         unsigned long delay)
1471 {
1472         return queue_delayed_work(keventd_wq, dwork, delay);
1473 }
1474 EXPORT_SYMBOL(schedule_delayed_work);
1475
1476 /**
1477  * flush_delayed_work - block until a dwork_struct's callback has terminated
1478  * @dwork: the delayed work which is to be flushed
1479  *
1480  * Any timeout is cancelled, and any pending work is run immediately.
1481  */
1482 void flush_delayed_work(struct delayed_work *dwork)
1483 {
1484         if (del_timer_sync(&dwork->timer)) {
1485                 __queue_work(get_cpu(), get_wq_data(&dwork->work)->wq,
1486                              &dwork->work);
1487                 put_cpu();
1488         }
1489         flush_work(&dwork->work);
1490 }
1491 EXPORT_SYMBOL(flush_delayed_work);
1492
1493 /**
1494  * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
1495  * @cpu: cpu to use
1496  * @dwork: job to be done
1497  * @delay: number of jiffies to wait
1498  *
1499  * After waiting for a given time this puts a job in the kernel-global
1500  * workqueue on the specified CPU.
1501  */
1502 int schedule_delayed_work_on(int cpu,
1503                         struct delayed_work *dwork, unsigned long delay)
1504 {
1505         return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
1506 }
1507 EXPORT_SYMBOL(schedule_delayed_work_on);
1508
1509 /**
1510  * schedule_on_each_cpu - call a function on each online CPU from keventd
1511  * @func: the function to call
1512  *
1513  * Returns zero on success.
1514  * Returns -ve errno on failure.
1515  *
1516  * schedule_on_each_cpu() is very slow.
1517  */
1518 int schedule_on_each_cpu(work_func_t func)
1519 {
1520         int cpu;
1521         int orig = -1;
1522         struct work_struct *works;
1523
1524         works = alloc_percpu(struct work_struct);
1525         if (!works)
1526                 return -ENOMEM;
1527
1528         get_online_cpus();
1529
1530         /*
1531          * When running in keventd don't schedule a work item on
1532          * itself.  We can just call the function directly because the
1533          * workqueue is already bound.  This is also faster.
1534          */
1535         if (current_is_keventd())
1536                 orig = raw_smp_processor_id();
1537
1538         for_each_online_cpu(cpu) {
1539                 struct work_struct *work = per_cpu_ptr(works, cpu);
1540
1541                 INIT_WORK(work, func);
1542                 if (cpu != orig)
1543                         schedule_work_on(cpu, work);
1544         }
1545         if (orig >= 0)
1546                 func(per_cpu_ptr(works, orig));
1547
1548         for_each_online_cpu(cpu)
1549                 flush_work(per_cpu_ptr(works, cpu));
1550
1551         put_online_cpus();
1552         free_percpu(works);
1553         return 0;
1554 }
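
/*
 * Usage sketch (illustration only): schedule_on_each_cpu() runs the given
 * function on every online CPU and waits for all of them; the callback
 * must have the work_func_t signature.  my_percpu_sync() is hypothetical.
 */
#if 0
static void my_percpu_sync(struct work_struct *unused)
{
        /* executes once on each online CPU, in process context */
}

static int my_sync_all_cpus(void)
{
        return schedule_on_each_cpu(my_percpu_sync);    /* 0 or -errno */
}
#endif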
1555
1556 /**
1557  * flush_scheduled_work - ensure that any scheduled work has run to completion.
1558  *
1559  * Forces execution of the kernel-global workqueue and blocks until its
1560  * completion.
1561  *
1562  * Think twice before calling this function!  It's very easy to get into
1563  * trouble if you don't take great care.  Either of the following situations
1564  * will lead to deadlock:
1565  *
1566  *      One of the work items currently on the workqueue needs to acquire
1567  *      a lock held by your code or its caller.
1568  *
1569  *      Your code is running in the context of a work routine.
1570  *
1571  * They will be detected by lockdep when they occur, but the first might not
1572  * occur very often.  It depends on what work items are on the workqueue and
1573  * what locks they need, which you have no control over.
1574  *
1575  * In most situations flushing the entire workqueue is overkill; you merely
1576  * need to know that a particular work item isn't queued and isn't running.
1577  * In such cases you should use cancel_delayed_work_sync() or
1578  * cancel_work_sync() instead.
1579  */
1580 void flush_scheduled_work(void)
1581 {
1582         flush_workqueue(keventd_wq);
1583 }
1584 EXPORT_SYMBOL(flush_scheduled_work);
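/*
 * Illustrative sketch (not part of this file; my_work and my_work_fn are
 * hypothetical): per the warning above, a module that only needs its own
 * item gone should cancel that item instead of flushing the whole global
 * workqueue.
 *
 *        static void my_work_fn(struct work_struct *work);
 *        static DECLARE_WORK(my_work, my_work_fn);
 *
 *        static void __exit my_exit(void)
 *        {
 *                cancel_work_sync(&my_work);
 *        }
 */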
1585
1586 /**
1587  * execute_in_process_context - reliably execute the routine with user context
1588  * @fn:         the function to execute
1589  * @ew:         guaranteed storage for the execute work structure (must
1590  *              be available when the work executes)
1591  *
1592  * Executes the function immediately if process context is available,
1593  * otherwise schedules the function for delayed execution.
1594  *
1595  * Returns:     0 - function was executed
1596  *              1 - function was scheduled for execution
1597  */
1598 int execute_in_process_context(work_func_t fn, struct execute_work *ew)
1599 {
1600         if (!in_interrupt()) {
1601                 fn(&ew->work);
1602                 return 0;
1603         }
1604
1605         INIT_WORK(&ew->work, fn);
1606         schedule_work(&ew->work);
1607
1608         return 1;
1609 }
1610 EXPORT_SYMBOL_GPL(execute_in_process_context);
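/*
 * Usage sketch (illustrative, not part of this file; my_obj and
 * my_release_fn are hypothetical):
 *
 *        struct my_obj {
 *                struct execute_work ew;
 *                ...
 *        };
 *
 *        static void my_release_fn(struct work_struct *work)
 *        {
 *                struct my_obj *obj = container_of(work, struct my_obj, ew.work);
 *                ...
 *        }
 *
 *        execute_in_process_context(my_release_fn, &obj->ew);
 *
 * The release runs synchronously when the caller already has process
 * context and is deferred to keventd when called from interrupt context.
 */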
1611
1612 int keventd_up(void)
1613 {
1614         return keventd_wq != NULL;
1615 }
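/*
 * Illustrative sketch (my_work is hypothetical): code that may run before
 * init_workqueues() can use keventd_up() as a guard and fall back to some
 * other deferral mechanism while the global workqueue doesn't exist yet.
 *
 *        if (keventd_up())
 *                schedule_work(&my_work);
 */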
1616
1617 int current_is_keventd(void)
1618 {
1619         struct cpu_workqueue_struct *cwq;
1620         int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
1621         int ret = 0;
1622
1623         BUG_ON(!keventd_wq);
1624
1625         cwq = get_cwq(cpu, keventd_wq);
1626         if (current == cwq->worker->task)
1627                 ret = 1;
1628
1629         return ret;
1631 }
1632
1633 static struct cpu_workqueue_struct *alloc_cwqs(void)
1634 {
1635         /*
1636          * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
1637          * Make sure that the alignment isn't lower than that of
1638          * unsigned long long.
1639          */
1640         const size_t size = sizeof(struct cpu_workqueue_struct);
1641         const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
1642                                    __alignof__(unsigned long long));
1643         struct cpu_workqueue_struct *cwqs;
1644 #ifndef CONFIG_SMP
1645         void *ptr;
1646
1647         /*
1648          * On UP, percpu allocator doesn't honor alignment parameter
1649          * and simply uses arch-dependent default.  Allocate enough
1650          * room to align cwq and put an extra pointer at the end
1651          * pointing back to the originally allocated pointer which
1652          * will be used for free.
1653          *
1654          * FIXME: This really belongs to UP percpu code.  Update UP
1655          * percpu code to honor alignment and remove this ugliness.
1656          */
1657         ptr = __alloc_percpu(size + align + sizeof(void *), 1);
1658         cwqs = PTR_ALIGN(ptr, align);
1659         *(void **)per_cpu_ptr(cwqs + 1, 0) = ptr;
1660 #else
1661         /* On SMP, percpu allocator can do it itself */
1662         cwqs = __alloc_percpu(size, align);
1663 #endif
1664         /* just in case, make sure it's actually aligned */
1665         BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align));
1666         return cwqs;
1667 }
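/*
 * Layout of the UP allocation above (illustrative):
 *
 *        ptr               cwqs = PTR_ALIGN(ptr, align)
 *        |                 |
 *        v                 v
 *        [ pad to align ]  [ struct cpu_workqueue_struct ]  [ saved ptr ]
 *
 * free_cwqs() below reads the saved pointer stored right after the cwq
 * and hands it back to free_percpu().
 */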
1668
1669 static void free_cwqs(struct cpu_workqueue_struct *cwqs)
1670 {
1671 #ifndef CONFIG_SMP
1672         /* on UP, the pointer to free is stored right after the cwq */
1673         if (cwqs)
1674                 free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0));
1675 #else
1676         free_percpu(cwqs);
1677 #endif
1678 }
1679
1680 struct workqueue_struct *__create_workqueue_key(const char *name,
1681                                                 unsigned int flags,
1682                                                 int max_active,
1683                                                 struct lock_class_key *key,
1684                                                 const char *lock_name)
1685 {
1686         bool singlethread = flags & WQ_SINGLE_THREAD;
1687         struct workqueue_struct *wq;
1688         bool failed = false;
1689         unsigned int cpu;
1690
1691         max_active = clamp_val(max_active, 1, INT_MAX);
1692
1693         wq = kzalloc(sizeof(*wq), GFP_KERNEL);
1694         if (!wq)
1695                 goto err;
1696
1697         wq->cpu_wq = alloc_cwqs();
1698         if (!wq->cpu_wq)
1699                 goto err;
1700
1701         wq->flags = flags;
1702         wq->saved_max_active = max_active;
1703         mutex_init(&wq->flush_mutex);
1704         atomic_set(&wq->nr_cwqs_to_flush, 0);
1705         INIT_LIST_HEAD(&wq->flusher_queue);
1706         INIT_LIST_HEAD(&wq->flusher_overflow);
1707         wq->name = name;
1708         lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
1709         INIT_LIST_HEAD(&wq->list);
1710
1711         cpu_maps_update_begin();
1712         /*
1713          * We must initialize cwqs for each possible cpu even if we
1714          * are going to call destroy_workqueue() finally. Otherwise
1715          * cpu_up() can hit the uninitialized cwq once we drop the
1716          * lock.
1717          */
1718         for_each_possible_cpu(cpu) {
1719                 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1720                 struct global_cwq *gcwq = get_gcwq(cpu);
1721
1722                 BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
1723                 cwq->gcwq = gcwq;
1724                 cwq->wq = wq;
1725                 cwq->flush_color = -1;
1726                 cwq->max_active = max_active;
1727                 INIT_LIST_HEAD(&cwq->worklist);
1728                 INIT_LIST_HEAD(&cwq->delayed_works);
1729
1730                 if (failed)
1731                         continue;
1732                 cwq->worker = create_worker(cwq,
1733                                             cpu_online(cpu) && !singlethread);
1734                 if (cwq->worker)
1735                         start_worker(cwq->worker);
1736                 else
1737                         failed = true;
1738         }
1739
1740         /*
1741          * workqueue_lock protects global freeze state and workqueues
1742          * list.  Grab it, set max_active accordingly and add the new
1743          * workqueue to workqueues list.
1744          */
1745         spin_lock(&workqueue_lock);
1746
1747         if (workqueue_freezing && wq->flags & WQ_FREEZEABLE)
1748                 for_each_possible_cpu(cpu)
1749                         get_cwq(cpu, wq)->max_active = 0;
1750
1751         list_add(&wq->list, &workqueues);
1752
1753         spin_unlock(&workqueue_lock);
1754
1755         cpu_maps_update_done();
1756
1757         if (failed) {
1758                 destroy_workqueue(wq);
1759                 wq = NULL;
1760         }
1761         return wq;
1762 err:
1763         if (wq) {
1764                 free_cwqs(wq->cpu_wq);
1765                 kfree(wq);
1766         }
1767         return NULL;
1768 }
1769 EXPORT_SYMBOL_GPL(__create_workqueue_key);
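/*
 * Callers normally reach this through the create_workqueue() family of
 * macros in include/linux/workqueue.h rather than calling it directly,
 * e.g. (illustrative):
 *
 *        struct workqueue_struct *wq;
 *
 *        wq = create_singlethread_workqueue("my_st_wq");
 */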
1770
1771 /**
1772  * destroy_workqueue - safely terminate a workqueue
1773  * @wq: target workqueue
1774  *
1775  * Safely destroy a workqueue. All work currently pending will be done first.
1776  */
1777 void destroy_workqueue(struct workqueue_struct *wq)
1778 {
1779         unsigned int cpu;
1780
1781         flush_workqueue(wq);
1782
1783         /*
1784          * wq list is used to freeze wq, remove from list after
1785          * flushing is complete in case freeze races us.
1786          */
1787         cpu_maps_update_begin();
1788         spin_lock(&workqueue_lock);
1789         list_del(&wq->list);
1790         spin_unlock(&workqueue_lock);
1791         cpu_maps_update_done();
1792
1793         for_each_possible_cpu(cpu) {
1794                 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1795                 int i;
1796
1797                 if (cwq->worker) {
1798                         spin_lock_irq(&cwq->gcwq->lock);
1799                         destroy_worker(cwq->worker);
1800                         cwq->worker = NULL;
1801                         spin_unlock_irq(&cwq->gcwq->lock);
1802                 }
1803
1804                 for (i = 0; i < WORK_NR_COLORS; i++)
1805                         BUG_ON(cwq->nr_in_flight[i]);
1806                 BUG_ON(cwq->nr_active);
1807                 BUG_ON(!list_empty(&cwq->delayed_works));
1808         }
1809
1810         free_cwqs(wq->cpu_wq);
1811         kfree(wq);
1812 }
1813 EXPORT_SYMBOL_GPL(destroy_workqueue);
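/*
 * Lifecycle sketch (illustrative, not part of this file; my_wq, my_work
 * and my_work_fn are hypothetical):
 *
 *        static void my_work_fn(struct work_struct *work);
 *        static DECLARE_WORK(my_work, my_work_fn);
 *        struct workqueue_struct *my_wq;
 *
 *        my_wq = create_workqueue("my_wq");
 *        if (!my_wq)
 *                return -ENOMEM;
 *        queue_work(my_wq, &my_work);
 *        ...
 *        destroy_workqueue(my_wq);
 *
 * destroy_workqueue() flushes the queue first, so my_work is guaranteed
 * to have finished by the time it returns.
 */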
1814
1815 static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
1816                                                 unsigned long action,
1817                                                 void *hcpu)
1818 {
1819         unsigned int cpu = (unsigned long)hcpu;
1820         struct cpu_workqueue_struct *cwq;
1821         struct workqueue_struct *wq;
1822
1823         action &= ~CPU_TASKS_FROZEN;
1824
1825         list_for_each_entry(wq, &workqueues, list) {
1826                 if (wq->flags & WQ_SINGLE_THREAD)
1827                         continue;
1828
1829                 cwq = get_cwq(cpu, wq);
1830
1831                 switch (action) {
1832                 case CPU_POST_DEAD:
1833                         flush_workqueue(wq);
1834                         break;
1835                 }
1836         }
1837
1838         return notifier_from_errno(0);
1839 }
1840
1841 #ifdef CONFIG_SMP
1842
1843 struct work_for_cpu {
1844         struct completion completion;
1845         long (*fn)(void *);
1846         void *arg;
1847         long ret;
1848 };
1849
1850 static int do_work_for_cpu(void *_wfc)
1851 {
1852         struct work_for_cpu *wfc = _wfc;
1853         wfc->ret = wfc->fn(wfc->arg);
1854         complete(&wfc->completion);
1855         return 0;
1856 }
1857
1858 /**
1859  * work_on_cpu - run a function in user context on a particular cpu
1860  * @cpu: the cpu to run on
1861  * @fn: the function to run
1862  * @arg: the function arg
1863  *
1864  * This will return the value @fn returns.
1865  * It is up to the caller to ensure that the cpu doesn't go offline.
1866  * The caller must not hold any locks which would prevent @fn from completing.
1867  */
1868 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
1869 {
1870         struct task_struct *sub_thread;
1871         struct work_for_cpu wfc = {
1872                 .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
1873                 .fn = fn,
1874                 .arg = arg,
1875         };
1876
1877         sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
1878         if (IS_ERR(sub_thread))
1879                 return PTR_ERR(sub_thread);
1880         kthread_bind(sub_thread, cpu);
1881         wake_up_process(sub_thread);
1882         wait_for_completion(&wfc.completion);
1883         return wfc.ret;
1884 }
1885 EXPORT_SYMBOL_GPL(work_on_cpu);
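/*
 * Usage sketch (illustrative, not part of this file; my_probe_fn is
 * hypothetical): run a function on CPU 2 and collect its return value,
 * keeping the CPU online around the call as required above.
 *
 *        static long my_probe_fn(void *arg)
 *        {
 *                return (long)smp_processor_id();
 *        }
 *
 *        get_online_cpus();
 *        if (cpu_online(2))
 *                ret = work_on_cpu(2, my_probe_fn, NULL);
 *        put_online_cpus();
 */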
1886 #endif /* CONFIG_SMP */
1887
1888 #ifdef CONFIG_FREEZER
1889
1890 /**
1891  * freeze_workqueues_begin - begin freezing workqueues
1892  *
1893  * Start freezing workqueues.  After this function returns, all
1894  * freezeable workqueues have max_active set to zero, so newly queued
1895  * works land on cwq->delayed_works instead of being executed.
1896  *
1897  * CONTEXT:
1898  * Grabs and releases workqueue_lock and gcwq->lock's.
1899  */
1900 void freeze_workqueues_begin(void)
1901 {
1902         struct workqueue_struct *wq;
1903         unsigned int cpu;
1904
1905         spin_lock(&workqueue_lock);
1906
1907         BUG_ON(workqueue_freezing);
1908         workqueue_freezing = true;
1909
1910         for_each_possible_cpu(cpu) {
1911                 struct global_cwq *gcwq = get_gcwq(cpu);
1912
1913                 spin_lock_irq(&gcwq->lock);
1914
1915                 list_for_each_entry(wq, &workqueues, list) {
1916                         struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1917
1918                         if (wq->flags & WQ_FREEZEABLE)
1919                                 cwq->max_active = 0;
1920                 }
1921
1922                 spin_unlock_irq(&gcwq->lock);
1923         }
1924
1925         spin_unlock(&workqueue_lock);
1926 }
1927
1928 /**
1929  * freeze_workqueues_busy - are freezeable workqueues still busy?
1930  *
1931  * Check whether freezing is complete.  This function must be called
1932  * between freeze_workqueues_begin() and thaw_workqueues().
1933  *
1934  * CONTEXT:
1935  * Grabs and releases workqueue_lock.
1936  *
1937  * RETURNS:
1938  * %true if some freezeable workqueues are still busy.  %false if
1939  * freezing is complete.
1940  */
1941 bool freeze_workqueues_busy(void)
1942 {
1943         struct workqueue_struct *wq;
1944         unsigned int cpu;
1945         bool busy = false;
1946
1947         spin_lock(&workqueue_lock);
1948
1949         BUG_ON(!workqueue_freezing);
1950
1951         for_each_possible_cpu(cpu) {
1952                 /*
1953                  * nr_active is monotonically decreasing.  It's safe
1954                  * to peek without lock.
1955                  */
1956                 list_for_each_entry(wq, &workqueues, list) {
1957                         struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1958
1959                         if (!(wq->flags & WQ_FREEZEABLE))
1960                                 continue;
1961
1962                         BUG_ON(cwq->nr_active < 0);
1963                         if (cwq->nr_active) {
1964                                 busy = true;
1965                                 goto out_unlock;
1966                         }
1967                 }
1968         }
1969 out_unlock:
1970         spin_unlock(&workqueue_lock);
1971         return busy;
1972 }
1973
1974 /**
1975  * thaw_workqueues - thaw workqueues
1976  *
1977  * Thaw workqueues.  Normal execution is restored and works that were
1978  * delayed while frozen are moved back to their respective cwq worklists.
1979  *
1980  * CONTEXT:
1981  * Grabs and releases workqueue_lock and gcwq->lock's.
1982  */
1983 void thaw_workqueues(void)
1984 {
1985         struct workqueue_struct *wq;
1986         unsigned int cpu;
1987
1988         spin_lock(&workqueue_lock);
1989
1990         if (!workqueue_freezing)
1991                 goto out_unlock;
1992
1993         for_each_possible_cpu(cpu) {
1994                 struct global_cwq *gcwq = get_gcwq(cpu);
1995
1996                 spin_lock_irq(&gcwq->lock);
1997
1998                 list_for_each_entry(wq, &workqueues, list) {
1999                         struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2000
2001                         if (!(wq->flags & WQ_FREEZEABLE))
2002                                 continue;
2003
2004                         /* restore max_active and repopulate worklist */
2005                         cwq->max_active = wq->saved_max_active;
2006
2007                         while (!list_empty(&cwq->delayed_works) &&
2008                                cwq->nr_active < cwq->max_active)
2009                                 cwq_activate_first_delayed(cwq);
2010
2011                         wake_up_process(cwq->worker->task);
2012                 }
2013
2014                 spin_unlock_irq(&gcwq->lock);
2015         }
2016
2017         workqueue_freezing = false;
2018 out_unlock:
2019         spin_unlock(&workqueue_lock);
2020 }
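/*
 * Expected calling sequence (illustrative sketch; the real caller is the
 * suspend/hibernation freezer, which uses its own retry logic):
 *
 *        freeze_workqueues_begin();
 *        while (freeze_workqueues_busy())
 *                msleep(10);
 *        ... all freezeable workqueues are now quiescent ...
 *        thaw_workqueues();
 */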
2021 #endif /* CONFIG_FREEZER */
2022
2023 void __init init_workqueues(void)
2024 {
2025         unsigned int cpu;
2026         int i;
2027
2028         singlethread_cpu = cpumask_first(cpu_possible_mask);
2029         hotcpu_notifier(workqueue_cpu_callback, 0);
2030
2031         /* initialize gcwqs */
2032         for_each_possible_cpu(cpu) {
2033                 struct global_cwq *gcwq = get_gcwq(cpu);
2034
2035                 spin_lock_init(&gcwq->lock);
2036                 gcwq->cpu = cpu;
2037
2038                 INIT_LIST_HEAD(&gcwq->idle_list);
2039                 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
2040                         INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
2041
2042                 ida_init(&gcwq->worker_ida);
2043         }
2044
2045         keventd_wq = create_workqueue("events");
2046         BUG_ON(!keventd_wq);
2047 }