9489a0a9b1be69ed21234d9211df1f05a9857ec5
[linux-2.6.git] / kernel / tracepoint.c
1 /*
2  * Copyright (C) 2008 Mathieu Desnoyers
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  */
18 #include <linux/module.h>
19 #include <linux/mutex.h>
20 #include <linux/types.h>
21 #include <linux/jhash.h>
22 #include <linux/list.h>
23 #include <linux/rcupdate.h>
24 #include <linux/tracepoint.h>
25 #include <linux/err.h>
26 #include <linux/slab.h>
27 #include <linux/sched.h>
28
29 extern struct tracepoint __start___tracepoints[];
30 extern struct tracepoint __stop___tracepoints[];
31
32 /* Set to 1 to enable tracepoint debug output */
33 static const int tracepoint_debug;
34
35 /*
36  * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
37  * builtin and module tracepoints and the hash table.
38  */
39 static DEFINE_MUTEX(tracepoints_mutex);
40
41 /*
42  * Tracepoint hash table, containing the active tracepoints.
43  * Protected by tracepoints_mutex.
44  */
45 #define TRACEPOINT_HASH_BITS 6
46 #define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
47 static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
48
49 /*
50  * Note about RCU :
51  * It is used to to delay the free of multiple probes array until a quiescent
52  * state is reached.
53  * Tracepoint entries modifications are protected by the tracepoints_mutex.
54  */
55 struct tracepoint_entry {
56         struct hlist_node hlist;
57         void **funcs;
58         int refcount;   /* Number of times armed. 0 if disarmed. */
59         char name[0];
60 };
61
62 struct tp_probes {
63         union {
64                 struct rcu_head rcu;
65                 struct list_head list;
66         } u;
67         void *probes[0];
68 };
69
70 static inline void *allocate_probes(int count)
71 {
72         struct tp_probes *p  = kmalloc(count * sizeof(void *)
73                         + sizeof(struct tp_probes), GFP_KERNEL);
74         return p == NULL ? NULL : p->probes;
75 }
76
77 static void rcu_free_old_probes(struct rcu_head *head)
78 {
79         kfree(container_of(head, struct tp_probes, u.rcu));
80 }
81
82 static inline void release_probes(void *old)
83 {
84         if (old) {
85                 struct tp_probes *tp_probes = container_of(old,
86                         struct tp_probes, probes[0]);
87                 call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
88         }
89 }
90
91 static void debug_print_probes(struct tracepoint_entry *entry)
92 {
93         int i;
94
95         if (!tracepoint_debug || !entry->funcs)
96                 return;
97
98         for (i = 0; entry->funcs[i]; i++)
99                 printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
100 }
101
102 static void *
103 tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
104 {
105         int nr_probes = 0;
106         void **old, **new;
107
108         WARN_ON(!probe);
109
110         debug_print_probes(entry);
111         old = entry->funcs;
112         if (old) {
113                 /* (N -> N+1), (N != 0, 1) probes */
114                 for (nr_probes = 0; old[nr_probes]; nr_probes++)
115                         if (old[nr_probes] == probe)
116                                 return ERR_PTR(-EEXIST);
117         }
118         /* + 2 : one for new probe, one for NULL func */
119         new = allocate_probes(nr_probes + 2);
120         if (new == NULL)
121                 return ERR_PTR(-ENOMEM);
122         if (old)
123                 memcpy(new, old, nr_probes * sizeof(void *));
124         new[nr_probes] = probe;
125         new[nr_probes + 1] = NULL;
126         entry->refcount = nr_probes + 1;
127         entry->funcs = new;
128         debug_print_probes(entry);
129         return old;
130 }
131
132 static void *
133 tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
134 {
135         int nr_probes = 0, nr_del = 0, i;
136         void **old, **new;
137
138         old = entry->funcs;
139
140         if (!old)
141                 return ERR_PTR(-ENOENT);
142
143         debug_print_probes(entry);
144         /* (N -> M), (N > 1, M >= 0) probes */
145         for (nr_probes = 0; old[nr_probes]; nr_probes++) {
146                 if ((!probe || old[nr_probes] == probe))
147                         nr_del++;
148         }
149
150         if (nr_probes - nr_del == 0) {
151                 /* N -> 0, (N > 1) */
152                 entry->funcs = NULL;
153                 entry->refcount = 0;
154                 debug_print_probes(entry);
155                 return old;
156         } else {
157                 int j = 0;
158                 /* N -> M, (N > 1, M > 0) */
159                 /* + 1 for NULL */
160                 new = allocate_probes(nr_probes - nr_del + 1);
161                 if (new == NULL)
162                         return ERR_PTR(-ENOMEM);
163                 for (i = 0; old[i]; i++)
164                         if ((probe && old[i] != probe))
165                                 new[j++] = old[i];
166                 new[nr_probes - nr_del] = NULL;
167                 entry->refcount = nr_probes - nr_del;
168                 entry->funcs = new;
169         }
170         debug_print_probes(entry);
171         return old;
172 }
173
174 /*
175  * Get tracepoint if the tracepoint is present in the tracepoint hash table.
176  * Must be called with tracepoints_mutex held.
177  * Returns NULL if not present.
178  */
179 static struct tracepoint_entry *get_tracepoint(const char *name)
180 {
181         struct hlist_head *head;
182         struct hlist_node *node;
183         struct tracepoint_entry *e;
184         u32 hash = jhash(name, strlen(name), 0);
185
186         head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
187         hlist_for_each_entry(e, node, head, hlist) {
188                 if (!strcmp(name, e->name))
189                         return e;
190         }
191         return NULL;
192 }
193
194 /*
195  * Add the tracepoint to the tracepoint hash table. Must be called with
196  * tracepoints_mutex held.
197  */
198 static struct tracepoint_entry *add_tracepoint(const char *name)
199 {
200         struct hlist_head *head;
201         struct hlist_node *node;
202         struct tracepoint_entry *e;
203         size_t name_len = strlen(name) + 1;
204         u32 hash = jhash(name, name_len-1, 0);
205
206         head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
207         hlist_for_each_entry(e, node, head, hlist) {
208                 if (!strcmp(name, e->name)) {
209                         printk(KERN_NOTICE
210                                 "tracepoint %s busy\n", name);
211                         return ERR_PTR(-EEXIST);        /* Already there */
212                 }
213         }
214         /*
215          * Using kmalloc here to allocate a variable length element. Could
216          * cause some memory fragmentation if overused.
217          */
218         e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
219         if (!e)
220                 return ERR_PTR(-ENOMEM);
221         memcpy(&e->name[0], name, name_len);
222         e->funcs = NULL;
223         e->refcount = 0;
224         hlist_add_head(&e->hlist, head);
225         return e;
226 }
227
228 /*
229  * Remove the tracepoint from the tracepoint hash table. Must be called with
230  * mutex_lock held.
231  */
232 static inline void remove_tracepoint(struct tracepoint_entry *e)
233 {
234         hlist_del(&e->hlist);
235         kfree(e);
236 }
237
238 /*
239  * Sets the probe callback corresponding to one tracepoint.
240  */
241 static void set_tracepoint(struct tracepoint_entry **entry,
242         struct tracepoint *elem, int active)
243 {
244         WARN_ON(strcmp((*entry)->name, elem->name) != 0);
245
246         if (elem->regfunc && !elem->state && active)
247                 elem->regfunc();
248         else if (elem->unregfunc && elem->state && !active)
249                 elem->unregfunc();
250
251         /*
252          * rcu_assign_pointer has a smp_wmb() which makes sure that the new
253          * probe callbacks array is consistent before setting a pointer to it.
254          * This array is referenced by __DO_TRACE from
255          * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
256          * is used.
257          */
258         rcu_assign_pointer(elem->funcs, (*entry)->funcs);
259         elem->state = active;
260 }
261
262 /*
263  * Disable a tracepoint and its probe callback.
264  * Note: only waiting an RCU period after setting elem->call to the empty
265  * function insures that the original callback is not used anymore. This insured
266  * by preempt_disable around the call site.
267  */
268 static void disable_tracepoint(struct tracepoint *elem)
269 {
270         if (elem->unregfunc && elem->state)
271                 elem->unregfunc();
272
273         elem->state = 0;
274         rcu_assign_pointer(elem->funcs, NULL);
275 }
276
277 /**
278  * tracepoint_update_probe_range - Update a probe range
279  * @begin: beginning of the range
280  * @end: end of the range
281  *
282  * Updates the probe callback corresponding to a range of tracepoints.
283  */
284 void
285 tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end)
286 {
287         struct tracepoint *iter;
288         struct tracepoint_entry *mark_entry;
289
290         if (!begin)
291                 return;
292
293         mutex_lock(&tracepoints_mutex);
294         for (iter = begin; iter < end; iter++) {
295                 mark_entry = get_tracepoint(iter->name);
296                 if (mark_entry) {
297                         set_tracepoint(&mark_entry, iter,
298                                         !!mark_entry->refcount);
299                 } else {
300                         disable_tracepoint(iter);
301                 }
302         }
303         mutex_unlock(&tracepoints_mutex);
304 }
305
306 /*
307  * Update probes, removing the faulty probes.
308  */
309 static void tracepoint_update_probes(void)
310 {
311         /* Core kernel tracepoints */
312         tracepoint_update_probe_range(__start___tracepoints,
313                 __stop___tracepoints);
314         /* tracepoints in modules. */
315         module_update_tracepoints();
316 }
317
318 static void *tracepoint_add_probe(const char *name, void *probe)
319 {
320         struct tracepoint_entry *entry;
321         void *old;
322
323         entry = get_tracepoint(name);
324         if (!entry) {
325                 entry = add_tracepoint(name);
326                 if (IS_ERR(entry))
327                         return entry;
328         }
329         old = tracepoint_entry_add_probe(entry, probe);
330         if (IS_ERR(old) && !entry->refcount)
331                 remove_tracepoint(entry);
332         return old;
333 }
334
335 /**
336  * tracepoint_probe_register -  Connect a probe to a tracepoint
337  * @name: tracepoint name
338  * @probe: probe handler
339  *
340  * Returns 0 if ok, error value on error.
341  * The probe address must at least be aligned on the architecture pointer size.
342  */
343 int tracepoint_probe_register(const char *name, void *probe)
344 {
345         void *old;
346
347         mutex_lock(&tracepoints_mutex);
348         old = tracepoint_add_probe(name, probe);
349         mutex_unlock(&tracepoints_mutex);
350         if (IS_ERR(old))
351                 return PTR_ERR(old);
352
353         tracepoint_update_probes();             /* may update entry */
354         release_probes(old);
355         return 0;
356 }
357 EXPORT_SYMBOL_GPL(tracepoint_probe_register);
358
359 static void *tracepoint_remove_probe(const char *name, void *probe)
360 {
361         struct tracepoint_entry *entry;
362         void *old;
363
364         entry = get_tracepoint(name);
365         if (!entry)
366                 return ERR_PTR(-ENOENT);
367         old = tracepoint_entry_remove_probe(entry, probe);
368         if (IS_ERR(old))
369                 return old;
370         if (!entry->refcount)
371                 remove_tracepoint(entry);
372         return old;
373 }
374
375 /**
376  * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
377  * @name: tracepoint name
378  * @probe: probe function pointer
379  *
380  * We do not need to call a synchronize_sched to make sure the probes have
381  * finished running before doing a module unload, because the module unload
382  * itself uses stop_machine(), which insures that every preempt disabled section
383  * have finished.
384  */
385 int tracepoint_probe_unregister(const char *name, void *probe)
386 {
387         void *old;
388
389         mutex_lock(&tracepoints_mutex);
390         old = tracepoint_remove_probe(name, probe);
391         mutex_unlock(&tracepoints_mutex);
392         if (IS_ERR(old))
393                 return PTR_ERR(old);
394
395         tracepoint_update_probes();             /* may update entry */
396         release_probes(old);
397         return 0;
398 }
399 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
400
401 static LIST_HEAD(old_probes);
402 static int need_update;
403
404 static void tracepoint_add_old_probes(void *old)
405 {
406         need_update = 1;
407         if (old) {
408                 struct tp_probes *tp_probes = container_of(old,
409                         struct tp_probes, probes[0]);
410                 list_add(&tp_probes->u.list, &old_probes);
411         }
412 }
413
414 /**
415  * tracepoint_probe_register_noupdate -  register a probe but not connect
416  * @name: tracepoint name
417  * @probe: probe handler
418  *
419  * caller must call tracepoint_probe_update_all()
420  */
421 int tracepoint_probe_register_noupdate(const char *name, void *probe)
422 {
423         void *old;
424
425         mutex_lock(&tracepoints_mutex);
426         old = tracepoint_add_probe(name, probe);
427         if (IS_ERR(old)) {
428                 mutex_unlock(&tracepoints_mutex);
429                 return PTR_ERR(old);
430         }
431         tracepoint_add_old_probes(old);
432         mutex_unlock(&tracepoints_mutex);
433         return 0;
434 }
435 EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
436
437 /**
438  * tracepoint_probe_unregister_noupdate -  remove a probe but not disconnect
439  * @name: tracepoint name
440  * @probe: probe function pointer
441  *
442  * caller must call tracepoint_probe_update_all()
443  */
444 int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
445 {
446         void *old;
447
448         mutex_lock(&tracepoints_mutex);
449         old = tracepoint_remove_probe(name, probe);
450         if (IS_ERR(old)) {
451                 mutex_unlock(&tracepoints_mutex);
452                 return PTR_ERR(old);
453         }
454         tracepoint_add_old_probes(old);
455         mutex_unlock(&tracepoints_mutex);
456         return 0;
457 }
458 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
459
460 /**
461  * tracepoint_probe_update_all -  update tracepoints
462  */
463 void tracepoint_probe_update_all(void)
464 {
465         LIST_HEAD(release_probes);
466         struct tp_probes *pos, *next;
467
468         mutex_lock(&tracepoints_mutex);
469         if (!need_update) {
470                 mutex_unlock(&tracepoints_mutex);
471                 return;
472         }
473         if (!list_empty(&old_probes))
474                 list_replace_init(&old_probes, &release_probes);
475         need_update = 0;
476         mutex_unlock(&tracepoints_mutex);
477
478         tracepoint_update_probes();
479         list_for_each_entry_safe(pos, next, &release_probes, u.list) {
480                 list_del(&pos->u.list);
481                 call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
482         }
483 }
484 EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
485
/**
 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
 * @tracepoint: current tracepoints (in), next tracepoint (out)
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Returns whether a next tracepoint has been found (1) or not (0).
 * Will return the first tracepoint in the range if the input tracepoint is
 * NULL.
 */
int tracepoint_get_iter_range(struct tracepoint **tracepoint,
	struct tracepoint *begin, struct tracepoint *end)
{
	struct tracepoint *cur = *tracepoint;

	/* No current position: start at the head of a non-empty range. */
	if (!cur && begin != end) {
		*tracepoint = begin;
		return 1;
	}
	/* Otherwise the current position must lie inside [begin, end). */
	return cur >= begin && cur < end;
}
EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
508
509 static void tracepoint_get_iter(struct tracepoint_iter *iter)
510 {
511         int found = 0;
512
513         /* Core kernel tracepoints */
514         if (!iter->module) {
515                 found = tracepoint_get_iter_range(&iter->tracepoint,
516                                 __start___tracepoints, __stop___tracepoints);
517                 if (found)
518                         goto end;
519         }
520         /* tracepoints in modules. */
521         found = module_get_iter_tracepoints(iter);
522 end:
523         if (!found)
524                 tracepoint_iter_reset(iter);
525 }
526
/* Position @iter on a valid tracepoint, or reset it if there is none. */
void tracepoint_iter_start(struct tracepoint_iter *iter)
{
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_start);
532
533 void tracepoint_iter_next(struct tracepoint_iter *iter)
534 {
535         iter->tracepoint++;
536         /*
537          * iter->tracepoint may be invalid because we blindly incremented it.
538          * Make sure it is valid by marshalling on the tracepoints, getting the
539          * tracepoints from following modules if necessary.
540          */
541         tracepoint_get_iter(iter);
542 }
543 EXPORT_SYMBOL_GPL(tracepoint_iter_next);
544
/* End an iteration over the tracepoints. */
void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
	/* Intentionally empty: this function performs no work. */
}
EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
549
550 void tracepoint_iter_reset(struct tracepoint_iter *iter)
551 {
552         iter->module = NULL;
553         iter->tracepoint = NULL;
554 }
555 EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
556
557 #ifdef CONFIG_MODULES
558
559 int tracepoint_module_notify(struct notifier_block *self,
560                              unsigned long val, void *data)
561 {
562         struct module *mod = data;
563
564         switch (val) {
565         case MODULE_STATE_COMING:
566         case MODULE_STATE_GOING:
567                 tracepoint_update_probe_range(mod->tracepoints,
568                         mod->tracepoints + mod->num_tracepoints);
569                 break;
570         }
571         return 0;
572 }
573
574 struct notifier_block tracepoint_module_nb = {
575         .notifier_call = tracepoint_module_notify,
576         .priority = 0,
577 };
578
579 static int init_tracepoints(void)
580 {
581         return register_module_notifier(&tracepoint_module_nb);
582 }
583 __initcall(init_tracepoints);
584
585 #endif /* CONFIG_MODULES */
586
587 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
588
589 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */
590 static int sys_tracepoint_refcount;
591
592 void syscall_regfunc(void)
593 {
594         unsigned long flags;
595         struct task_struct *g, *t;
596
597         if (!sys_tracepoint_refcount) {
598                 read_lock_irqsave(&tasklist_lock, flags);
599                 do_each_thread(g, t) {
600                         /* Skip kernel threads. */
601                         if (t->mm)
602                                 set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
603                 } while_each_thread(g, t);
604                 read_unlock_irqrestore(&tasklist_lock, flags);
605         }
606         sys_tracepoint_refcount++;
607 }
608
609 void syscall_unregfunc(void)
610 {
611         unsigned long flags;
612         struct task_struct *g, *t;
613
614         sys_tracepoint_refcount--;
615         if (!sys_tracepoint_refcount) {
616                 read_lock_irqsave(&tasklist_lock, flags);
617                 do_each_thread(g, t) {
618                         clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
619                 } while_each_thread(g, t);
620                 read_unlock_irqrestore(&tasklist_lock, flags);
621         }
622 }
623 #endif