tracing: Let tracepoints have data passed to tracepoint callbacks
[linux-2.6.git] / kernel / trace / ftrace.c
index 1e6640f..6d2cb14 100644 (file)
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
 #include <linux/uaccess.h>
-#include <linux/kprobes.h>
 #include <linux/ftrace.h>
 #include <linux/sysctl.h>
+#include <linux/slab.h>
 #include <linux/ctype.h>
 #include <linux/list.h>
 #include <linux/hash.h>
+#include <linux/rcupdate.h>
 
 #include <trace/events/sched.h>
 
@@ -85,22 +86,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
-#endif
-
+/*
+ * Traverse the ftrace_list, invoking all entries.  The reason that we
+ * can use rcu_dereference_raw() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism.  The rcu_dereference_raw() calls are needed to handle
+ * concurrent insertions into the ftrace_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
 static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
 {
-       struct ftrace_ops *op = ftrace_list;
-
-       /* in case someone actually ports this to alpha! */
-       read_barrier_depends();
+       struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
 
        while (op != &ftrace_list_end) {
-               /* silly alpha */
-               read_barrier_depends();
                op->func(ip, parent_ip);
-               op = op->next;
+               op = rcu_dereference_raw(op->next); /*see above*/
        };
 }
 
@@ -155,8 +156,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
         * the ops->next pointer is valid before another CPU sees
         * the ops pointer included into the ftrace_list.
         */
-       smp_wmb();
-       ftrace_list = ops;
+       rcu_assign_pointer(ftrace_list, ops);
 
        if (ftrace_enabled) {
                ftrace_func_t func;
@@ -264,6 +264,7 @@ struct ftrace_profile {
        unsigned long                   counter;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        unsigned long long              time;
+       unsigned long long              time_squared;
 #endif
 };
 
@@ -366,9 +367,9 @@ static int function_stat_headers(struct seq_file *m)
 {
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        seq_printf(m, "  Function                               "
-                  "Hit    Time            Avg\n"
+                  "Hit    Time            Avg             s^2\n"
                      "  --------                               "
-                  "---    ----            ---\n");
+                  "---    ----            ---             ---\n");
 #else
        seq_printf(m, "  Function                               Hit\n"
                      "  --------                               ---\n");
@@ -384,6 +385,7 @@ static int function_stat_show(struct seq_file *m, void *v)
        static DEFINE_MUTEX(mutex);
        static struct trace_seq s;
        unsigned long long avg;
+       unsigned long long stddev;
 #endif
 
        kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
@@ -394,11 +396,25 @@ static int function_stat_show(struct seq_file *m, void *v)
        avg = rec->time;
        do_div(avg, rec->counter);
 
+       /* Sample standard deviation (s^2) */
+       if (rec->counter <= 1)
+               stddev = 0;
+       else {
+               stddev = rec->time_squared - rec->counter * avg * avg;
+               /*
+                * Divide only 1000 for ns^2 -> us^2 conversion.
+                * trace_print_graph_duration will divide 1000 again.
+                */
+               do_div(stddev, (rec->counter - 1) * 1000);
+       }
+
        mutex_lock(&mutex);
        trace_seq_init(&s);
        trace_print_graph_duration(rec->time, &s);
        trace_seq_puts(&s, "    ");
        trace_print_graph_duration(avg, &s);
+       trace_seq_puts(&s, "    ");
+       trace_print_graph_duration(stddev, &s);
        trace_print_seq(m, &s);
        mutex_unlock(&mutex);
 #endif
@@ -650,6 +666,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
        if (!stat->hash || !ftrace_profile_enabled)
                goto out;
 
+       /* If the calltime was zero'd ignore it */
+       if (!trace->calltime)
+               goto out;
+
        calltime = trace->rettime - trace->calltime;
 
        if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
@@ -668,8 +688,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
        }
 
        rec = ftrace_find_profiled_func(stat, trace->func);
-       if (rec)
+       if (rec) {
                rec->time += calltime;
+               rec->time_squared += calltime * calltime;
+       }
 
  out:
        local_irq_restore(flags);
@@ -898,36 +920,6 @@ static struct dyn_ftrace *ftrace_free_records;
                }                               \
        }
 
-#ifdef CONFIG_KPROBES
-
-static int frozen_record_count;
-
-static inline void freeze_record(struct dyn_ftrace *rec)
-{
-       if (!(rec->flags & FTRACE_FL_FROZEN)) {
-               rec->flags |= FTRACE_FL_FROZEN;
-               frozen_record_count++;
-       }
-}
-
-static inline void unfreeze_record(struct dyn_ftrace *rec)
-{
-       if (rec->flags & FTRACE_FL_FROZEN) {
-               rec->flags &= ~FTRACE_FL_FROZEN;
-               frozen_record_count--;
-       }
-}
-
-static inline int record_frozen(struct dyn_ftrace *rec)
-{
-       return rec->flags & FTRACE_FL_FROZEN;
-}
-#else
-# define freeze_record(rec)                    ({ 0; })
-# define unfreeze_record(rec)                  ({ 0; })
-# define record_frozen(rec)                    ({ 0; })
-#endif /* CONFIG_KPROBES */
-
 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
        rec->freelist = ftrace_free_records;
@@ -1025,6 +1017,21 @@ static void ftrace_bug(int failed, unsigned long ip)
 }
 
 
+/* Return 1 if the address range is reserved for ftrace */
+int ftrace_text_reserved(void *start, void *end)
+{
+       struct dyn_ftrace *rec;
+       struct ftrace_page *pg;
+
+       do_for_each_ftrace_rec(pg, rec) {
+               if (rec->ip <= (unsigned long)end &&
+                   rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
+                       return 1;
+       } while_for_each_ftrace_rec();
+       return 0;
+}
+
+
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 {
@@ -1076,14 +1083,6 @@ static void ftrace_replace_code(int enable)
                    !(rec->flags & FTRACE_FL_CONVERTED))
                        continue;
 
-               /* ignore updates to this record's mcount site */
-               if (get_kprobe((void *)rec->ip)) {
-                       freeze_record(rec);
-                       continue;
-               } else {
-                       unfreeze_record(rec);
-               }
-
                failed = __ftrace_replace_code(rec, enable);
                if (failed) {
                        rec->flags |= FTRACE_FL_FAILED;
@@ -2300,6 +2299,8 @@ __setup("ftrace_filter=", set_ftrace_filter);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
+static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
+
 static int __init set_graph_function(char *str)
 {
        strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -2426,6 +2427,7 @@ static const struct file_operations ftrace_notrace_fops = {
 static DEFINE_MUTEX(graph_lock);
 
 int ftrace_graph_count;
+int ftrace_graph_filter_enabled;
 unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
 
 static void *
@@ -2448,7 +2450,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
        mutex_lock(&graph_lock);
 
        /* Nothing, tell g_show to print all functions are enabled */
-       if (!ftrace_graph_count && !*pos)
+       if (!ftrace_graph_filter_enabled && !*pos)
                return (void *)1;
 
        return __g_next(m, pos);
@@ -2494,6 +2496,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
        mutex_lock(&graph_lock);
        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC)) {
+               ftrace_graph_filter_enabled = 0;
                ftrace_graph_count = 0;
                memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
        }
@@ -2519,7 +2522,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
        struct dyn_ftrace *rec;
        struct ftrace_page *pg;
        int search_len;
-       int found = 0;
+       int fail = 1;
        int type, not;
        char *search;
        bool exists;
@@ -2530,37 +2533,51 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 
        /* decode regex */
        type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
-       if (not)
-               return -EINVAL;
+       if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
+               return -EBUSY;
 
        search_len = strlen(search);
 
        mutex_lock(&ftrace_lock);
        do_for_each_ftrace_rec(pg, rec) {
 
-               if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
-                       break;
-
                if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
                        continue;
 
                if (ftrace_match_record(rec, search, search_len, type)) {
-                       /* ensure it is not already in the array */
+                       /* if it is in the array */
                        exists = false;
-                       for (i = 0; i < *idx; i++)
+                       for (i = 0; i < *idx; i++) {
                                if (array[i] == rec->ip) {
                                        exists = true;
                                        break;
                                }
-                       if (!exists)
-                               array[(*idx)++] = rec->ip;
-                       found = 1;
+                       }
+
+                       if (!not) {
+                               fail = 0;
+                               if (!exists) {
+                                       array[(*idx)++] = rec->ip;
+                                       if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
+                                               goto out;
+                               }
+                       } else {
+                               if (exists) {
+                                       array[i] = array[--(*idx)];
+                                       array[*idx] = 0;
+                                       fail = 0;
+                               }
+                       }
                }
        } while_for_each_ftrace_rec();
-
+out:
        mutex_unlock(&ftrace_lock);
 
-       return found ? 0 : -EINVAL;
+       if (fail)
+               return -EINVAL;
+
+       ftrace_graph_filter_enabled = 1;
+       return 0;
 }
 
 static ssize_t
@@ -2570,16 +2587,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
        struct trace_parser parser;
        ssize_t read, ret;
 
-       if (!cnt || cnt < 0)
+       if (!cnt)
                return 0;
 
        mutex_lock(&graph_lock);
 
-       if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
-               ret = -EBUSY;
-               goto out_unlock;
-       }
-
        if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
                ret = -ENOMEM;
                goto out_unlock;
@@ -3222,8 +3234,8 @@ free:
 }
 
 static void
-ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev,
-                               struct task_struct *next)
+ftrace_graph_probe_sched_switch(void *ignore,
+                       struct task_struct *prev, struct task_struct *next)
 {
        unsigned long long timestamp;
        int index;
@@ -3277,7 +3289,7 @@ static int start_graph_tracing(void)
        } while (ret == -EAGAIN);
 
        if (!ret) {
-               ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch);
+               ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
                if (ret)
                        pr_info("ftrace_graph: Couldn't activate tracepoint"
                                " probe to kernel_sched_switch\n");
@@ -3349,11 +3361,11 @@ void unregister_ftrace_graph(void)
                goto out;
 
        ftrace_graph_active--;
-       unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
        ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
        ftrace_graph_entry = ftrace_graph_entry_stub;
        ftrace_shutdown(FTRACE_STOP_FUNC_RET);
        unregister_pm_notifier(&ftrace_suspend_notifier);
+       unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
 
  out:
        mutex_unlock(&ftrace_lock);
@@ -3364,6 +3376,7 @@ void ftrace_graph_init_task(struct task_struct *t)
 {
        /* Make sure we do not use the parent ret_stack */
        t->ret_stack = NULL;
+       t->curr_ret_stack = -1;
 
        if (ftrace_graph_active) {
                struct ftrace_ret_stack *ret_stack;
@@ -3373,7 +3386,6 @@ void ftrace_graph_init_task(struct task_struct *t)
                                GFP_KERNEL);
                if (!ret_stack)
                        return;
-               t->curr_ret_stack = -1;
                atomic_set(&t->tracing_graph_pause, 0);
                atomic_set(&t->trace_overrun, 0);
                t->ftrace_timestamp = 0;