Merge branch '/tip/perf/filter' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6.git] / kernel / trace / trace_sched_switch.c
index d25ffa5..7e62c0a 100644 (file)
 #include <linux/debugfs.h>
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
-#include <linux/marker.h>
 #include <linux/ftrace.h>
+#include <trace/events/sched.h>
 
 #include "trace.h"
 
 static struct trace_array      *ctx_trace;
 static int __read_mostly       tracer_enabled;
-static atomic_t                        sched_ref;
+static int                     sched_ref;
+static DEFINE_MUTEX(sched_register_mutex);
+static int                     sched_stopped;
 
-static void
-sched_switch_func(void *private, void *__rq, struct task_struct *prev,
-                       struct task_struct *next)
-{
-       struct trace_array **ptr = private;
-       struct trace_array *tr = *ptr;
-       struct trace_array_cpu *data;
-       unsigned long flags;
-       long disabled;
-       int cpu;
-
-       if (!tracer_enabled)
-               return;
-
-       local_irq_save(flags);
-       cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
-
-       if (likely(disabled == 1))
-               tracing_sched_switch_trace(tr, data, prev, next, flags);
-
-       atomic_dec(&data->disabled);
-       local_irq_restore(flags);
-}
-
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
-                     const char *format, va_list *args)
-{
-       struct task_struct *prev;
-       struct task_struct *next;
-       struct rq *__rq;
 
-       if (!atomic_read(&sched_ref))
+void
+tracing_sched_switch_trace(struct trace_array *tr,
+                          struct task_struct *prev,
+                          struct task_struct *next,
+                          unsigned long flags, int pc)
+{
+       struct ftrace_event_call *call = &event_context_switch;
+       struct ring_buffer *buffer = tr->buffer;
+       struct ring_buffer_event *event;
+       struct ctx_switch_entry *entry;
+
+       event = trace_buffer_lock_reserve(buffer, TRACE_CTX,
+                                         sizeof(*entry), flags, pc);
+       if (!event)
                return;
+       entry   = ring_buffer_event_data(event);
+       entry->prev_pid                 = prev->pid;
+       entry->prev_prio                = prev->prio;
+       entry->prev_state               = prev->state;
+       entry->next_pid                 = next->pid;
+       entry->next_prio                = next->prio;
+       entry->next_state               = next->state;
+       entry->next_cpu = task_cpu(next);
 
-       /* skip prev_pid %d next_pid %d prev_state %ld */
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, long);
-       __rq = va_arg(*args, typeof(__rq));
-       prev = va_arg(*args, typeof(prev));
-       next = va_arg(*args, typeof(next));
-
-       tracing_record_cmdline(prev);
-
-       /*
-        * If tracer_switch_func only points to the local
-        * switch func, it still needs the ptr passed to it.
-        */
-       sched_switch_func(probe_data, __rq, prev, next);
+       if (!filter_check_discard(call, entry, buffer, event))
+               trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
 
 static void
-wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
-                       task_struct *curr)
+probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next)
 {
-       struct trace_array **ptr = private;
-       struct trace_array *tr = *ptr;
        struct trace_array_cpu *data;
        unsigned long flags;
-       long disabled;
        int cpu;
+       int pc;
 
-       if (!tracer_enabled)
+       if (unlikely(!sched_ref))
                return;
 
-       tracing_record_cmdline(curr);
+       tracing_record_cmdline(prev);
+       tracing_record_cmdline(next);
+
+       if (!tracer_enabled || sched_stopped)
+               return;
 
+       pc = preempt_count();
        local_irq_save(flags);
        cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
+       data = ctx_trace->data[cpu];
 
-       if (likely(disabled == 1))
-               tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+       if (likely(!atomic_read(&data->disabled)))
+               tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc);
 
-       atomic_dec(&data->disabled);
        local_irq_restore(flags);
 }
 
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
-                const char *format, va_list *args)
-{
-       struct task_struct *curr;
-       struct task_struct *task;
-       struct rq *__rq;
-
-       if (likely(!tracer_enabled))
+void
+tracing_sched_wakeup_trace(struct trace_array *tr,
+                          struct task_struct *wakee,
+                          struct task_struct *curr,
+                          unsigned long flags, int pc)
+{
+       struct ftrace_event_call *call = &event_wakeup;
+       struct ring_buffer_event *event;
+       struct ctx_switch_entry *entry;
+       struct ring_buffer *buffer = tr->buffer;
+
+       event = trace_buffer_lock_reserve(buffer, TRACE_WAKE,
+                                         sizeof(*entry), flags, pc);
+       if (!event)
                return;
+       entry   = ring_buffer_event_data(event);
+       entry->prev_pid                 = curr->pid;
+       entry->prev_prio                = curr->prio;
+       entry->prev_state               = curr->state;
+       entry->next_pid                 = wakee->pid;
+       entry->next_prio                = wakee->prio;
+       entry->next_state               = wakee->state;
+       entry->next_cpu                 = task_cpu(wakee);
 
-       /* Skip pid %d state %ld */
-       (void)va_arg(*args, int);
-       (void)va_arg(*args, long);
-       /* now get the meat: "rq %p task %p rq->curr %p" */
-       __rq = va_arg(*args, typeof(__rq));
-       task = va_arg(*args, typeof(task));
-       curr = va_arg(*args, typeof(curr));
-
-       tracing_record_cmdline(task);
-       tracing_record_cmdline(curr);
-
-       wakeup_func(probe_data, __rq, task, curr);
+       if (!filter_check_discard(call, entry, buffer, event))
+               ring_buffer_unlock_commit(buffer, event);
+       ftrace_trace_stack(tr->buffer, flags, 6, pc);
+       ftrace_trace_userstack(tr->buffer, flags, pc);
 }
 
-void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
+static void
+probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
 {
-       struct trace_array *tr = ctx_trace;
        struct trace_array_cpu *data;
        unsigned long flags;
-       long disabled;
-       int cpu;
+       int cpu, pc;
 
-       if (!tracer_enabled)
+       if (unlikely(!sched_ref))
                return;
 
+       tracing_record_cmdline(current);
+
+       if (!tracer_enabled || sched_stopped)
+               return;
+
+       pc = preempt_count();
        local_irq_save(flags);
        cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
+       data = ctx_trace->data[cpu];
 
-       if (likely(disabled == 1))
-               __trace_special(tr, data, arg1, arg2, arg3);
+       if (likely(!atomic_read(&data->disabled)))
+               tracing_sched_wakeup_trace(ctx_trace, wakee, current,
+                                          flags, pc);
 
-       atomic_dec(&data->disabled);
        local_irq_restore(flags);
 }
 
-static void sched_switch_reset(struct trace_array *tr)
-{
-       int cpu;
-
-       tr->time_start = ftrace_now(tr->cpu);
-
-       for_each_online_cpu(cpu)
-               tracing_reset(tr->data[cpu]);
-}
-
 static int tracing_sched_register(void)
 {
        int ret;
 
-       ret = marker_probe_register("kernel_sched_wakeup",
-                       "pid %d state %ld ## rq %p task %p rq->curr %p",
-                       wake_up_callback,
-                       &ctx_trace);
+       ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
        if (ret) {
-               pr_info("wakeup trace: Couldn't add marker"
+               pr_info("wakeup trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_wakeup\n");
                return ret;
        }
 
-       ret = marker_probe_register("kernel_sched_wakeup_new",
-                       "pid %d state %ld ## rq %p task %p rq->curr %p",
-                       wake_up_callback,
-                       &ctx_trace);
+       ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
        if (ret) {
-               pr_info("wakeup trace: Couldn't add marker"
+               pr_info("wakeup trace: Couldn't activate tracepoint"
                        " probe to kernel_sched_wakeup_new\n");
                goto fail_deprobe;
        }
 
-       ret = marker_probe_register("kernel_sched_schedule",
-               "prev_pid %d next_pid %d prev_state %ld "
-               "## rq %p prev %p next %p",
-               sched_switch_callback,
-               &ctx_trace);
+       ret = register_trace_sched_switch(probe_sched_switch, NULL);
        if (ret) {
-               pr_info("sched trace: Couldn't add marker"
-                       " probe to kernel_sched_schedule\n");
+               pr_info("sched trace: Couldn't activate tracepoint"
+                       " probe to kernel_sched_switch\n");
                goto fail_deprobe_wake_new;
        }
 
        return ret;
 fail_deprobe_wake_new:
-       marker_probe_unregister("kernel_sched_wakeup_new",
-                               wake_up_callback,
-                               &ctx_trace);
+       unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
 fail_deprobe:
-       marker_probe_unregister("kernel_sched_wakeup",
-                               wake_up_callback,
-                               &ctx_trace);
+       unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
        return ret;
 }
 
 static void tracing_sched_unregister(void)
 {
-       marker_probe_unregister("kernel_sched_schedule",
-                               sched_switch_callback,
-                               &ctx_trace);
-       marker_probe_unregister("kernel_sched_wakeup_new",
-                               wake_up_callback,
-                               &ctx_trace);
-       marker_probe_unregister("kernel_sched_wakeup",
-                               wake_up_callback,
-                               &ctx_trace);
+       unregister_trace_sched_switch(probe_sched_switch, NULL);
+       unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
+       unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
 }
 
-void tracing_start_sched_switch(void)
+static void tracing_start_sched_switch(void)
 {
-       long ref;
-
-       ref = atomic_inc_return(&sched_ref);
-       if (ref == 1)
+       mutex_lock(&sched_register_mutex);
+       if (!(sched_ref++))
                tracing_sched_register();
+       mutex_unlock(&sched_register_mutex);
 }
 
-void tracing_stop_sched_switch(void)
+static void tracing_stop_sched_switch(void)
 {
-       long ref;
-
-       ref = atomic_dec_and_test(&sched_ref);
-       if (ref)
+       mutex_lock(&sched_register_mutex);
+       if (!(--sched_ref))
                tracing_sched_unregister();
+       mutex_unlock(&sched_register_mutex);
 }
 
-static void start_sched_trace(struct trace_array *tr)
+void tracing_start_cmdline_record(void)
 {
-       sched_switch_reset(tr);
-       atomic_inc(&trace_record_cmdline_enabled);
-       tracer_enabled = 1;
        tracing_start_sched_switch();
 }
 
-static void stop_sched_trace(struct trace_array *tr)
+void tracing_stop_cmdline_record(void)
 {
        tracing_stop_sched_switch();
-       atomic_dec(&trace_record_cmdline_enabled);
-       tracer_enabled = 0;
 }
 
-static void sched_switch_trace_init(struct trace_array *tr)
+/**
+ * tracing_start_sched_switch_record - start tracing context switches
+ *
+ * Turns on context switch tracing for a tracer.
+ */
+void tracing_start_sched_switch_record(void)
 {
-       ctx_trace = tr;
+       if (unlikely(!ctx_trace)) {
+               WARN_ON(1);
+               return;
+       }
 
-       if (tr->ctrl)
-               start_sched_trace(tr);
-}
+       tracing_start_sched_switch();
 
-static void sched_switch_trace_reset(struct trace_array *tr)
-{
-       if (tr->ctrl)
-               stop_sched_trace(tr);
+       mutex_lock(&sched_register_mutex);
+       tracer_enabled++;
+       mutex_unlock(&sched_register_mutex);
 }
 
-static void sched_switch_trace_ctrl_update(struct trace_array *tr)
+/**
+ * tracing_stop_sched_switch_record - stop tracing context switches
+ *
+ * Turns off context switch tracing for a tracer.
+ */
+void tracing_stop_sched_switch_record(void)
 {
-       /* When starting a new trace, reset the buffers */
-       if (tr->ctrl)
-               start_sched_trace(tr);
-       else
-               stop_sched_trace(tr);
+       mutex_lock(&sched_register_mutex);
+       tracer_enabled--;
+       WARN_ON(tracer_enabled < 0);
+       mutex_unlock(&sched_register_mutex);
+
+       tracing_stop_sched_switch();
 }
 
-static struct tracer sched_switch_trace __read_mostly =
-{
-       .name           = "sched_switch",
-       .init           = sched_switch_trace_init,
-       .reset          = sched_switch_trace_reset,
-       .ctrl_update    = sched_switch_trace_ctrl_update,
-#ifdef CONFIG_FTRACE_SELFTEST
-       .selftest    = trace_selftest_startup_sched_switch,
-#endif
-};
-
-__init static int init_sched_switch_trace(void)
+/**
+ * tracing_sched_switch_assign_trace - assign a trace array for ctx switch
+ * @tr: trace array pointer to assign
+ *
+ * Some tracers might want to record the context switches in their
+ * trace. This function lets those tracers assign the trace array
+ * to use.
+ */
+void tracing_sched_switch_assign_trace(struct trace_array *tr)
 {
-       int ret = 0;
-
-       if (atomic_read(&sched_ref))
-               ret = tracing_sched_register();
-       if (ret) {
-               pr_info("error registering scheduler trace\n");
-               return ret;
-       }
-       return register_tracer(&sched_switch_trace);
+       ctx_trace = tr;
 }
-device_initcall(init_sched_switch_trace);
+