tracing: implement trace_clock_*() APIs
Ingo Molnar [Thu, 26 Feb 2009 17:47:11 +0000 (18:47 +0100)]
Impact: implement new tracing timestamp APIs

Add three trace clock variants, with differing scalability/precision
tradeoffs:

 -   local: CPU-local trace clock
 -  medium: scalable global clock with some jitter
 -  global: globally monotonic, serialized clock

Make the ring-buffer use the local trace clock internally.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

include/linux/trace_clock.h [new file with mode: 0644]
kernel/trace/Makefile
kernel/trace/ring_buffer.c
kernel/trace/trace_clock.c [new file with mode: 0644]

diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
new file mode 100644 (file)
index 0000000..7a81303
--- /dev/null
@@ -0,0 +1,19 @@
+#ifndef _LINUX_TRACE_CLOCK_H
+#define _LINUX_TRACE_CLOCK_H
+
+/*
+ * 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ *  -   local: CPU-local trace clock -- trace_clock_local()
+ *  -  medium: scalable global clock with some jitter -- trace_clock()
+ *  -  global: globally monotonic, serialized clock -- trace_clock_global()
+ */
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+extern u64 notrace trace_clock_local(void);
+extern u64 notrace trace_clock(void);
+extern u64 notrace trace_clock_global(void);
+
+#endif /* _LINUX_TRACE_CLOCK_H */
index 664b6c0..c931fe0 100644 (file)
@@ -19,6 +19,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_clock.o
 obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
index 8f19f1a..a8c275c 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
 #include <linux/ring_buffer.h>
+#include <linux/trace_clock.h>
 #include <linux/ftrace_irq.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
@@ -12,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>       /* used for sched_clock() (for now) */
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/list.h>
@@ -112,14 +112,13 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
-/* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
        u64 time;
 
        preempt_disable_notrace();
        /* shift to debug/test normalization and TIME_EXTENTS */
-       time = sched_clock() << DEBUG_SHIFT;
+       time = trace_clock_local() << DEBUG_SHIFT;
        preempt_enable_no_resched_notrace();
 
        return time;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
new file mode 100644 (file)
index 0000000..2d4953f
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * tracing clocks
+ *
+ *  Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Implements 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ *  -   local: CPU-local trace clock
+ *  -  medium: scalable global clock with some jitter
+ *  -  global: globally monotonic, serialized clock
+ *
+ * Tracer plugins will choose a default from these clocks.
+ */
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/ktime.h>
+
+/*
+ * trace_clock_local(): the simplest and least coherent tracing clock.
+ *
+ * Useful for tracing that neither crosses to other CPUs nor
+ * goes through idle events.
+ */
+u64 notrace trace_clock_local(void)
+{
+       /*
+        * sched_clock() is an architecture-implemented, fast, scalable,
+        * lockless clock. It is not guaranteed to be coherent across
+        * CPUs, nor across CPU idle events.
+        */
+       return sched_clock();
+}
+
+/*
+ * trace_clock(): 'in-between' trace clock. Not completely serialized,
+ * but not completely incorrect when crossing CPUs either.
+ *
+ * This is based on cpu_clock() of the current CPU, which will allow at
+ * most ~1 jiffy of jitter between CPUs. So it's a pretty scalable clock,
+ * but there can be offsets in the trace data.
+ */
+u64 notrace trace_clock(void)
+{
+       return cpu_clock(raw_smp_processor_id());
+}
+
+
+/*
+ * trace_clock_global(): special globally coherent trace clock
+ *
+ * It has higher overhead than the other trace clocks but is still
+ * an order of magnitude faster than GTOD-derived hardware clocks.
+ *
+ * Used by plugins that need globally coherent timestamps.
+ */
+
+static u64 prev_trace_clock_time;
+
+static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
+       (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+u64 notrace trace_clock_global(void)
+{
+       unsigned long flags;
+       int this_cpu;
+       u64 now;
+
+       raw_local_irq_save(flags);
+
+       this_cpu = raw_smp_processor_id();
+       now = cpu_clock(this_cpu);
+       /*
+        * If in an NMI context then don't risk lockups and return the
+        * cpu_clock() time:
+        */
+       if (unlikely(in_nmi()))
+               goto out;
+
+       __raw_spin_lock(&trace_clock_lock);
+
+       /*
+        * TODO: if this happens often then maybe we should reset
+        * my_scd->clock to prev_trace_clock_time+1, to make sure
+        * we start ticking with the local clock from now on?
+        */
+       if ((s64)(now - prev_trace_clock_time) < 0)
+               now = prev_trace_clock_time + 1;
+
+       prev_trace_clock_time = now;
+
+       __raw_spin_unlock(&trace_clock_lock);
+
+ out:
+       raw_local_irq_restore(flags);
+
+       return now;
+}