Merge branch 'tracing/ftrace'; commit 'v2.6.29-rc2' into tracing/core
Ingo Molnar [Sun, 18 Jan 2009 19:15:05 +0000 (20:15 +0100)]
51 files changed:
Documentation/ABI/testing/debugfs-kmemtrace [new file with mode: 0644]
Documentation/kernel-parameters.txt
Documentation/sysrq.txt
Documentation/tracers/mmiotrace.txt
Documentation/vm/kmemtrace.txt [new file with mode: 0644]
MAINTAINERS
arch/ia64/Kconfig
arch/ia64/include/asm/ftrace.h [new file with mode: 0644]
arch/ia64/kernel/Makefile
arch/ia64/kernel/entry.S
arch/ia64/kernel/ftrace.c [new file with mode: 0644]
arch/ia64/kernel/ia64_ksyms.c
arch/x86/Kconfig.debug
arch/x86/kvm/Kconfig
drivers/char/sysrq.c
include/linux/ftrace.h
include/linux/slab_def.h
include/linux/slob_def.h
include/linux/slub_def.h
include/trace/kmemtrace.h [new file with mode: 0644]
include/trace/workqueue.h [new file with mode: 0644]
init/main.c
kernel/relay.c
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/ftrace.c
kernel/trace/kmemtrace.c [new file with mode: 0644]
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_boot.c
kernel/trace/trace_branch.c
kernel/trace/trace_functions.c
kernel/trace/trace_functions_graph.c
kernel/trace/trace_hw_branches.c
kernel/trace/trace_irqsoff.c
kernel/trace/trace_mmiotrace.c
kernel/trace/trace_output.c [new file with mode: 0644]
kernel/trace/trace_output.h [new file with mode: 0644]
kernel/trace/trace_power.c
kernel/trace/trace_sched_wakeup.c
kernel/trace/trace_selftest.c
kernel/trace/trace_stat.c [new file with mode: 0644]
kernel/trace/trace_stat.h [new file with mode: 0644]
kernel/trace/trace_workqueue.c [new file with mode: 0644]
kernel/workqueue.c
mm/slab.c
mm/slob.c
mm/slub.c
scripts/Makefile.build
scripts/recordmcount.pl

diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
new file mode 100644 (file)
index 0000000..5e6a92a
--- /dev/null
@@ -0,0 +1,71 @@
+What:          /sys/kernel/debug/kmemtrace/
+Date:          July 2008
+Contact:       Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
+Description:
+
+In kmemtrace-enabled kernels, the following files are created:
+
+/sys/kernel/debug/kmemtrace/
+       cpu<n>          (0400)  Per-CPU tracing data, see below. (binary)
+       total_overruns  (0400)  Total number of bytes which were dropped from
+                               cpu<n> files because of full buffer condition,
+                               non-binary. (text)
+       abi_version     (0400)  Kernel's kmemtrace ABI version. (text)
+
+Each per-CPU file should be read according to the relay interface. That is,
+the reader should set affinity to that specific CPU and, as currently done by
+the userspace application (though there are other methods), use poll() with
+an infinite timeout before every read(). Otherwise, erroneous data may be
+read. The binary data has the following _core_ format:
+
+       Event ID        (1 byte)        Unsigned integer, one of:
+               0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
+               1 - represents a freeing of previously allocated memory
+                   (KMEMTRACE_EVENT_FREE)
+       Type ID         (1 byte)        Unsigned integer, one of:
+               0 - this is a kmalloc() / kfree()
+               1 - this is a kmem_cache_alloc() / kmem_cache_free()
+               2 - this is a __get_free_pages() et al.
+       Event size      (2 bytes)       Unsigned integer representing the
+                                       size of this event. Used to extend
+                                       kmemtrace. Discard the bytes you
+                                       don't know about.
+       Sequence number (4 bytes)       Signed integer used to reorder data
+                                       logged on SMP machines. Wraparound
+                                       must be taken into account, although
+                                       it is unlikely.
+       Caller address  (8 bytes)       Return address to the caller.
+       Pointer to mem  (8 bytes)       Pointer to target memory area. Can be
+                                       NULL, but not all such calls might be
+                                       recorded.
+
+In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
+
+       Requested bytes (8 bytes)       Total number of requested bytes,
+                                       unsigned, must not be zero.
+       Allocated bytes (8 bytes)       Total number of actually allocated
+                                       bytes, unsigned, must not be lower
+                                       than requested bytes.
+       Requested flags (4 bytes)       GFP flags supplied by the caller.
+       Target CPU      (4 bytes)       Signed integer, valid for event id 1.
+                                       If equal to -1, target CPU is the same
+                                       as origin CPU, but the reverse might
+                                       not be true.
+
+The data is made available in the same endianness the machine has.
+
+Other event ids and type ids may be defined and added. Other fields may be
+added by increasing event size, but see below for details.
+Every modification to the ABI, including new id definitions, is followed
+by bumping the ABI version by one.
+
+Adding new data to the packet (features) is done at the end of the mandatory
+data:
+       Feature size    (2 bytes)
+       Feature ID      (1 byte)
+       Feature data    (Feature size - 3 bytes)
+
+
+Users:
+       kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
+
index 8511d35..ac613a6 100644 (file)
@@ -49,6 +49,7 @@ parameter is applicable:
        ISAPNP  ISA PnP code is enabled.
        ISDN    Appropriate ISDN support is enabled.
        JOY     Appropriate joystick support is enabled.
+       KMEMTRACE kmemtrace is enabled.
        LIBATA  Libata driver is enabled
        LP      Printer support is enabled.
        LOOP    Loopback device support is enabled.
@@ -1050,6 +1051,15 @@ and is between 256 and 4096 characters. It is defined in the file
                        use the HighMem zone if it exists, and the Normal
                        zone if it does not.
 
+       kmemtrace.enable=       [KNL,KMEMTRACE] Format: { yes | no }
+                               Controls whether kmemtrace is enabled
+                               at boot-time.
+
+       kmemtrace.subbufs=n     [KNL,KMEMTRACE] Overrides the number of
+                       subbufs kmemtrace's relay channel has. Set this
+                       higher than default (KMEMTRACE_N_SUBBUFS in code) if
+                       you experience buffer overruns.
+
        movablecore=nn[KMG]     [KNL,X86-32,IA-64,PPC,X86-64] This parameter
                        is similar to kernelcore except it specifies the
                        amount of memory used for migratable allocations.
index 9e592c7..535aeb9 100644 (file)
@@ -113,6 +113,8 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'x'    - Used by xmon interface on ppc/powerpc platforms.
 
+'z'    - Dump the ftrace buffer
+
 '0'-'9' - Sets the console log level, controlling which kernel messages
           will be printed to your console. ('0', for example would make
           it so that only emergency messages like PANICs or OOPSes would
index cde23b4..5731c67 100644 (file)
@@ -78,12 +78,10 @@ to view your kernel log and look for "mmiotrace has lost events" warning. If
 events were lost, the trace is incomplete. You should enlarge the buffers and
 try again. Buffers are enlarged by first seeing how large the current buffers
 are:
-$ cat /debug/tracing/trace_entries
+$ cat /debug/tracing/buffer_size_kb
 gives you a number. Approximately double this number and write it back, for
 instance:
-$ echo 0 > /debug/tracing/tracing_enabled
-$ echo 128000 > /debug/tracing/trace_entries
-$ echo 1 > /debug/tracing/tracing_enabled
+$ echo 128000 > /debug/tracing/buffer_size_kb
 Then start again from the top.
 
 If you are doing a trace for a driver project, e.g. Nouveau, you should also
diff --git a/Documentation/vm/kmemtrace.txt b/Documentation/vm/kmemtrace.txt
new file mode 100644 (file)
index 0000000..a956d9b
--- /dev/null
@@ -0,0 +1,126 @@
+                       kmemtrace - Kernel Memory Tracer
+
+                         by Eduard - Gabriel Munteanu
+                            <eduard.munteanu@linux360.ro>
+
+I. Introduction
+===============
+
+kmemtrace helps kernel developers figure out two things:
+1) how different allocators (SLAB, SLUB etc.) perform
+2) how kernel code allocates memory and how much
+
+To do this, we trace every allocation and export information to the userspace
+through the relay interface. We export things such as the number of requested
+bytes, the number of bytes actually allocated (i.e. including internal
+fragmentation), whether this is a slab allocation or a plain kmalloc() and so
+on.
+
+The actual analysis is performed by a userspace tool (see section III for
+details on where to get it from). It logs the data exported by the kernel,
+processes it and (as of writing this) can provide the following information:
+- the total amount of memory allocated and fragmentation per call-site
+- the amount of memory allocated and fragmentation per allocation
+- total memory allocated and fragmentation in the collected dataset
+- number of cross-CPU allocations and frees (makes sense in NUMA environments)
+
+Moreover, it can potentially find inconsistent and erroneous behavior in
+kernel code, such as using slab free functions on kmalloc'ed memory or
+allocating less memory than requested (but not truly failed allocations).
+
+kmemtrace also makes provisions for tracing on one arch and analysing the
+data on another.
+
+II. Design and goals
+====================
+
+kmemtrace was designed to handle rather large amounts of data. Thus, it uses
+the relay interface to export whatever is logged to userspace, which then
+stores it. Analysis and reporting are done asynchronously, that is, after the
+data is collected and stored. By design, it allows one to log and analyse
+on different machines and different arches.
+
+As of writing this, the ABI is not considered stable, though it might not
+change much. However, no guarantees are made about compatibility yet. When
+deemed stable, the ABI should still allow easy extension while maintaining
+backward compatibility. This is described further in Documentation/ABI.
+
+Summary of design goals:
+       - allow logging and analysis to be done across different machines
+       - be fast and anticipate usage in high-load environments (*)
+       - be reasonably extensible
+       - make it possible for GNU/Linux distributions to have kmemtrace
+       included in their repositories
+
+(*) - one of the reasons Pekka Enberg's original userspace data analysis
+    tool's code was rewritten from Perl to C (although this is more than a
+    simple conversion)
+
+
+III. Quick usage guide
+======================
+
+1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
+CONFIG_KMEMTRACE).
+
+2) Get the userspace tool and build it:
+$ git-clone git://repo.or.cz/kmemtrace-user.git                # current repository
+$ cd kmemtrace-user/
+$ ./autogen.sh
+$ ./configure
+$ make
+
+3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
+'single' runlevel (so that relay buffers don't fill up easily), and run
+kmemtrace:
+# '$' does not mean user, but root here.
+$ mount -t debugfs none /sys/kernel/debug
+$ mount -t proc none /proc
+$ cd path/to/kmemtrace-user/
+$ ./kmemtraced
+Wait a bit, then stop it with CTRL+C.
+$ cat /sys/kernel/debug/kmemtrace/total_overruns       # Check if we didn't
+                                                       # overrun, should
+                                                       # be zero.
+$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
+               check its correctness]
+$ ./kmemtrace-report
+
+Now you should have a nice and short summary of how the allocator performs.
+
+IV. FAQ and known issues
+========================
+
+Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
+this? Should I worry?
+A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
+large the number is. You can fix it by supplying a higher
+'kmemtrace.subbufs=N' kernel parameter.
+---
+
+Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
+A: This is a bug and should be reported. It can occur for a variety of
+reasons:
+       - possible bugs in relay code
+       - possible misuse of relay by kmemtrace
+       - timestamps being collected out of order
+Or you may fix it yourself and send us a patch.
+---
+
+Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
+A: This is a known issue and I'm working on it. These might be true errors
+in kernel code, which may have inconsistent behavior (e.g. allocating memory
+with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
+out this behavior may work with SLAB, but may fail with other allocators.
+
+It may also be due to lack of tracing in some unusual allocator functions.
+
+We don't want bug reports regarding this issue yet.
+---
+
+V. See also
+===========
+
+Documentation/kernel-parameters.txt
+Documentation/ABI/testing/debugfs-kmemtrace
+
index 3fe4dc2..c3c5277 100644 (file)
@@ -2605,6 +2605,12 @@ M:       jason.wessel@windriver.com
 L:     kgdb-bugreport@lists.sourceforge.net
 S:     Maintained
 
+KMEMTRACE
+P:     Eduard - Gabriel Munteanu
+M:     eduard.munteanu@linux360.ro
+L:     linux-kernel@vger.kernel.org
+S:     Maintained
+
 KPROBES
 P:     Ananth N Mavinakayanahalli
 M:     ananth@in.ibm.com
index 6183aec..8b6a8a5 100644 (file)
@@ -22,6 +22,9 @@ config IA64
        select HAVE_OPROFILE
        select HAVE_KPROBES
        select HAVE_KRETPROBES
+       select HAVE_FTRACE_MCOUNT_RECORD
+       select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
+       select HAVE_FUNCTION_TRACER
        select HAVE_DMA_ATTRS
        select HAVE_KVM
        select HAVE_ARCH_TRACEHOOK
diff --git a/arch/ia64/include/asm/ftrace.h b/arch/ia64/include/asm/ftrace.h
new file mode 100644 (file)
index 0000000..d20db3c
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef _ASM_IA64_FTRACE_H
+#define _ASM_IA64_FTRACE_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define MCOUNT_INSN_SIZE        32 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
+#define mcount _mcount
+
+#include <asm/kprobes.h>
+/* On IA64, MCOUNT_ADDR is set at link time, so it's not a constant at compile time */
+#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
+#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+       /* addr is the second bundle, insn 2 (0x10 + 2); step back by that much */
+       return addr - 0x12;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* _ASM_IA64_FTRACE_H */
index c381ea9..ab6e7ec 100644 (file)
@@ -2,6 +2,10 @@
 # Makefile for the linux kernel.
 #
 
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
 extra-y        := head.o init_task.o vmlinux.lds
 
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o      \
@@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE)    += cyclone.o
 obj-$(CONFIG_CPU_FREQ)         += cpufreq/
 obj-$(CONFIG_IA64_MCA_RECOVERY)        += mca_recovery.o
 obj-$(CONFIG_KPROBES)          += kprobes.o jprobes.o
+obj-$(CONFIG_DYNAMIC_FTRACE)   += ftrace.o
 obj-$(CONFIG_KEXEC)            += machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)  += uncached.o
index e5341e2..7e3382b 100644 (file)
@@ -47,6 +47,7 @@
 #include <asm/processor.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
+#include <asm/ftrace.h>
 
 #include "minstate.h"
 
@@ -1404,6 +1405,105 @@ GLOBAL_ENTRY(unw_init_running)
        br.ret.sptk.many rp
 END(unw_init_running)
 
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+GLOBAL_ENTRY(_mcount)
+       br ftrace_stub
+END(_mcount)
+
+.here:
+       br.ret.sptk.many b0
+
+GLOBAL_ENTRY(ftrace_caller)
+       alloc out0 = ar.pfs, 8, 0, 4, 0
+       mov out3 = r0
+       ;;
+       mov out2 = b0
+       add r3 = 0x20, r3
+       mov out1 = r1;
+       br.call.sptk.many b0 = ftrace_patch_gp
+       //this might be called from module, so we must patch gp
+ftrace_patch_gp:
+       movl gp=__gp
+       mov b0 = r3
+       ;;
+.global ftrace_call;
+ftrace_call:
+{
+       .mlx
+       nop.m 0x0
+       movl r3 = .here;;
+}
+       alloc loc0 = ar.pfs, 4, 4, 2, 0
+       ;;
+       mov loc1 = b0
+       mov out0 = b0
+       mov loc2 = r8
+       mov loc3 = r15
+       ;;
+       adds out0 = -MCOUNT_INSN_SIZE, out0
+       mov out1 = in2
+       mov b6 = r3
+
+       br.call.sptk.many b0 = b6
+       ;;
+       mov ar.pfs = loc0
+       mov b0 = loc1
+       mov r8 = loc2
+       mov r15 = loc3
+       br ftrace_stub
+       ;;
+END(ftrace_caller)
+
+#else
+GLOBAL_ENTRY(_mcount)
+       movl r2 = ftrace_stub
+       movl r3 = ftrace_trace_function;;
+       ld8 r3 = [r3];;
+       ld8 r3 = [r3];;
+       cmp.eq p7,p0 = r2, r3
+(p7)   br.sptk.many ftrace_stub
+       ;;
+
+       alloc loc0 = ar.pfs, 4, 4, 2, 0
+       ;;
+       mov loc1 = b0
+       mov out0 = b0
+       mov loc2 = r8
+       mov loc3 = r15
+       ;;
+       adds out0 = -MCOUNT_INSN_SIZE, out0
+       mov out1 = in2
+       mov b6 = r3
+
+       br.call.sptk.many b0 = b6
+       ;;
+       mov ar.pfs = loc0
+       mov b0 = loc1
+       mov r8 = loc2
+       mov r15 = loc3
+       br ftrace_stub
+       ;;
+END(_mcount)
+#endif
+
+GLOBAL_ENTRY(ftrace_stub)
+       mov r3 = b0
+       movl r2 = _mcount_ret_helper
+       ;;
+       mov b6 = r2
+       mov b7 = r3
+       br.ret.sptk.many b6
+
+_mcount_ret_helper:
+       mov b0 = r42
+       mov r1 = r41
+       mov ar.pfs = r40
+       br b7
+END(ftrace_stub)
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
        .rodata
        .align 8
        .globl sys_call_table
diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
new file mode 100644 (file)
index 0000000..7fc8c96
--- /dev/null
@@ -0,0 +1,206 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched lazily with NOP till they are
+ * enabled. All code mutation routines here take effect atomically.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/patch.h>
+
+/* On IA64, -pg makes the compiler add the two bundles below to each function */
+static unsigned char __attribute__((aligned(8)))
+ftrace_orig_code[MCOUNT_INSN_SIZE] = {
+       0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
+       0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
+       0x05, 0x00, 0xc4, 0x00,             /* mov r42=b0 */
+       0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
+       0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
+       0x08, 0x00, 0x00, 0x50              /* br.call.sptk.many b0 = _mcount;; */
+};
+
+struct ftrace_orig_insn {
+       u64 dummy1, dummy2, dummy3;
+       u64 dummy4:64-41+13;
+       u64 imm20:20;
+       u64 dummy5:3;
+       u64 sign:1;
+       u64 dummy6:4;
+};
+
+/* the mcount stub is converted to the bundles below to make it a nop */
+static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
+       0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+       0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+       0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+       0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
+       0x00, 0x00, 0x04, 0x00
+};
+
+static unsigned char *ftrace_nop_replace(void)
+{
+       return ftrace_nop_code;
+}
+
+/*
+ * The mcount stub is converted to the bundles below to make it a call.
+ * Note: only the last instruction differs from the nop version.
+ * */
+static unsigned char __attribute__((aligned(8)))
+ftrace_call_code[MCOUNT_INSN_SIZE] = {
+       0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+       0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+       0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+       0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+       0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
+       0xf8, 0xff, 0xff, 0xc8
+};
+
+struct ftrace_call_insn {
+       u64 dummy1, dummy2;
+       u64 dummy3:48;
+       u64 imm39_l:16;
+       u64 imm39_h:23;
+       u64 dummy4:13;
+       u64 imm20:20;
+       u64 dummy5:3;
+       u64 i:1;
+       u64 dummy6:4;
+};
+
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+       struct ftrace_call_insn *code = (void *)ftrace_call_code;
+       unsigned long offset = addr - (ip + 0x10);      /* relative to ip + 16 bytes */
+       /* Scatter the offset into the immediate fields of the shared template. */
+       code->imm39_l = offset >> 24;   /* 16-bit field: offset bits 24..39 */
+       code->imm39_h = offset >> 40;   /* 23-bit field: offset bits 40..62 */
+       code->imm20 = offset >> 4;      /* 20-bit field: offset bits 4..23 */
+       code->i = offset >> 63;         /* 1-bit field: offset bit 63 */
+       return ftrace_call_code;        /* static buffer, rewritten on every call */
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+                  unsigned char *new_code, int do_check)
+{
+       unsigned char replaced[MCOUNT_INSN_SIZE];       /* copy of the text at ip */
+       /* Overwrite the MCOUNT_INSN_SIZE bytes at ip with new_code. */
+       /*
+        * Note: Due to modules and __init, code can
+        *  disappear and change, we need to protect against faulting
+        *  as well as code changing. We do this by using the
+        *  probe_kernel_* functions.
+        *
+        * No real locking needed, this code is run through
+        * kstop_machine, or before SMP starts.
+        */
+
+       if (!do_check)
+               goto skip_check;        /* old_code is not looked at (may be NULL) */
+
+       /* read the text we want to modify */
+       if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+               return -EFAULT;         /* the read of the text at ip failed */
+
+       /* Make sure it is what we expect it to be */
+       if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+               return -EINVAL;         /* text at ip differs from old_code */
+
+skip_check:
+       /* replace the text with the new text */
+       if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE))
+               return -EPERM;          /* the write to ip failed */
+       flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);  /* covers the rewritten range */
+
+       return 0;
+}
+
+static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
+{
+       unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
+       unsigned long ip = rec->ip;     /* addr is not used in this function */
+       /* Check that the text at rec->ip matches one of the two known templates. */
+       if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+               return -EFAULT;
+       if (rec->flags & FTRACE_FL_CONVERTED) { /* compare with ftrace_call_code */
+               struct ftrace_call_insn *call_insn, *tmp_call;
+               /* copy the site's immediates so memcmp() tests only the other bits */
+               call_insn = (void *)ftrace_call_code;
+               tmp_call = (void *)replaced;
+               call_insn->imm39_l = tmp_call->imm39_l;
+               call_insn->imm39_h = tmp_call->imm39_h;
+               call_insn->imm20 = tmp_call->imm20;
+               call_insn->i = tmp_call->i;
+               if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
+                       return -EINVAL;
+               return 0;
+       } else {                                /* compare with ftrace_orig_code */
+               struct ftrace_orig_insn *call_insn, *tmp_call;
+               /* copy the site's sign/imm20 so memcmp() tests only the other bits */
+               call_insn = (void *)ftrace_orig_code;
+               tmp_call = (void *)replaced;
+               call_insn->sign = tmp_call->sign;
+               call_insn->imm20 = tmp_call->imm20;
+               if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0)
+                       return -EINVAL;
+               return 0;
+       }
+}
+
+int ftrace_make_nop(struct module *mod,
+                   struct dyn_ftrace *rec, unsigned long addr)
+{
+       int ret;
+       unsigned char *new;     /* same type as ftrace_nop_replace() returns */
+       /* Replace the text at rec->ip with the nop bundles once it is validated. */
+       ret = ftrace_make_nop_check(rec, addr);
+       if (ret)
+               return ret;     /* -EFAULT or -EINVAL from the check */
+       new = ftrace_nop_replace();
+       return ftrace_modify_code(rec->ip, NULL, new, 0); /* check done above */
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+       unsigned long site = rec->ip;
+       unsigned char *nop_code = ftrace_nop_replace();
+       unsigned char *call_code = ftrace_call_replace(site, addr);
+
+       /* do_check=1: only patch if the site currently holds the nop bundles */
+       return ftrace_modify_code(site, nop_code, call_code, 1);
+}
+
+/* On IA64, _mcount can't directly call ftrace_stub. Only a jump is ok */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+       unsigned long ip;
+       unsigned long addr = ((struct fnptr *)ftrace_call)->ip; /* ftrace_call's ip */
+       /* Patch func's ip into the code at ftrace_call; always returns 0. */
+       if (func == ftrace_stub)
+               return 0;       /* nothing is patched for the stub */
+       ip = ((struct fnptr *)func)->ip;
+       /* NOTE(review): "+ 2" presumably selects slot 2 of the bundle - confirm */
+       ia64_patch_imm64(addr + 2, ip);
+
+       flush_icache_range(addr, addr + 16);    /* 16 bytes starting at ftrace_call */
+       return 0;
+}
+
+/* run from kstop_machine; stores 0 through data and returns 0 */
+int __init ftrace_dyn_arch_init(void *data)
+{
+       *(unsigned long *)data = 0;     /* presumably "no error" for the caller - confirm */
+
+       return 0;
+}
index 6da1f20..2d31186 100644 (file)
@@ -112,3 +112,9 @@ EXPORT_SYMBOL_GPL(esi_call_phys);
 #endif
 extern char ia64_ivt[];
 EXPORT_SYMBOL(ia64_ivt);
+
+#include <asm/ftrace.h>
+#ifdef CONFIG_FUNCTION_TRACER
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(_mcount);
+#endif
index 10d6cc3..e1983fa 100644 (file)
@@ -174,28 +174,8 @@ config IOMMU_LEAK
          Add a simple leak tracer to the IOMMU code. This is useful when you
          are debugging a buggy device driver that leaks IOMMU mappings.
 
-config MMIOTRACE
-       bool "Memory mapped IO tracing"
-       depends on DEBUG_KERNEL && PCI
-       select TRACING
-       help
-         Mmiotrace traces Memory Mapped I/O access and is meant for
-         debugging and reverse engineering. It is called from the ioremap
-         implementation and works via page faults. Tracing is disabled by
-         default and can be enabled at run-time.
-
-         See Documentation/tracers/mmiotrace.txt.
-         If you are not helping to develop drivers, say N.
-
-config MMIOTRACE_TEST
-       tristate "Test module for mmiotrace"
-       depends on MMIOTRACE && m
-       help
-         This is a dumb module for testing mmiotrace. It is very dangerous
-         as it will write garbage to IO memory starting at a given address.
-         However, it should be safe to use on e.g. unused portion of VRAM.
-
-         Say N, unless you absolutely know what you are doing.
+config HAVE_MMIOTRACE_SUPPORT
+       def_bool y
 
 #
 # IO delay types:
index b81125f..c7da368 100644 (file)
@@ -55,7 +55,8 @@ config KVM_AMD
 
 config KVM_TRACE
        bool "KVM trace support"
-       depends on KVM && MARKERS && SYSFS
+       depends on KVM && SYSFS
+       select MARKERS
        select RELAY
        select DEBUG_FS
        default n
index 33a9351..30659ce 100644 (file)
@@ -283,7 +283,7 @@ static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
 }
 static struct sysrq_key_op sysrq_ftrace_dump_op = {
        .handler        = sysrq_ftrace_dump,
-       .help_msg       = "dumpZ-ftrace-buffer",
+       .help_msg       = "dump-ftrace-buffer(Z)",
        .action_msg     = "Dump ftrace buffer",
        .enable_mask    = SYSRQ_ENABLE_DUMP,
 };
index 677432b..0547214 100644 (file)
@@ -126,6 +126,10 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
 extern void ftrace_call(void);
 extern void mcount_call(void);
+
+#ifndef FTRACE_ADDR
+#define FTRACE_ADDR ((unsigned long)ftrace_caller)
+#endif
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 extern void ftrace_graph_caller(void);
 extern int ftrace_enable_ftrace_graph_caller(void);
index 39c3a5e..455f9af 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/page.h>          /* kmalloc_sizes.h needs PAGE_SIZE */
 #include <asm/cache.h>         /* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
+#include <trace/kmemtrace.h>
 
 /* Size description struct for general caches. */
 struct cache_sizes {
@@ -28,8 +29,26 @@ extern struct cache_sizes malloc_sizes[];
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
-static inline void *kmalloc(size_t size, gfp_t flags)
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags);
+extern size_t slab_buffer_size(struct kmem_cache *cachep);
+#else
+static __always_inline void *
+kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
 {
+       return kmem_cache_alloc(cachep, flags);
+}
+static inline size_t slab_buffer_size(struct kmem_cache *cachep)
+{
+       return 0;
+}
+#endif
+
+static __always_inline void *kmalloc(size_t size, gfp_t flags)
+{
+       struct kmem_cache *cachep;
+       void *ret;
+
        if (__builtin_constant_p(size)) {
                int i = 0;
 
@@ -50,10 +69,17 @@ static inline void *kmalloc(size_t size, gfp_t flags)
 found:
 #ifdef CONFIG_ZONE_DMA
                if (flags & GFP_DMA)
-                       return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep,
-                                               flags);
+                       cachep = malloc_sizes[i].cs_dmacachep;
+               else
 #endif
-               return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags);
+                       cachep = malloc_sizes[i].cs_cachep;
+
+               ret = kmem_cache_alloc_notrace(cachep, flags);
+
+               kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+                                    size, slab_buffer_size(cachep), flags);
+
+               return ret;
        }
        return __kmalloc(size, flags);
 }
@@ -62,8 +88,25 @@ found:
 extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
 extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+                                          gfp_t flags,
+                                          int nodeid);
+#else
+static __always_inline void *
+kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+                             gfp_t flags,
+                             int nodeid)
+{
+       return kmem_cache_alloc_node(cachep, flags, nodeid);
+}
+#endif
+
+static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
+       struct kmem_cache *cachep;
+       void *ret;
+
        if (__builtin_constant_p(size)) {
                int i = 0;
 
@@ -84,11 +127,18 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 found:
 #ifdef CONFIG_ZONE_DMA
                if (flags & GFP_DMA)
-                       return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep,
-                                               flags, node);
+                       cachep = malloc_sizes[i].cs_dmacachep;
+               else
 #endif
-               return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep,
-                                               flags, node);
+                       cachep = malloc_sizes[i].cs_cachep;
+
+               ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
+
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_,
+                                         ret, size, slab_buffer_size(cachep),
+                                         flags, node);
+
+               return ret;
        }
        return __kmalloc_node(size, flags, node);
 }
index 59a3fa4..0ec00b3 100644 (file)
@@ -3,14 +3,15 @@
 
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
-static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
+                                             gfp_t flags)
 {
        return kmem_cache_alloc_node(cachep, flags, -1);
 }
 
 void *__kmalloc_node(size_t size, gfp_t flags, int node);
 
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
        return __kmalloc_node(size, flags, node);
 }
@@ -23,12 +24,12 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
  * kmalloc is the normal method of allocating memory
  * in the kernel.
  */
-static inline void *kmalloc(size_t size, gfp_t flags)
+static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
        return __kmalloc_node(size, flags, -1);
 }
 
-static inline void *__kmalloc(size_t size, gfp_t flags)
+static __always_inline void *__kmalloc(size_t size, gfp_t flags)
 {
        return kmalloc(size, flags);
 }
index 2f5c16b..6b657f7 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
+#include <trace/kmemtrace.h>
 
 enum stat_item {
        ALLOC_FASTPATH,         /* Allocation from cpu slab */
@@ -204,13 +205,31 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
+#else
+static __always_inline void *
+kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+       return kmem_cache_alloc(s, gfpflags);
+}
+#endif
+
 static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 {
-       return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
+       unsigned int order = get_order(size);
+       void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+
+       kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+                            size, PAGE_SIZE << order, flags);
+
+       return ret;
 }
 
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
+       void *ret;
+
        if (__builtin_constant_p(size)) {
                if (size > PAGE_SIZE)
                        return kmalloc_large(size, flags);
@@ -221,7 +240,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
                        if (!s)
                                return ZERO_SIZE_PTR;
 
-                       return kmem_cache_alloc(s, flags);
+                       ret = kmem_cache_alloc_notrace(s, flags);
+
+                       kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
+                                            _THIS_IP_, ret,
+                                            size, s->size, flags);
+
+                       return ret;
                }
        }
        return __kmalloc(size, flags);
@@ -231,8 +256,24 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
 void *__kmalloc_node(size_t size, gfp_t flags, int node);
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+                                          gfp_t gfpflags,
+                                          int node);
+#else
+static __always_inline void *
+kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+                             gfp_t gfpflags,
+                             int node)
+{
+       return kmem_cache_alloc_node(s, gfpflags, node);
+}
+#endif
+
 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
+       void *ret;
+
        if (__builtin_constant_p(size) &&
                size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
                        struct kmem_cache *s = kmalloc_slab(size);
@@ -240,7 +281,13 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
                if (!s)
                        return ZERO_SIZE_PTR;
 
-               return kmem_cache_alloc_node(s, flags, node);
+               ret = kmem_cache_alloc_node_notrace(s, flags, node);
+
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+                                         _THIS_IP_, ret,
+                                         size, s->size, flags, node);
+
+               return ret;
        }
        return __kmalloc_node(size, flags, node);
 }
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
new file mode 100644 (file)
index 0000000..ad8b785
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ *
+ * This file is released under GPL version 2.
+ */
+
+#ifndef _LINUX_KMEMTRACE_H
+#define _LINUX_KMEMTRACE_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/marker.h>
+
+enum kmemtrace_type_id {
+       KMEMTRACE_TYPE_KMALLOC = 0,     /* kmalloc() or kfree(). */
+       KMEMTRACE_TYPE_CACHE,           /* kmem_cache_*(). */
+       KMEMTRACE_TYPE_PAGES,           /* __get_free_pages() and friends. */
+};
+
+#ifdef CONFIG_KMEMTRACE
+
+extern void kmemtrace_init(void);
+
+extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+                                            unsigned long call_site,
+                                            const void *ptr,
+                                            size_t bytes_req,
+                                            size_t bytes_alloc,
+                                            gfp_t gfp_flags,
+                                            int node);
+
+extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+                                      unsigned long call_site,
+                                      const void *ptr);
+
+#else /* CONFIG_KMEMTRACE */
+
+static inline void kmemtrace_init(void)
+{
+}
+
+static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+                                            unsigned long call_site,
+                                            const void *ptr,
+                                            size_t bytes_req,
+                                            size_t bytes_alloc,
+                                            gfp_t gfp_flags,
+                                            int node)
+{
+}
+
+static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+                                      unsigned long call_site,
+                                      const void *ptr)
+{
+}
+
+#endif /* CONFIG_KMEMTRACE */
+
+static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id,
+                                       unsigned long call_site,
+                                       const void *ptr,
+                                       size_t bytes_req,
+                                       size_t bytes_alloc,
+                                       gfp_t gfp_flags)
+{
+       kmemtrace_mark_alloc_node(type_id, call_site, ptr,
+                                 bytes_req, bytes_alloc, gfp_flags, -1);
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_KMEMTRACE_H */
+
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
new file mode 100644 (file)
index 0000000..867829d
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef __TRACE_WORKQUEUE_H
+#define __TRACE_WORKQUEUE_H
+
+#include <linux/tracepoint.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+
+DECLARE_TRACE(workqueue_insertion,
+          TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
+          TPARGS(wq_thread, work));
+
+DECLARE_TRACE(workqueue_execution,
+          TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
+          TPARGS(wq_thread, work));
+
+/* Trace the creation of one workqueue thread on a cpu */
+DECLARE_TRACE(workqueue_creation,
+          TPPROTO(struct task_struct *wq_thread, int cpu),
+          TPARGS(wq_thread, cpu));
+
+DECLARE_TRACE(workqueue_destruction,
+          TPPROTO(struct task_struct *wq_thread),
+          TPARGS(wq_thread));
+
+#endif /* __TRACE_WORKQUEUE_H */
index 8442094..db7974f 100644 (file)
@@ -70,6 +70,7 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
+#include <trace/kmemtrace.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/smp.h>
@@ -641,6 +642,7 @@ asmlinkage void __init start_kernel(void)
        enable_debug_pagealloc();
        cpu_hotplug_init();
        kmem_cache_init();
+       kmemtrace_init();
        debug_objects_mem_init();
        idr_init_cache();
        setup_per_cpu_pageset();
index 09ac200..d064506 100644 (file)
@@ -675,9 +675,7 @@ int relay_late_setup_files(struct rchan *chan,
         */
        for_each_online_cpu(i) {
                if (unlikely(!chan->buf[i])) {
-                       printk(KERN_ERR "relay_late_setup_files: CPU %u "
-                                       "has no buffer, it must have!\n", i);
-                       BUG();
+                       WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
                        err = -EINVAL;
                        break;
                }
index e2a4ff6..dde1d46 100644 (file)
@@ -264,6 +264,38 @@ config HW_BRANCH_TRACER
          This tracer records all branches on the system in a circular
          buffer giving access to the last N branches for each cpu.
 
+config KMEMTRACE
+       bool "Trace SLAB allocations"
+       select TRACING
+       help
+         kmemtrace provides tracing for slab allocator functions, such as
+         kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
+         data is then fed to the userspace application in order to analyse
+         allocation hotspots, internal fragmentation and so on, making it
+         possible to see how well an allocator performs, as well as debug
+         and profile kernel code.
+
+         This requires a userspace application to use. See
+         Documentation/vm/kmemtrace.txt for more information.
+
+         Saying Y will make the kernel somewhat larger and slower. However,
+         if you disable kmemtrace at run-time or boot-time, the performance
+         impact is minimal (depending on the arch the kernel is built for).
+
+         If unsure, say N.
+
+config WORKQUEUE_TRACER
+       bool "Trace workqueues"
+       select TRACING
+       help
+         The workqueue tracer provides statistical information about
+         each per-cpu workqueue thread, such as the number of works
+         inserted and executed since its creation. It can help to
+         evaluate the amount of work each of them has to perform.
+         For example, it can help a developer decide whether to choose
+         a per-cpu workqueue instead of a single-threaded one.
+
+
 config DYNAMIC_FTRACE
        bool "enable/disable ftrace tracepoints dynamically"
        depends on FUNCTION_TRACER
@@ -302,4 +334,27 @@ config FTRACE_STARTUP_TEST
          functioning properly. It will do tests on all the configured
          tracers of ftrace.
 
+config MMIOTRACE
+       bool "Memory mapped IO tracing"
+       depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI
+       select TRACING
+       help
+         Mmiotrace traces Memory Mapped I/O access and is meant for
+         debugging and reverse engineering. It is called from the ioremap
+         implementation and works via page faults. Tracing is disabled by
+         default and can be enabled at run-time.
+
+         See Documentation/tracers/mmiotrace.txt.
+         If you are not helping to develop drivers, say N.
+
+config MMIOTRACE_TEST
+       tristate "Test module for mmiotrace"
+       depends on MMIOTRACE && m
+       help
+         This is a dumb module for testing mmiotrace. It is very dangerous
+         as it will write garbage to IO memory starting at a given address.
+         However, it should be safe to use on e.g. unused portion of VRAM.
+
+         Say N, unless you absolutely know what you are doing.
+
 endmenu
index 349d5a9..f76d48f 100644 (file)
@@ -19,6 +19,8 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_output.o
+obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
@@ -33,5 +35,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
 obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
 obj-$(CONFIG_POWER_TRACER) += trace_power.o
+obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
+obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 
 libftrace-y := ftrace.o
index 2f32969..7e9a20b 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/clocksource.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
+#include <linux/suspend.h>
 #include <linux/debugfs.h>
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
@@ -263,14 +264,6 @@ static void ftrace_update_pid_func(void)
 # error Dynamic ftrace depends on MCOUNT_RECORD
 #endif
 
-/*
- * Since MCOUNT_ADDR may point to mcount itself, we do not want
- * to get it confused by reading a reference in the code as we
- * are parsing on objcopy output of text. Use a variable for
- * it instead.
- */
-static unsigned long mcount_addr = MCOUNT_ADDR;
-
 enum {
        FTRACE_ENABLE_CALLS             = (1 << 0),
        FTRACE_DISABLE_CALLS            = (1 << 1),
@@ -289,7 +282,7 @@ static DEFINE_MUTEX(ftrace_regex_lock);
 
 struct ftrace_page {
        struct ftrace_page      *next;
-       unsigned long           index;
+       int                     index;
        struct dyn_ftrace       records[];
 };
 
@@ -463,7 +456,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
        unsigned long ip, fl;
        unsigned long ftrace_addr;
 
-       ftrace_addr = (unsigned long)ftrace_caller;
+       ftrace_addr = (unsigned long)FTRACE_ADDR;
 
        ip = rec->ip;
 
@@ -575,7 +568,7 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
 
        ip = rec->ip;
 
-       ret = ftrace_make_nop(mod, rec, mcount_addr);
+       ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
        if (ret) {
                ftrace_bug(ret, ip);
                rec->flags |= FTRACE_FL_FAILED;
@@ -786,7 +779,7 @@ enum {
 
 struct ftrace_iterator {
        struct ftrace_page      *pg;
-       unsigned                idx;
+       int                     idx;
        unsigned                flags;
        unsigned char           buffer[FTRACE_BUFF_MAX+1];
        unsigned                buffer_idx;
@@ -1902,7 +1895,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
 }
 
 /**
- * unregister_ftrace_function - unresgister a function for profiling.
+ * unregister_ftrace_function - unregister a function for profiling.
  * @ops - ops structure that holds the function to unregister
  *
  * Unregister a function that was added to be called by ftrace profiling.
@@ -1965,6 +1958,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 static atomic_t ftrace_graph_active;
+static struct notifier_block ftrace_suspend_notifier;
 
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
 {
@@ -2043,6 +2037,27 @@ static int start_graph_tracing(void)
        return ret;
 }
 
+/*
+ * Hibernation protection.
+ * The state of the current task is far too unstable during
+ * suspend/restore to disk. We want to protect against that.
+ */
+static int
+ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
+                                                       void *unused)
+{
+       switch (state) {
+       case PM_HIBERNATION_PREPARE:
+               pause_graph_tracing();
+               break;
+
+       case PM_POST_HIBERNATION:
+               unpause_graph_tracing();
+               break;
+       }
+       return NOTIFY_DONE;
+}
+
 int register_ftrace_graph(trace_func_graph_ret_t retfunc,
                        trace_func_graph_ent_t entryfunc)
 {
@@ -2050,6 +2065,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 
        mutex_lock(&ftrace_sysctl_lock);
 
+       ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
+       register_pm_notifier(&ftrace_suspend_notifier);
+
        atomic_inc(&ftrace_graph_active);
        ret = start_graph_tracing();
        if (ret) {
@@ -2075,6 +2093,7 @@ void unregister_ftrace_graph(void)
        ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
        ftrace_graph_entry = ftrace_graph_entry_stub;
        ftrace_shutdown(FTRACE_STOP_FUNC_RET);
+       unregister_pm_notifier(&ftrace_suspend_notifier);
 
        mutex_unlock(&ftrace_sysctl_lock);
 }
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
new file mode 100644 (file)
index 0000000..7ebc58c
--- /dev/null
@@ -0,0 +1,350 @@
+/*
+ * Memory allocator tracing
+ *
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+#include <linux/dcache.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <trace/kmemtrace.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+/* Select a minimalistic output as an alternative to the original one */
+#define TRACE_KMEM_OPT_MINIMAL 0x1
+
+static struct tracer_opt kmem_opts[] = {
+       /* Default disable the minimalistic output */
+       { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
+       { }
+};
+
+static struct tracer_flags kmem_tracer_flags = {
+       .val = 0,
+       .opts = kmem_opts
+};
+
+
+static bool kmem_tracing_enabled __read_mostly;
+static struct trace_array *kmemtrace_array;
+
+static int kmem_trace_init(struct trace_array *tr)
+{
+       int cpu;
+       kmemtrace_array = tr;
+
+       for_each_cpu_mask(cpu, cpu_possible_map)
+               tracing_reset(tr, cpu);
+
+       kmem_tracing_enabled = true;
+
+       return 0;
+}
+
+static void kmem_trace_reset(struct trace_array *tr)
+{
+       kmem_tracing_enabled = false;
+}
+
+static void kmemtrace_headers(struct seq_file *s)
+{
+       /* Don't need headers for the original kmemtrace output */
+       if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
+               return;
+
+       seq_printf(s, "#\n");
+       seq_printf(s, "# ALLOC  TYPE  REQ   GIVEN  FLAGS     "
+                       "      POINTER         NODE    CALLER\n");
+       seq_printf(s, "# FREE   |      |     |       |       "
+                       "       |   |            |        |\n");
+       seq_printf(s, "# |\n\n");
+}
+
+/*
+ * The following two functions give the original output from kmemtrace,
+ * or something close to it; some details may still be missing.
+ */
+static enum print_line_t
+kmemtrace_print_alloc_original(struct trace_iterator *iter,
+                               struct kmemtrace_alloc_entry *entry)
+{
+       struct trace_seq *s = &iter->seq;
+       int ret;
+
+       /* Taken from the old linux/kmemtrace.h */
+       ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu "
+         "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
+          entry->type_id, entry->call_site, (unsigned long) entry->ptr,
+          (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc,
+          (unsigned long) entry->gfp_flags, entry->node);
+
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+kmemtrace_print_free_original(struct trace_iterator *iter,
+                               struct kmemtrace_free_entry *entry)
+{
+       struct trace_seq *s = &iter->seq;
+       int ret;
+
+       /* Taken from the old linux/kmemtrace.h */
+       ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n",
+          entry->type_id, entry->call_site, (unsigned long) entry->ptr);
+
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return TRACE_TYPE_HANDLED;
+}
+
+
+/* The other two functions below provide a more minimalistic output */
+static enum print_line_t
+kmemtrace_print_alloc_compress(struct trace_iterator *iter,
+                                       struct kmemtrace_alloc_entry *entry)
+{
+       struct trace_seq *s = &iter->seq;
+       int ret;
+
+       /* Alloc entry */
+       ret = trace_seq_printf(s, "  +      ");
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Type */
+       switch (entry->type_id) {
+       case KMEMTRACE_TYPE_KMALLOC:
+               ret = trace_seq_printf(s, "K   ");
+               break;
+       case KMEMTRACE_TYPE_CACHE:
+               ret = trace_seq_printf(s, "C   ");
+               break;
+       case KMEMTRACE_TYPE_PAGES:
+               ret = trace_seq_printf(s, "P   ");
+               break;
+       default:
+               ret = trace_seq_printf(s, "?   ");
+       }
+
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Requested; cast so the argument type matches the format */
+       ret = trace_seq_printf(s, "%4lu   ",
+                              (unsigned long) entry->bytes_req);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Allocated; cast for the same reason as above */
+       ret = trace_seq_printf(s, "%4lu   ",
+                              (unsigned long) entry->bytes_alloc);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Flags (TODO: print the GFP flag names rather than the raw value) */
+       ret = trace_seq_printf(s, "%08x   ", entry->gfp_flags);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Pointer to allocated */
+       ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Node */
+       ret = trace_seq_printf(s, "%4d   ", entry->node);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Call site */
+       ret = seq_print_ip_sym(s, entry->call_site, 0);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       if (!trace_seq_printf(s, "\n"))
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+kmemtrace_print_free_compress(struct trace_iterator *iter,
+                               struct kmemtrace_free_entry *entry)
+{
+       struct trace_seq *s = &iter->seq;
+       int ret;
+
+       /* Free entry */
+       ret = trace_seq_printf(s, "  -      ");
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Type */
+       switch (entry->type_id) {
+       case KMEMTRACE_TYPE_KMALLOC:
+               ret = trace_seq_printf(s, "K     ");
+               break;
+       case KMEMTRACE_TYPE_CACHE:
+               ret = trace_seq_printf(s, "C     ");
+               break;
+       case KMEMTRACE_TYPE_PAGES:
+               ret = trace_seq_printf(s, "P     ");
+               break;
+       default:
+               ret = trace_seq_printf(s, "?     ");
+       }
+
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Skip requested/allocated/flags */
+       ret = trace_seq_printf(s, "                       ");
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Pointer to allocated */
+       ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Skip node */
+       ret = trace_seq_printf(s, "       ");
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       /* Call site */
+       ret = seq_print_ip_sym(s, entry->call_site, 0);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       if (!trace_seq_printf(s, "\n"))
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
+{
+       struct trace_entry *entry = iter->ent;
+
+       switch (entry->type) {
+       case TRACE_KMEM_ALLOC: {
+               struct kmemtrace_alloc_entry *field;
+               trace_assign_type(field, entry);
+               if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
+                       return kmemtrace_print_alloc_compress(iter, field);
+               else
+                       return kmemtrace_print_alloc_original(iter, field);
+       }
+
+       case TRACE_KMEM_FREE: {
+               struct kmemtrace_free_entry *field;
+               trace_assign_type(field, entry);
+               if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
+                       return kmemtrace_print_free_compress(iter, field);
+               else
+                       return kmemtrace_print_free_original(iter, field);
+       }
+
+       default:
+               return TRACE_TYPE_UNHANDLED;
+       }
+}
+
+/*
+ * Trace allocations. Does nothing unless the kmemtrace tracer is enabled.
+ */
+void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+                              unsigned long call_site, const void *ptr,
+                              size_t bytes_req, size_t bytes_alloc,
+                              gfp_t gfp_flags, int node)
+{
+       struct ring_buffer_event *event;
+       struct kmemtrace_alloc_entry *entry;
+       struct trace_array *tr = kmemtrace_array;
+       unsigned long irq_flags;
+
+       if (!kmem_tracing_enabled)
+               return;
+
+       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+                                        &irq_flags);
+       if (!event)
+               return;
+       entry = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, 0, 0);
+
+       entry->ent.type = TRACE_KMEM_ALLOC;
+       entry->type_id = type_id;       /* read by the print handlers */
+       entry->call_site = call_site;
+       entry->ptr = ptr;
+       entry->bytes_req = bytes_req;
+       entry->bytes_alloc = bytes_alloc;
+       entry->gfp_flags = gfp_flags;
+       entry->node = node;
+
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+       trace_wake_up();
+}
+EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
+
+void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+                      unsigned long call_site,
+                      const void *ptr)
+{
+       struct ring_buffer_event *event;
+       struct kmemtrace_free_entry *entry;
+       struct trace_array *tr = kmemtrace_array;
+       unsigned long irq_flags;
+
+       if (!kmem_tracing_enabled)
+               return;
+
+       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+                                        &irq_flags);
+       if (!event)
+               return;
+       entry   = ring_buffer_event_data(event);
+       tracing_generic_entry_update(&entry->ent, 0, 0);
+
+       entry->ent.type = TRACE_KMEM_FREE;
+       entry->type_id  = type_id;
+       entry->call_site = call_site;
+       entry->ptr = ptr;
+
+       ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+       trace_wake_up();
+}
+EXPORT_SYMBOL(kmemtrace_mark_free);
+
+static struct tracer kmem_tracer __read_mostly = {
+       .name           = "kmemtrace",
+       .init           = kmem_trace_init,
+       .reset          = kmem_trace_reset,
+       .print_line     = kmemtrace_print_line,
+       .print_header = kmemtrace_headers,
+       .flags          = &kmem_tracer_flags
+};
+
+void kmemtrace_init(void)
+{
+       /* earliest opportunity to start kmem tracing */
+}
+
+static int __init init_kmem_tracer(void)
+{
+       return register_tracer(&kmem_tracer);
+}
+
+device_initcall(init_kmem_tracer);
index 8b0daf0..0b9de5a 100644 (file)
@@ -123,8 +123,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
 EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
 
 #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
-#define RB_ALIGNMENT_SHIFT     2
-#define RB_ALIGNMENT           (1 << RB_ALIGNMENT_SHIFT)
+#define RB_ALIGNMENT           4U
 #define RB_MAX_SMALL_DATA      28
 
 enum {
@@ -133,7 +132,7 @@ enum {
 };
 
 /* inline for ring buffer fast paths */
-static inline unsigned
+static unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
        unsigned length;
@@ -151,7 +150,7 @@ rb_event_length(struct ring_buffer_event *event)
 
        case RINGBUF_TYPE_DATA:
                if (event->len)
-                       length = event->len << RB_ALIGNMENT_SHIFT;
+                       length = event->len * RB_ALIGNMENT;
                else
                        length = event->array[0];
                return length + RB_EVNT_HDR_SIZE;
@@ -179,7 +178,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 
 /* inline for ring buffer fast paths */
-static inline void *
+static void *
 rb_event_data(struct ring_buffer_event *event)
 {
        BUG_ON(event->type != RINGBUF_TYPE_DATA);
@@ -229,10 +228,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
  * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
  * this issue out.
  */
-static inline void free_buffer_page(struct buffer_page *bpage)
+static void free_buffer_page(struct buffer_page *bpage)
 {
-       if (bpage->page)
-               free_page((unsigned long)bpage->page);
+       free_page((unsigned long)bpage->page);
        kfree(bpage);
 }
 
@@ -811,7 +809,7 @@ rb_event_index(struct ring_buffer_event *event)
        return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
 }
 
-static inline int
+static int
 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
             struct ring_buffer_event *event)
 {
@@ -825,7 +823,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
                rb_commit_index(cpu_buffer) == index;
 }
 
-static inline void
+static void
 rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
                    struct ring_buffer_event *event)
 {
@@ -850,7 +848,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
        local_set(&cpu_buffer->commit_page->page->commit, index);
 }
 
-static inline void
+static void
 rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 {
        /*
@@ -896,7 +894,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->reader_page->read = 0;
 }
 
-static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+static void rb_inc_iter(struct ring_buffer_iter *iter)
 {
        struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 
@@ -926,7 +924,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
  * and with this, we can determine what to place into the
  * data field.
  */
-static inline void
+static void
 rb_update_event(struct ring_buffer_event *event,
                         unsigned type, unsigned length)
 {
@@ -938,15 +936,11 @@ rb_update_event(struct ring_buffer_event *event,
                break;
 
        case RINGBUF_TYPE_TIME_EXTEND:
-               event->len =
-                       (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
-                       >> RB_ALIGNMENT_SHIFT;
+               event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
                break;
 
        case RINGBUF_TYPE_TIME_STAMP:
-               event->len =
-                       (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
-                       >> RB_ALIGNMENT_SHIFT;
+               event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
                break;
 
        case RINGBUF_TYPE_DATA:
@@ -955,16 +949,14 @@ rb_update_event(struct ring_buffer_event *event,
                        event->len = 0;
                        event->array[0] = length;
                } else
-                       event->len =
-                               (length + (RB_ALIGNMENT-1))
-                               >> RB_ALIGNMENT_SHIFT;
+                       event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
                break;
        default:
                BUG();
        }
 }
 
-static inline unsigned rb_calculate_event_length(unsigned length)
+static unsigned rb_calculate_event_length(unsigned length)
 {
        struct ring_buffer_event event; /* Used only for sizeof array */
 
@@ -1025,12 +1017,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                }
 
                if (next_page == head_page) {
-                       if (!(buffer->flags & RB_FL_OVERWRITE)) {
-                               /* reset write */
-                               if (tail <= BUF_PAGE_SIZE)
-                                       local_set(&tail_page->write, tail);
+                       if (!(buffer->flags & RB_FL_OVERWRITE))
                                goto out_unlock;
-                       }
 
                        /* tail_page has not moved yet? */
                        if (tail_page == cpu_buffer->tail_page) {
@@ -1105,6 +1093,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
        return event;
 
  out_unlock:
+       /* reset write */
+       if (tail <= BUF_PAGE_SIZE)
+               local_set(&tail_page->write, tail);
+
        __raw_spin_unlock(&cpu_buffer->lock);
        local_irq_restore(flags);
        return NULL;
@@ -1438,7 +1430,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_write);
 
-static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
 {
        struct buffer_page *reader = cpu_buffer->reader_page;
        struct buffer_page *head = cpu_buffer->head_page;
index c580233..220c264 100644 (file)
 #include <linux/irqflags.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 #define TRACE_BUFFER_FLAGS     (RB_FL_OVERWRITE)
 
-unsigned long __read_mostly    tracing_max_latency = (cycle_t)ULONG_MAX;
+unsigned long __read_mostly    tracing_max_latency;
 unsigned long __read_mostly    tracing_thresh;
 
 /*
@@ -186,9 +187,6 @@ int tracing_is_enabled(void)
        return tracer_enabled;
 }
 
-/* function tracing enabled */
-int                            ftrace_function_enabled;
-
 /*
  * trace_buf_size is the size in bytes that is allocated
  * for a buffer. Note, the number of bytes is always rounded
@@ -329,132 +327,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
        tracing_record_cmdline(current);
 }
 
-/**
- * trace_seq_printf - sequence printing of trace information
- * @s: trace sequence descriptor
- * @fmt: printf format string
- *
- * The tracer may use either sequence operations or its own
- * copy to user routines. To simplify formating of a trace
- * trace_seq_printf is used to store strings into a special
- * buffer (@s). Then the output may be either used by
- * the sequencer or pulled into another buffer.
- */
-int
-trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-{
-       int len = (PAGE_SIZE - 1) - s->len;
-       va_list ap;
-       int ret;
-
-       if (!len)
-               return 0;
-
-       va_start(ap, fmt);
-       ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
-       va_end(ap);
-
-       /* If we can't write it all, don't bother writing anything */
-       if (ret >= len)
-               return 0;
-
-       s->len += ret;
-
-       return len;
-}
-
-/**
- * trace_seq_puts - trace sequence printing of simple string
- * @s: trace sequence descriptor
- * @str: simple string to record
- *
- * The tracer may use either the sequence operations or its own
- * copy to user routines. This function records a simple string
- * into a special buffer (@s) for later retrieval by a sequencer
- * or other mechanism.
- */
-static int
-trace_seq_puts(struct trace_seq *s, const char *str)
-{
-       int len = strlen(str);
-
-       if (len > ((PAGE_SIZE - 1) - s->len))
-               return 0;
-
-       memcpy(s->buffer + s->len, str, len);
-       s->len += len;
-
-       return len;
-}
-
-static int
-trace_seq_putc(struct trace_seq *s, unsigned char c)
-{
-       if (s->len >= (PAGE_SIZE - 1))
-               return 0;
-
-       s->buffer[s->len++] = c;
-
-       return 1;
-}
-
-static int
-trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
-{
-       if (len > ((PAGE_SIZE - 1) - s->len))
-               return 0;
-
-       memcpy(s->buffer + s->len, mem, len);
-       s->len += len;
-
-       return len;
-}
-
-#define MAX_MEMHEX_BYTES       8
-#define HEX_CHARS              (MAX_MEMHEX_BYTES*2 + 1)
-
-static int
-trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
-{
-       unsigned char hex[HEX_CHARS];
-       unsigned char *data = mem;
-       int i, j;
-
-#ifdef __BIG_ENDIAN
-       for (i = 0, j = 0; i < len; i++) {
-#else
-       for (i = len-1, j = 0; i >= 0; i--) {
-#endif
-               hex[j++] = hex_asc_hi(data[i]);
-               hex[j++] = hex_asc_lo(data[i]);
-       }
-       hex[j++] = ' ';
-
-       return trace_seq_putmem(s, hex, j);
-}
-
-static int
-trace_seq_path(struct trace_seq *s, struct path *path)
-{
-       unsigned char *p;
-
-       if (s->len >= (PAGE_SIZE - 1))
-               return 0;
-       p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
-       if (!IS_ERR(p)) {
-               p = mangle_path(s->buffer + s->len, p, "\n");
-               if (p) {
-                       s->len = p - s->buffer;
-                       return 1;
-               }
-       } else {
-               s->buffer[s->len++] = '?';
-               return 1;
-       }
-
-       return 0;
-}
-
 static void
 trace_seq_reset(struct trace_seq *s)
 {
@@ -960,10 +832,10 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
                trace_function(tr, data, ip, parent_ip, flags, pc);
 }
 
-static void ftrace_trace_stack(struct trace_array *tr,
-                              struct trace_array_cpu *data,
-                              unsigned long flags,
-                              int skip, int pc)
+static void __ftrace_trace_stack(struct trace_array *tr,
+                                struct trace_array_cpu *data,
+                                unsigned long flags,
+                                int skip, int pc)
 {
 #ifdef CONFIG_STACKTRACE
        struct ring_buffer_event *event;
@@ -971,9 +843,6 @@ static void ftrace_trace_stack(struct trace_array *tr,
        struct stack_trace trace;
        unsigned long irq_flags;
 
-       if (!(trace_flags & TRACE_ITER_STACKTRACE))
-               return;
-
        event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
                                         &irq_flags);
        if (!event)
@@ -994,12 +863,23 @@ static void ftrace_trace_stack(struct trace_array *tr,
 #endif
 }
 
+static void ftrace_trace_stack(struct trace_array *tr,
+                              struct trace_array_cpu *data,
+                              unsigned long flags,
+                              int skip, int pc)
+{
+       if (!(trace_flags & TRACE_ITER_STACKTRACE))
+               return;
+
+       __ftrace_trace_stack(tr, data, flags, skip, pc);
+}
+
 void __trace_stack(struct trace_array *tr,
                   struct trace_array_cpu *data,
                   unsigned long flags,
-                  int skip)
+                  int skip, int pc)
 {
-       ftrace_trace_stack(tr, data, flags, skip, preempt_count());
+       __ftrace_trace_stack(tr, data, flags, skip, pc);
 }
 
 static void ftrace_trace_userstack(struct trace_array *tr,
@@ -1163,65 +1043,6 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
        local_irq_restore(flags);
 }
 
-#ifdef CONFIG_FUNCTION_TRACER
-static void
-function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
-{
-       struct trace_array *tr = &global_trace;
-       struct trace_array_cpu *data;
-       unsigned long flags;
-       long disabled;
-       int cpu, resched;
-       int pc;
-
-       if (unlikely(!ftrace_function_enabled))
-               return;
-
-       pc = preempt_count();
-       resched = ftrace_preempt_disable();
-       local_save_flags(flags);
-       cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
-
-       if (likely(disabled == 1))
-               trace_function(tr, data, ip, parent_ip, flags, pc);
-
-       atomic_dec(&data->disabled);
-       ftrace_preempt_enable(resched);
-}
-
-static void
-function_trace_call(unsigned long ip, unsigned long parent_ip)
-{
-       struct trace_array *tr = &global_trace;
-       struct trace_array_cpu *data;
-       unsigned long flags;
-       long disabled;
-       int cpu;
-       int pc;
-
-       if (unlikely(!ftrace_function_enabled))
-               return;
-
-       /*
-        * Need to use raw, since this must be called before the
-        * recursive protection is performed.
-        */
-       local_irq_save(flags);
-       cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-       disabled = atomic_inc_return(&data->disabled);
-
-       if (likely(disabled == 1)) {
-               pc = preempt_count();
-               trace_function(tr, data, ip, parent_ip, flags, pc);
-       }
-
-       atomic_dec(&data->disabled);
-       local_irq_restore(flags);
-}
-
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 int trace_graph_entry(struct ftrace_graph_ent *trace)
 {
@@ -1279,31 +1100,6 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-static struct ftrace_ops trace_ops __read_mostly =
-{
-       .func = function_trace_call,
-};
-
-void tracing_start_function_trace(void)
-{
-       ftrace_function_enabled = 0;
-
-       if (trace_flags & TRACE_ITER_PREEMPTONLY)
-               trace_ops.func = function_trace_call_preempt_only;
-       else
-               trace_ops.func = function_trace_call;
-
-       register_ftrace_function(&trace_ops);
-       ftrace_function_enabled = 1;
-}
-
-void tracing_stop_function_trace(void)
-{
-       ftrace_function_enabled = 0;
-       unregister_ftrace_function(&trace_ops);
-}
-#endif
-
 enum trace_file_type {
        TRACE_FILE_LAT_FMT      = 1,
        TRACE_FILE_ANNOTATE     = 2,
@@ -1472,154 +1268,6 @@ static void s_stop(struct seq_file *m, void *p)
        mutex_unlock(&trace_types_lock);
 }
 
-#ifdef CONFIG_KRETPROBES
-static inline const char *kretprobed(const char *name)
-{
-       static const char tramp_name[] = "kretprobe_trampoline";
-       int size = sizeof(tramp_name);
-
-       if (strncmp(tramp_name, name, size) == 0)
-               return "[unknown/kretprobe'd]";
-       return name;
-}
-#else
-static inline const char *kretprobed(const char *name)
-{
-       return name;
-}
-#endif /* CONFIG_KRETPROBES */
-
-static int
-seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
-{
-#ifdef CONFIG_KALLSYMS
-       char str[KSYM_SYMBOL_LEN];
-       const char *name;
-
-       kallsyms_lookup(address, NULL, NULL, NULL, str);
-
-       name = kretprobed(str);
-
-       return trace_seq_printf(s, fmt, name);
-#endif
-       return 1;
-}
-
-static int
-seq_print_sym_offset(struct trace_seq *s, const char *fmt,
-                    unsigned long address)
-{
-#ifdef CONFIG_KALLSYMS
-       char str[KSYM_SYMBOL_LEN];
-       const char *name;
-
-       sprint_symbol(str, address);
-       name = kretprobed(str);
-
-       return trace_seq_printf(s, fmt, name);
-#endif
-       return 1;
-}
-
-#ifndef CONFIG_64BIT
-# define IP_FMT "%08lx"
-#else
-# define IP_FMT "%016lx"
-#endif
-
-int
-seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
-{
-       int ret;
-
-       if (!ip)
-               return trace_seq_printf(s, "0");
-
-       if (sym_flags & TRACE_ITER_SYM_OFFSET)
-               ret = seq_print_sym_offset(s, "%s", ip);
-       else
-               ret = seq_print_sym_short(s, "%s", ip);
-
-       if (!ret)
-               return 0;
-
-       if (sym_flags & TRACE_ITER_SYM_ADDR)
-               ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
-       return ret;
-}
-
-static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
-                                   unsigned long ip, unsigned long sym_flags)
-{
-       struct file *file = NULL;
-       unsigned long vmstart = 0;
-       int ret = 1;
-
-       if (mm) {
-               const struct vm_area_struct *vma;
-
-               down_read(&mm->mmap_sem);
-               vma = find_vma(mm, ip);
-               if (vma) {
-                       file = vma->vm_file;
-                       vmstart = vma->vm_start;
-               }
-               if (file) {
-                       ret = trace_seq_path(s, &file->f_path);
-                       if (ret)
-                               ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
-               }
-               up_read(&mm->mmap_sem);
-       }
-       if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
-               ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
-       return ret;
-}
-
-static int
-seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
-                     unsigned long sym_flags)
-{
-       struct mm_struct *mm = NULL;
-       int ret = 1;
-       unsigned int i;
-
-       if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
-               struct task_struct *task;
-               /*
-                * we do the lookup on the thread group leader,
-                * since individual threads might have already quit!
-                */
-               rcu_read_lock();
-               task = find_task_by_vpid(entry->ent.tgid);
-               if (task)
-                       mm = get_task_mm(task);
-               rcu_read_unlock();
-       }
-
-       for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-               unsigned long ip = entry->caller[i];
-
-               if (ip == ULONG_MAX || !ret)
-                       break;
-               if (i && ret)
-                       ret = trace_seq_puts(s, " <- ");
-               if (!ip) {
-                       if (ret)
-                               ret = trace_seq_puts(s, "??");
-                       continue;
-               }
-               if (!ret)
-                       break;
-               if (ret)
-                       ret = seq_print_user_ip(s, mm, ip, sym_flags);
-       }
-
-       if (mm)
-               mmput(mm);
-       return ret;
-}
-
 static void print_lat_help_header(struct seq_file *m)
 {
        seq_puts(m, "#                  _------=> CPU#            \n");
@@ -1755,52 +1403,6 @@ lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
                trace_seq_puts(s, " : ");
 }
 
-static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
-
-static int task_state_char(unsigned long state)
-{
-       int bit = state ? __ffs(state) + 1 : 0;
-
-       return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
-}
-
-/*
- * The message is supposed to contain an ending newline.
- * If the printing stops prematurely, try to add a newline of our own.
- */
-void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
-{
-       struct trace_entry *ent;
-       struct trace_field_cont *cont;
-       bool ok = true;
-
-       ent = peek_next_entry(iter, iter->cpu, NULL);
-       if (!ent || ent->type != TRACE_CONT) {
-               trace_seq_putc(s, '\n');
-               return;
-       }
-
-       do {
-               cont = (struct trace_field_cont *)ent;
-               if (ok)
-                       ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
-
-               ftrace_disable_cpu();
-
-               if (iter->buffer_iter[iter->cpu])
-                       ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
-               else
-                       ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
-
-               ftrace_enable_cpu();
-
-               ent = peek_next_entry(iter, iter->cpu, NULL);
-       } while (ent && ent->type == TRACE_CONT);
-
-       if (!ok)
-               trace_seq_putc(s, '\n');
-}
-
 static void test_cpu_buff_start(struct trace_iterator *iter)
 {
        struct trace_seq *s = &iter->seq;
@@ -1824,17 +1426,14 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
        struct trace_seq *s = &iter->seq;
        unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
        struct trace_entry *next_entry;
+       struct trace_event *event;
        unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
        struct trace_entry *entry = iter->ent;
        unsigned long abs_usecs;
        unsigned long rel_usecs;
        u64 next_ts;
        char *comm;
-       int S, T;
-       int i;
-
-       if (entry->type == TRACE_CONT)
-               return TRACE_TYPE_HANDLED;
+       int ret;
 
        test_cpu_buff_start(iter);
 
@@ -1859,96 +1458,16 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
                lat_print_generic(s, entry, cpu);
                lat_print_timestamp(s, abs_usecs, rel_usecs);
        }
-       switch (entry->type) {
-       case TRACE_FN: {
-               struct ftrace_entry *field;
 
-               trace_assign_type(field, entry);
-
-               seq_print_ip_sym(s, field->ip, sym_flags);
-               trace_seq_puts(s, " (");
-               seq_print_ip_sym(s, field->parent_ip, sym_flags);
-               trace_seq_puts(s, ")\n");
-               break;
-       }
-       case TRACE_CTX:
-       case TRACE_WAKE: {
-               struct ctx_switch_entry *field;
-
-               trace_assign_type(field, entry);
-
-               T = task_state_char(field->next_state);
-               S = task_state_char(field->prev_state);
-               comm = trace_find_cmdline(field->next_pid);
-               trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
-                                field->prev_pid,
-                                field->prev_prio,
-                                S, entry->type == TRACE_CTX ? "==>" : "  +",
-                                field->next_cpu,
-                                field->next_pid,
-                                field->next_prio,
-                                T, comm);
-               break;
-       }
-       case TRACE_SPECIAL: {
-               struct special_entry *field;
-
-               trace_assign_type(field, entry);
-
-               trace_seq_printf(s, "# %ld %ld %ld\n",
-                                field->arg1,
-                                field->arg2,
-                                field->arg3);
-               break;
-       }
-       case TRACE_STACK: {
-               struct stack_entry *field;
-
-               trace_assign_type(field, entry);
-
-               for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-                       if (i)
-                               trace_seq_puts(s, " <= ");
-                       seq_print_ip_sym(s, field->caller[i], sym_flags);
-               }
-               trace_seq_puts(s, "\n");
-               break;
-       }
-       case TRACE_PRINT: {
-               struct print_entry *field;
-
-               trace_assign_type(field, entry);
-
-               seq_print_ip_sym(s, field->ip, sym_flags);
-               trace_seq_printf(s, ": %s", field->buf);
-               if (entry->flags & TRACE_FLAG_CONT)
-                       trace_seq_print_cont(s, iter);
-               break;
-       }
-       case TRACE_BRANCH: {
-               struct trace_branch *field;
-
-               trace_assign_type(field, entry);
-
-               trace_seq_printf(s, "[%s] %s:%s:%d\n",
-                                field->correct ? "  ok  " : " MISS ",
-                                field->func,
-                                field->file,
-                                field->line);
-               break;
+       event = ftrace_find_event(entry->type);
+       if (event && event->latency_trace) {
+               ret = event->latency_trace(s, entry, sym_flags);
+               if (ret)
+                       return ret;
+               return TRACE_TYPE_HANDLED;
        }
-       case TRACE_USER_STACK: {
-               struct userstack_entry *field;
 
-               trace_assign_type(field, entry);
-
-               seq_print_userip_objs(field, s, sym_flags);
-               trace_seq_putc(s, '\n');
-               break;
-       }
-       default:
-               trace_seq_printf(s, "Unknown type %d\n", entry->type);
-       }
+       trace_seq_printf(s, "Unknown type %d\n", entry->type);
        return TRACE_TYPE_HANDLED;
 }
 
@@ -1957,19 +1476,15 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
        struct trace_seq *s = &iter->seq;
        unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
        struct trace_entry *entry;
+       struct trace_event *event;
        unsigned long usec_rem;
        unsigned long long t;
        unsigned long secs;
        char *comm;
        int ret;
-       int S, T;
-       int i;
 
        entry = iter->ent;
 
-       if (entry->type == TRACE_CONT)
-               return TRACE_TYPE_HANDLED;
-
        test_cpu_buff_start(iter);
 
        comm = trace_find_cmdline(iter->ent->pid);
@@ -1988,129 +1503,17 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
-       switch (entry->type) {
-       case TRACE_FN: {
-               struct ftrace_entry *field;
-
-               trace_assign_type(field, entry);
-
-               ret = seq_print_ip_sym(s, field->ip, sym_flags);
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-               if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
-                                               field->parent_ip) {
-                       ret = trace_seq_printf(s, " <-");
-                       if (!ret)
-                               return TRACE_TYPE_PARTIAL_LINE;
-                       ret = seq_print_ip_sym(s,
-                                              field->parent_ip,
-                                              sym_flags);
-                       if (!ret)
-                               return TRACE_TYPE_PARTIAL_LINE;
-               }
-               ret = trace_seq_printf(s, "\n");
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-               break;
-       }
-       case TRACE_CTX:
-       case TRACE_WAKE: {
-               struct ctx_switch_entry *field;
-
-               trace_assign_type(field, entry);
-
-               T = task_state_char(field->next_state);
-               S = task_state_char(field->prev_state);
-               ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
-                                      field->prev_pid,
-                                      field->prev_prio,
-                                      S,
-                                      entry->type == TRACE_CTX ? "==>" : "  +",
-                                      field->next_cpu,
-                                      field->next_pid,
-                                      field->next_prio,
-                                      T);
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-               break;
-       }
-       case TRACE_SPECIAL: {
-               struct special_entry *field;
-
-               trace_assign_type(field, entry);
-
-               ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-                                field->arg1,
-                                field->arg2,
-                                field->arg3);
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-               break;
-       }
-       case TRACE_STACK: {
-               struct stack_entry *field;
-
-               trace_assign_type(field, entry);
-
-               for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-                       if (i) {
-                               ret = trace_seq_puts(s, " <= ");
-                               if (!ret)
-                                       return TRACE_TYPE_PARTIAL_LINE;
-                       }
-                       ret = seq_print_ip_sym(s, field->caller[i],
-                                              sym_flags);
-                       if (!ret)
-                               return TRACE_TYPE_PARTIAL_LINE;
-               }
-               ret = trace_seq_puts(s, "\n");
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-               break;
-       }
-       case TRACE_PRINT: {
-               struct print_entry *field;
-
-               trace_assign_type(field, entry);
-
-               seq_print_ip_sym(s, field->ip, sym_flags);
-               trace_seq_printf(s, ": %s", field->buf);
-               if (entry->flags & TRACE_FLAG_CONT)
-                       trace_seq_print_cont(s, iter);
-               break;
-       }
-       case TRACE_GRAPH_RET: {
-               return print_graph_function(iter);
-       }
-       case TRACE_GRAPH_ENT: {
-               return print_graph_function(iter);
-       }
-       case TRACE_BRANCH: {
-               struct trace_branch *field;
-
-               trace_assign_type(field, entry);
-
-               trace_seq_printf(s, "[%s] %s:%s:%d\n",
-                                field->correct ? "  ok  " : " MISS ",
-                                field->func,
-                                field->file,
-                                field->line);
-               break;
+       event = ftrace_find_event(entry->type);
+       if (event && event->trace) {
+               ret = event->trace(s, entry, sym_flags);
+               if (ret)
+                       return ret;
+               return TRACE_TYPE_HANDLED;
        }
-       case TRACE_USER_STACK: {
-               struct userstack_entry *field;
-
-               trace_assign_type(field, entry);
+       ret = trace_seq_printf(s, "Unknown type %d\n", entry->type);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
 
-               ret = seq_print_userip_objs(field, s, sym_flags);
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-               ret = trace_seq_putc(s, '\n');
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-               break;
-       }
-       }
        return TRACE_TYPE_HANDLED;
 }
 
@@ -2118,152 +1521,47 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
 {
        struct trace_seq *s = &iter->seq;
        struct trace_entry *entry;
+       struct trace_event *event;
        int ret;
-       int S, T;
 
        entry = iter->ent;
 
-       if (entry->type == TRACE_CONT)
-               return TRACE_TYPE_HANDLED;
-
        ret = trace_seq_printf(s, "%d %d %llu ",
                entry->pid, iter->cpu, iter->ts);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
-       switch (entry->type) {
-       case TRACE_FN: {
-               struct ftrace_entry *field;
-
-               trace_assign_type(field, entry);
-
-               ret = trace_seq_printf(s, "%x %x\n",
-                                       field->ip,
-                                       field->parent_ip);
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-               break;
-       }
-       case TRACE_CTX:
-       case TRACE_WAKE: {
-               struct ctx_switch_entry *field;
-
-               trace_assign_type(field, entry);
-
-               T = task_state_char(field->next_state);
-               S = entry->type == TRACE_WAKE ? '+' :
-                       task_state_char(field->prev_state);
-               ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
-                                      field->prev_pid,
-                                      field->prev_prio,
-                                      S,
-                                      field->next_cpu,
-                                      field->next_pid,
-                                      field->next_prio,
-                                      T);
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-               break;
-       }
-       case TRACE_SPECIAL:
-       case TRACE_USER_STACK:
-       case TRACE_STACK: {
-               struct special_entry *field;
-
-               trace_assign_type(field, entry);
-
-               ret = trace_seq_printf(s, "# %ld %ld %ld\n",
-                                field->arg1,
-                                field->arg2,
-                                field->arg3);
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-               break;
+       event = ftrace_find_event(entry->type);
+       if (event && event->raw) {
+               ret = event->raw(s, entry, 0);
+               if (ret)
+                       return ret;
+               return TRACE_TYPE_HANDLED;
        }
-       case TRACE_PRINT: {
-               struct print_entry *field;
-
-               trace_assign_type(field, entry);
+       ret = trace_seq_printf(s, "%d ?\n", entry->type);
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
 
-               trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
-               if (entry->flags & TRACE_FLAG_CONT)
-                       trace_seq_print_cont(s, iter);
-               break;
-       }
-       }
        return TRACE_TYPE_HANDLED;
 }
 
-#define SEQ_PUT_FIELD_RET(s, x)                                \
-do {                                                   \
-       if (!trace_seq_putmem(s, &(x), sizeof(x)))      \
-               return 0;                               \
-} while (0)
-
-#define SEQ_PUT_HEX_FIELD_RET(s, x)                    \
-do {                                                   \
-       BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES);     \
-       if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))  \
-               return 0;                               \
-} while (0)
-
 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
 {
        struct trace_seq *s = &iter->seq;
        unsigned char newline = '\n';
        struct trace_entry *entry;
-       int S, T;
+       struct trace_event *event;
 
        entry = iter->ent;
 
-       if (entry->type == TRACE_CONT)
-               return TRACE_TYPE_HANDLED;
-
        SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
        SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
        SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
 
-       switch (entry->type) {
-       case TRACE_FN: {
-               struct ftrace_entry *field;
-
-               trace_assign_type(field, entry);
-
-               SEQ_PUT_HEX_FIELD_RET(s, field->ip);
-               SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
-               break;
-       }
-       case TRACE_CTX:
-       case TRACE_WAKE: {
-               struct ctx_switch_entry *field;
-
-               trace_assign_type(field, entry);
-
-               T = task_state_char(field->next_state);
-               S = entry->type == TRACE_WAKE ? '+' :
-                       task_state_char(field->prev_state);
-               SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
-               SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
-               SEQ_PUT_HEX_FIELD_RET(s, S);
-               SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
-               SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
-               SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
-               SEQ_PUT_HEX_FIELD_RET(s, T);
-               break;
-       }
-       case TRACE_SPECIAL:
-       case TRACE_USER_STACK:
-       case TRACE_STACK: {
-               struct special_entry *field;
-
-               trace_assign_type(field, entry);
+       event = ftrace_find_event(entry->type);
+       if (event && event->hex)
+               event->hex(s, entry, 0);
 
-               SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
-               SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
-               SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
-               break;
-       }
-       }
        SEQ_PUT_FIELD_RET(s, newline);
 
        return TRACE_TYPE_HANDLED;
@@ -2282,9 +1580,6 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
-       if (entry->flags & TRACE_FLAG_CONT)
-               trace_seq_print_cont(s, iter);
-
        return TRACE_TYPE_HANDLED;
 }
 
@@ -2292,53 +1587,19 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 {
        struct trace_seq *s = &iter->seq;
        struct trace_entry *entry;
+       struct trace_event *event;
 
        entry = iter->ent;
 
-       if (entry->type == TRACE_CONT)
-               return TRACE_TYPE_HANDLED;
-
        SEQ_PUT_FIELD_RET(s, entry->pid);
        SEQ_PUT_FIELD_RET(s, entry->cpu);
        SEQ_PUT_FIELD_RET(s, iter->ts);
 
-       switch (entry->type) {
-       case TRACE_FN: {
-               struct ftrace_entry *field;
-
-               trace_assign_type(field, entry);
-
-               SEQ_PUT_FIELD_RET(s, field->ip);
-               SEQ_PUT_FIELD_RET(s, field->parent_ip);
-               break;
-       }
-       case TRACE_CTX: {
-               struct ctx_switch_entry *field;
-
-               trace_assign_type(field, entry);
+       event = ftrace_find_event(entry->type);
+       if (event && event->binary)
+               event->binary(s, entry, 0);
 
-               SEQ_PUT_FIELD_RET(s, field->prev_pid);
-               SEQ_PUT_FIELD_RET(s, field->prev_prio);
-               SEQ_PUT_FIELD_RET(s, field->prev_state);
-               SEQ_PUT_FIELD_RET(s, field->next_pid);
-               SEQ_PUT_FIELD_RET(s, field->next_prio);
-               SEQ_PUT_FIELD_RET(s, field->next_state);
-               break;
-       }
-       case TRACE_SPECIAL:
-       case TRACE_USER_STACK:
-       case TRACE_STACK: {
-               struct special_entry *field;
-
-               trace_assign_type(field, entry);
-
-               SEQ_PUT_FIELD_RET(s, field->arg1);
-               SEQ_PUT_FIELD_RET(s, field->arg2);
-               SEQ_PUT_FIELD_RET(s, field->arg3);
-               break;
-       }
-       }
-       return 1;
+       return TRACE_TYPE_HANDLED;
 }
 
 static int trace_empty(struct trace_iterator *iter)
@@ -3736,7 +2997,7 @@ static struct notifier_block trace_die_notifier = {
  * it if we decide to change what log level the ftrace dump
  * should be at.
  */
-#define KERN_TRACE             KERN_INFO
+#define KERN_TRACE             KERN_EMERG
 
 static void
 trace_printk_seq(struct trace_seq *s)
@@ -3770,6 +3031,7 @@ void ftrace_dump(void)
        dump_ran = 1;
 
        /* No turning back! */
+       tracing_off();
        ftrace_kill();
 
        for_each_tracing_cpu(cpu) {
@@ -3877,7 +3139,6 @@ __init static int tracer_alloc_buffers(void)
 #else
        current_trace = &nop_trace;
 #endif
-
        /* All seems OK, enable tracing */
        tracing_disabled = 0;
 
index 4d3d381..54b7278 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 #include <trace/boot.h>
+#include <trace/kmemtrace.h>
 
 enum trace_type {
        __TRACE_FIRST_TYPE = 0,
@@ -16,7 +17,6 @@ enum trace_type {
        TRACE_FN,
        TRACE_CTX,
        TRACE_WAKE,
-       TRACE_CONT,
        TRACE_STACK,
        TRACE_PRINT,
        TRACE_SPECIAL,
@@ -29,9 +29,11 @@ enum trace_type {
        TRACE_GRAPH_ENT,
        TRACE_USER_STACK,
        TRACE_HW_BRANCHES,
+       TRACE_KMEM_ALLOC,
+       TRACE_KMEM_FREE,
        TRACE_POWER,
 
-       __TRACE_LAST_TYPE
+       __TRACE_LAST_TYPE,
 };
 
 /*
@@ -170,6 +172,24 @@ struct trace_power {
        struct power_trace      state_data;
 };
 
+/*
+ * Ring-buffer entry layout selected by trace_assign_type() for entries
+ * of type TRACE_KMEM_ALLOC.
+ * NOTE(review): the meaning of the individual fields is only implied by
+ * their names here; confirm against the code that fills the entry in.
+ */
+struct kmemtrace_alloc_entry {
+       struct trace_entry      ent;
+       enum kmemtrace_type_id type_id;
+       unsigned long call_site;
+       const void *ptr;
+       size_t bytes_req;
+       size_t bytes_alloc;
+       gfp_t gfp_flags;
+       int node;
+};
+
+/*
+ * Ring-buffer entry layout selected by trace_assign_type() for entries
+ * of type TRACE_KMEM_FREE. It carries the same leading fields as
+ * struct kmemtrace_alloc_entry, without the size/gfp/node information.
+ */
+struct kmemtrace_free_entry {
+       struct trace_entry      ent;
+       enum kmemtrace_type_id type_id;
+       unsigned long call_site;
+       const void *ptr;
+};
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
@@ -178,7 +198,6 @@ struct trace_power {
  *  NEED_RESCED                - reschedule is requested
  *  HARDIRQ            - inside an interrupt handler
  *  SOFTIRQ            - inside a softirq handler
- *  CONT               - multiple entries hold the trace item
  */
 enum trace_flag_type {
        TRACE_FLAG_IRQS_OFF             = 0x01,
@@ -186,7 +205,6 @@ enum trace_flag_type {
        TRACE_FLAG_NEED_RESCHED         = 0x04,
        TRACE_FLAG_HARDIRQ              = 0x08,
        TRACE_FLAG_SOFTIRQ              = 0x10,
-       TRACE_FLAG_CONT                 = 0x20,
 };
 
 #define TRACE_BUF_SIZE         1024
@@ -262,7 +280,6 @@ extern void __ftrace_bad_type(void);
        do {                                                            \
                IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN);     \
                IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);        \
-               IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
                IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);   \
                IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
                IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);   \
@@ -280,6 +297,10 @@ extern void __ftrace_bad_type(void);
                          TRACE_GRAPH_RET);             \
                IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
                IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+               IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,       \
+                         TRACE_KMEM_ALLOC);    \
+               IF_ASSIGN(var, ent, struct kmemtrace_free_entry,        \
+                         TRACE_KMEM_FREE);     \
                __ftrace_bad_type();                                    \
        } while (0)
 
@@ -313,6 +334,7 @@ struct tracer_flags {
 /* Makes more easy to define a tracer opt */
 #define TRACER_OPT(s, b)       .name = #s, .bit = b
 
+
 /*
  * A specific tracer, represented by methods that operate on a trace array:
  */
@@ -340,6 +362,7 @@ struct tracer {
        struct tracer           *next;
        int                     print_max;
        struct tracer_flags     *flags;
+       struct tracer_stat      *stats;
 };
 
 struct trace_seq {
@@ -434,15 +457,12 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
 void update_max_tr_single(struct trace_array *tr,
                          struct task_struct *tsk, int cpu);
 
-extern cycle_t ftrace_now(int cpu);
+void __trace_stack(struct trace_array *tr,
+                  struct trace_array_cpu *data,
+                  unsigned long flags,
+                  int skip, int pc);
 
-#ifdef CONFIG_FUNCTION_TRACER
-void tracing_start_function_trace(void);
-void tracing_stop_function_trace(void);
-#else
-# define tracing_start_function_trace()                do { } while (0)
-# define tracing_stop_function_trace()         do { } while (0)
-#endif
+extern cycle_t ftrace_now(int cpu);
 
 #ifdef CONFIG_CONTEXT_SWITCH_TRACER
 typedef void
@@ -456,10 +476,10 @@ struct tracer_switch_ops {
        void                            *private;
        struct tracer_switch_ops        *next;
 };
-
-char *trace_find_cmdline(int pid);
 #endif /* CONFIG_CONTEXT_SWITCH_TRACER */
 
+extern char *trace_find_cmdline(int pid);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern unsigned long ftrace_update_tot_cnt;
 #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
@@ -488,15 +508,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
-extern void trace_seq_print_cont(struct trace_seq *s,
-                                struct trace_iterator *iter);
-
-extern int
-seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
-               unsigned long sym_flags);
-extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
-                                size_t cnt);
 extern long ns2usecs(cycle_t nsec);
 extern int
 trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
index 366c8c3..0e94b3d 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/kallsyms.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 static struct trace_array *boot_trace;
 static bool pre_initcalls_finished;
index 6c00feb..ca017e0 100644 (file)
 #include <linux/hash.h>
 #include <linux/fs.h>
 #include <asm/local.h>
+
 #include "trace.h"
+#include "trace_stat.h"
+#include "trace_output.h"
 
 #ifdef CONFIG_BRANCH_TRACER
 
+static struct tracer branch_trace;
 static int branch_tracing_enabled __read_mostly;
 static DEFINE_MUTEX(branch_tracing_mutex);
+
 static struct trace_array *branch_tracer;
 
 static void
@@ -142,22 +147,74 @@ static void branch_trace_reset(struct trace_array *tr)
        stop_branch_trace(tr);
 }
 
-struct tracer branch_trace __read_mostly =
+/*
+ * Print a print_entry: the entry's ip (through seq_print_ip_sym())
+ * followed by ": " and the recorded message buffer.
+ *
+ * trace_seq_printf() returns 0 when the text does not fit in the
+ * trace_seq, so a zero return is the failure case.
+ * NOTE(review): seq_print_ip_sym() is assumed to follow the same
+ * "0 means it did not fit" convention - confirm in trace_output.c.
+ *
+ * Returns 0 when the whole line was emitted, TRACE_TYPE_PARTIAL_LINE
+ * when the sequence buffer filled up.
+ */
+static int
+trace_print_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct print_entry *field;
+
+       trace_assign_type(field, entry);
+
+       if (!seq_print_ip_sym(s, field->ip, flags))
+               goto partial;
+
+       if (!trace_seq_printf(s, ": %s", field->buf))
+               goto partial;
+
+       /* Everything fit: do not fall through into the error label. */
+       return 0;
+
+ partial:
+       return TRACE_TYPE_PARTIAL_LINE;
+}
+
+/*
+ * Print one branch-tracer entry as "[  ok  ] func:file:line" or
+ * "[ MISS ] func:file:line", depending on field->correct.
+ *
+ * trace_seq_printf() returns 0 when the text does not fit, so only a
+ * zero return means the line is incomplete.
+ *
+ * Returns 0 on success, TRACE_TYPE_PARTIAL_LINE if the buffer is full.
+ */
+static int
+trace_branch_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct trace_branch *field;
+
+       trace_assign_type(field, entry);
+
+       if (!trace_seq_printf(s, "[%s] %s:%s:%d\n",
+                             field->correct ? "  ok  " : " MISS ",
+                             field->func,
+                             field->file,
+                             field->line))
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return 0;
+}
+
+
+/*
+ * Output callbacks for TRACE_BRANCH entries. The normal and latency
+ * formats share trace_branch_print(); the raw, hex and binary formats
+ * are routed to trace_nop_print (declared outside this file).
+ */
+static struct trace_event trace_branch_event = {
+       .type           = TRACE_BRANCH,
+       .trace          = trace_branch_print,
+       .latency_trace  = trace_branch_print,
+       .raw            = trace_nop_print,
+       .hex            = trace_nop_print,
+       .binary         = trace_nop_print,
+};
+
+static struct tracer branch_trace __read_mostly =
 {
        .name           = "branch",
        .init           = branch_trace_init,
        .reset          = branch_trace_reset,
 #ifdef CONFIG_FTRACE_SELFTEST
        .selftest       = trace_selftest_startup_branch,
-#endif
+#endif /* CONFIG_FTRACE_SELFTEST */
 };
 
-__init static int init_branch_trace(void)
+__init static int init_branch_tracer(void)
 {
+       int ret;
+
+       ret = register_ftrace_event(&trace_branch_event);
+       if (!ret) {
+               printk(KERN_WARNING "Warning: could not register "
+                                   "branch events\n");
+               return 1;
+       }
        return register_tracer(&branch_trace);
 }
+device_initcall(init_branch_tracer);
 
-device_initcall(init_branch_trace);
 #else
 static inline
 void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
@@ -183,66 +240,39 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
 }
 EXPORT_SYMBOL(ftrace_likely_update);
 
-struct ftrace_pointer {
-       void            *start;
-       void            *stop;
-       int             hit;
-};
+extern unsigned long __start_annotated_branch_profile[];
+extern unsigned long __stop_annotated_branch_profile[];
 
-static void *
-t_next(struct seq_file *m, void *v, loff_t *pos)
+static int annotated_branch_stat_headers(struct seq_file *m)
 {
-       const struct ftrace_pointer *f = m->private;
-       struct ftrace_branch_data *p = v;
-
-       (*pos)++;
-
-       if (v == (void *)1)
-               return f->start;
-
-       ++p;
-
-       if ((void *)p >= (void *)f->stop)
-               return NULL;
-
-       return p;
+       seq_printf(m, " correct incorrect  %% ");
+       seq_printf(m, "       Function                "
+                             "  File              Line\n"
+                             " ------- ---------  - "
+                             "       --------                "
+                             "  ----              ----\n");
+       return 0;
 }
 
-static void *t_start(struct seq_file *m, loff_t *pos)
+static inline long get_incorrect_percent(struct ftrace_branch_data *p)
 {
-       void *t = (void *)1;
-       loff_t l = 0;
-
-       for (; t && l < *pos; t = t_next(m, t, &l))
-               ;
+       long percent;
 
-       return t;
-}
+       if (p->correct) {
+               percent = p->incorrect * 100;
+               percent /= p->correct + p->incorrect;
+       } else
+               percent = p->incorrect ? 100 : -1;
 
-static void t_stop(struct seq_file *m, void *p)
-{
+       return percent;
 }
 
-static int t_show(struct seq_file *m, void *v)
+static int branch_stat_show(struct seq_file *m, void *v)
 {
-       const struct ftrace_pointer *fp = m->private;
        struct ftrace_branch_data *p = v;
        const char *f;
        long percent;
 
-       if (v == (void *)1) {
-               if (fp->hit)
-                       seq_printf(m, "   miss      hit    %% ");
-               else
-                       seq_printf(m, " correct incorrect  %% ");
-               seq_printf(m, "       Function                "
-                             "  File              Line\n"
-                             " ------- ---------  - "
-                             "       --------                "
-                             "  ----              ----\n");
-               return 0;
-       }
-
        /* Only print the file, not the path */
        f = p->file + strlen(p->file);
        while (f >= p->file && *f != '/')
@@ -252,11 +282,7 @@ static int t_show(struct seq_file *m, void *v)
        /*
         * The miss is overlayed on correct, and hit on incorrect.
         */
-       if (p->correct) {
-               percent = p->incorrect * 100;
-               percent /= p->correct + p->incorrect;
-       } else
-               percent = p->incorrect ? 100 : -1;
+       percent = get_incorrect_percent(p);
 
        seq_printf(m, "%8lu %8lu ",  p->correct, p->incorrect);
        if (percent < 0)
@@ -267,76 +293,118 @@ static int t_show(struct seq_file *m, void *v)
        return 0;
 }
 
-static struct seq_operations tracing_likely_seq_ops = {
-       .start          = t_start,
-       .next           = t_next,
-       .stop           = t_stop,
-       .show           = t_show,
+/* stat_start callback: hand back the first annotated branch record. */
+static void *annotated_branch_stat_start(void)
+{
+       void *first = __start_annotated_branch_profile;
+
+       return first;
+}
+
+/*
+ * stat_next callback: step from record @v to the following
+ * ftrace_branch_data record, or return NULL once the end of the
+ * annotated branch profile section is reached. @idx is not used.
+ */
+static void *
+annotated_branch_stat_next(void *v, int idx)
+{
+       struct ftrace_branch_data *next = (struct ftrace_branch_data *)v + 1;
+
+       return (void *)next < (void *)__stop_annotated_branch_profile ?
+               next : NULL;
+}
+
+/*
+ * stat_cmp callback: order two ftrace_branch_data records by their
+ * incorrect-prediction percentage.
+ *
+ * Returns -1, 0 or 1 when @p1's percentage is lower than, equal to or
+ * higher than @p2's.
+ */
+static int annotated_branch_stat_cmp(void *p1, void *p2)
+{
+       long lhs = get_incorrect_percent(p1);
+       long rhs = get_incorrect_percent(p2);
+
+       if (lhs != rhs)
+               return lhs < rhs ? -1 : 1;
+
+       return 0;
+}
+
+static struct tracer_stat annotated_branch_stats = {
+       .name = "branch_annotated",
+       .stat_start = annotated_branch_stat_start,
+       .stat_next = annotated_branch_stat_next,
+       .stat_cmp = annotated_branch_stat_cmp,
+       .stat_headers = annotated_branch_stat_headers,
+       .stat_show = branch_stat_show
 };
 
-static int tracing_branch_open(struct inode *inode, struct file *file)
+/*
+ * Register the annotated-branch statistics with the stat tracer.
+ *
+ * NOTE(review): register_stat_tracer() is defined outside this hunk; it
+ * is expected to return 0 on success and a negative errno on failure,
+ * so the warning must fire on a non-zero return - confirm in
+ * trace_stat.c.
+ *
+ * Returns 0 on success, 1 if the registration failed.
+ */
+__init static int init_annotated_branch_stats(void)
 {
        int ret;
 
-       ret = seq_open(file, &tracing_likely_seq_ops);
+       ret = register_stat_tracer(&annotated_branch_stats);
-       if (!ret) {
-               struct seq_file *m = file->private_data;
-               m->private = (void *)inode->i_private;
+       if (ret) {
+               printk(KERN_WARNING "Warning: could not register "
+                                   "annotated branches stats\n");
+               return 1;
        }
-
-       return ret;
+       return 0;
 }
-
-static const struct file_operations tracing_branch_fops = {
-       .open           = tracing_branch_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-};
+fs_initcall(init_annotated_branch_stats);
 
 #ifdef CONFIG_PROFILE_ALL_BRANCHES
+
 extern unsigned long __start_branch_profile[];
 extern unsigned long __stop_branch_profile[];
 
-static const struct ftrace_pointer ftrace_branch_pos = {
-       .start                  = __start_branch_profile,
-       .stop                   = __stop_branch_profile,
-       .hit                    = 1,
-};
+/*
+ * stat_headers callback for the "branch_all" stat: prints the
+ * "miss / hit / %" column titles followed by the Function, File and
+ * Line titles and their underline row. Always returns 0.
+ */
+static int all_branch_stat_headers(struct seq_file *m)
+{
+       seq_printf(m, "   miss      hit    %% ");
+       seq_printf(m, "       Function                "
+                             "  File              Line\n"
+                             " ------- ---------  - "
+                             "       --------                "
+                             "  ----              ----\n");
+       return 0;
+}
 
-#endif /* CONFIG_PROFILE_ALL_BRANCHES */
+/* stat_start callback: hand back the first record of the branch profile. */
+static void *all_branch_stat_start(void)
+{
+       void *first = __start_branch_profile;
+
+       return first;
+}
 
-extern unsigned long __start_annotated_branch_profile[];
-extern unsigned long __stop_annotated_branch_profile[];
+static void *
+all_branch_stat_next(void *v, int idx)
+{
+       struct ftrace_branch_data *p = v;
 
-static const struct ftrace_pointer ftrace_annotated_branch_pos = {
-       .start                  = __start_annotated_branch_profile,
-       .stop                   = __stop_annotated_branch_profile,
-};
+       ++p;
 
-static __init int ftrace_branch_init(void)
-{
-       struct dentry *d_tracer;
-       struct dentry *entry;
+       if ((void *)p >= (void *)__stop_branch_profile)
+               return NULL;
 
-       d_tracer = tracing_init_dentry();
+       return p;
+}
 
-       entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer,
-                                   (void *)&ftrace_annotated_branch_pos,
-                                   &tracing_branch_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'profile_annotatet_branch' entry\n");
+/*
+ * Stat tracer description for every profiled branch ("branch_all").
+ * It reuses branch_stat_show() for the rows and, unlike
+ * annotated_branch_stats, does not set a .stat_cmp callback.
+ */
+static struct tracer_stat all_branch_stats = {
+       .name = "branch_all",
+       .stat_start = all_branch_stat_start,
+       .stat_next = all_branch_stat_next,
+       .stat_headers = all_branch_stat_headers,
+       .stat_show = branch_stat_show
+};
 
-#ifdef CONFIG_PROFILE_ALL_BRANCHES
-       entry = debugfs_create_file("profile_branch", 0444, d_tracer,
-                                   (void *)&ftrace_branch_pos,
-                                   &tracing_branch_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs"
-                          " 'profile_branch' entry\n");
-#endif
+/*
+ * Register the "branch_all" statistics with the stat tracer.
+ *
+ * NOTE(review): register_stat_tracer() is defined outside this hunk; it
+ * is expected to return 0 on success and a negative errno on failure,
+ * so the warning must fire on a non-zero return - confirm in
+ * trace_stat.c.
+ *
+ * Returns 0 on success, 1 if the registration failed.
+ */
+__init static int all_annotated_branch_stats(void)
+{
+       int ret;
 
+       ret = register_stat_tracer(&all_branch_stats);
+       if (ret) {
+               printk(KERN_WARNING "Warning: could not register "
+                                   "all branches stats\n");
+               return 1;
+       }
        return 0;
 }
-
-device_initcall(ftrace_branch_init);
+fs_initcall(all_annotated_branch_stats);
+#endif /* CONFIG_PROFILE_ALL_BRANCHES */
index 9236d7e..b3a320f 100644 (file)
 
 #include "trace.h"
 
+/* function tracing enabled */
+static int                     ftrace_function_enabled;
+
+static struct trace_array      *func_trace;
+
+static void tracing_start_function_trace(void);
+static void tracing_stop_function_trace(void);
+
 static void start_function_trace(struct trace_array *tr)
 {
+       func_trace = tr;
        tr->cpu = get_cpu();
        tracing_reset_online_cpus(tr);
        put_cpu();
@@ -48,14 +57,188 @@ static void function_trace_start(struct trace_array *tr)
        tracing_reset_online_cpus(tr);
 }
 
+/*
+ * Function-trace callback installed in trace_ops.func when
+ * TRACE_ITER_PREEMPTONLY is set. It records (@ip, @parent_ip) into
+ * func_trace with only preemption disabled; the irq flags are merely
+ * read with local_save_flags(), not changed.
+ */
+static void
+function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
+{
+       struct trace_array *tr = func_trace;
+       struct trace_array_cpu *data;
+       unsigned long flags;
+       long disabled;
+       int cpu, resched;
+       int pc;
+
+       if (unlikely(!ftrace_function_enabled))
+               return;
+
+       /* Sample the preempt count before it is changed below. */
+       pc = preempt_count();
+       resched = ftrace_preempt_disable();
+       local_save_flags(flags);
+       cpu = raw_smp_processor_id();
+       data = tr->data[cpu];
+       disabled = atomic_inc_return(&data->disabled);
+
+       /* Only the outermost user of this CPU's data records an entry. */
+       if (likely(disabled == 1))
+               trace_function(tr, data, ip, parent_ip, flags, pc);
+
+       atomic_dec(&data->disabled);
+       ftrace_preempt_enable(resched);
+}
+
+/*
+ * Default function-trace callback (trace_ops.func). It records
+ * (@ip, @parent_ip) into func_trace with interrupts disabled for the
+ * duration of the call.
+ */
+static void
+function_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+       struct trace_array *tr = func_trace;
+       struct trace_array_cpu *data;
+       unsigned long flags;
+       long disabled;
+       int cpu;
+       int pc;
+
+       if (unlikely(!ftrace_function_enabled))
+               return;
+
+       /*
+        * Need to use raw, since this must be called before the
+        * recursive protection is performed.
+        */
+       local_irq_save(flags);
+       cpu = raw_smp_processor_id();
+       data = tr->data[cpu];
+       disabled = atomic_inc_return(&data->disabled);
+
+       /* Only the outermost user of this CPU's data records an entry. */
+       if (likely(disabled == 1)) {
+               pc = preempt_count();
+               trace_function(tr, data, ip, parent_ip, flags, pc);
+       }
+
+       atomic_dec(&data->disabled);
+       local_irq_restore(flags);
+}
+
+/*
+ * Function-trace callback used by trace_stack_ops. It behaves like
+ * function_trace_call() and additionally records a stack trace for
+ * every traced function through __trace_stack().
+ */
+static void
+function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
+{
+       struct trace_array *tr = func_trace;
+       struct trace_array_cpu *data;
+       unsigned long flags;
+       long disabled;
+       int cpu;
+       int pc;
+
+       if (unlikely(!ftrace_function_enabled))
+               return;
+
+       /*
+        * Need to use raw, since this must be called before the
+        * recursive protection is performed.
+        */
+       local_irq_save(flags);
+       cpu = raw_smp_processor_id();
+       data = tr->data[cpu];
+       disabled = atomic_inc_return(&data->disabled);
+
+       /* Only the outermost user of this CPU's data records an entry. */
+       if (likely(disabled == 1)) {
+               pc = preempt_count();
+               trace_function(tr, data, ip, parent_ip, flags, pc);
+               /*
+                * skip over 5 funcs:
+                *    __ftrace_trace_stack,
+                *    __trace_stack,
+                *    function_stack_trace_call
+                *    ftrace_list_func
+                *    ftrace_call
+                */
+               __trace_stack(tr, data, flags, 5, pc);
+       }
+
+       atomic_dec(&data->disabled);
+       local_irq_restore(flags);
+}
+
+
+/*
+ * ftrace_ops for plain function tracing. The .func set here is only the
+ * initial value: tracing_start_function_trace() overwrites it depending
+ * on TRACE_ITER_PREEMPTONLY.
+ */
+static struct ftrace_ops trace_ops __read_mostly =
+{
+       .func = function_trace_call,
+};
+
+/* ftrace_ops registered instead of trace_ops when TRACE_FUNC_OPT_STACK is set. */
+static struct ftrace_ops trace_stack_ops __read_mostly =
+{
+       .func = function_stack_trace_call,
+};
+
+/* Function tracer option bits (only one is defined so far) */
+enum {
+       TRACE_FUNC_OPT_STACK = 0x1,
+};
+
+/*
+ * Option table of the function tracer. The func_stack_trace option is
+ * only offered when CONFIG_STACKTRACE is enabled; the table is
+ * terminated by an empty entry.
+ */
+static struct tracer_opt func_opts[] = {
+#ifdef CONFIG_STACKTRACE
+       { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
+#endif
+       { } /* Always set a last empty entry */
+};
+
+/* Current option state of the function tracer, described by func_opts. */
+static struct tracer_flags func_flags = {
+       .val = 0, /* By default: all flags disabled */
+       .opts = func_opts
+};
+
+/*
+ * Start function tracing: pick the callback for trace_ops according to
+ * TRACE_ITER_PREEMPTONLY, register either the stack-tracing ops or the
+ * plain ops depending on TRACE_FUNC_OPT_STACK, and only then let the
+ * callbacks record by setting ftrace_function_enabled.
+ */
+static void tracing_start_function_trace(void)
+{
+       struct ftrace_ops *ops;
+
+       /* Keep the callbacks quiet while the ops are being set up. */
+       ftrace_function_enabled = 0;
+
+       trace_ops.func = (trace_flags & TRACE_ITER_PREEMPTONLY) ?
+                               function_trace_call_preempt_only :
+                               function_trace_call;
+
+       ops = (func_flags.val & TRACE_FUNC_OPT_STACK) ?
+                       &trace_stack_ops : &trace_ops;
+       register_ftrace_function(ops);
+
+       ftrace_function_enabled = 1;
+}
+
+/*
+ * Stop function tracing: silence the callbacks first, then unregister
+ * both ops without checking which one was actually registered.
+ */
+static void tracing_stop_function_trace(void)
+{
+       ftrace_function_enabled = 0;
+       /* OK if they are not registered */
+       unregister_ftrace_function(&trace_stack_ops);
+       unregister_ftrace_function(&trace_ops);
+}
+
+/*
+ * set_flag callback of the function tracer.
+ *
+ * For TRACE_FUNC_OPT_STACK, swap the registered ftrace_ops between
+ * trace_ops and trace_stack_ops when the requested state differs from
+ * the one recorded in func_flags.val. @old_flags is not used.
+ * NOTE(review): func_flags.val is not modified here; presumably the
+ * caller updates it after a 0 return - confirm.
+ *
+ * Returns 0 on success (including "nothing to do"), -EINVAL for any
+ * other @bit.
+ */
+static int func_set_flag(u32 old_flags, u32 bit, int set)
+{
+       struct ftrace_ops *outgoing;
+       struct ftrace_ops *incoming;
+
+       if (bit != TRACE_FUNC_OPT_STACK)
+               return -EINVAL;
+
+       /* do nothing if already set */
+       if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
+               return 0;
+
+       if (set) {
+               outgoing = &trace_ops;
+               incoming = &trace_stack_ops;
+       } else {
+               outgoing = &trace_stack_ops;
+               incoming = &trace_ops;
+       }
+
+       unregister_ftrace_function(outgoing);
+       register_ftrace_function(incoming);
+
+       return 0;
+}
+
 static struct tracer function_trace __read_mostly =
 {
-       .name        = "function",
-       .init        = function_trace_init,
-       .reset       = function_trace_reset,
-       .start       = function_trace_start,
+       .name           = "function",
+       .init           = function_trace_init,
+       .reset          = function_trace_reset,
+       .start          = function_trace_start,
+       .flags          = &func_flags,
+       .set_flag       = func_set_flag,
 #ifdef CONFIG_FTRACE_SELFTEST
-       .selftest    = trace_selftest_startup_function,
+       .selftest       = trace_selftest_startup_function,
 #endif
 };
 
index 930c08e..3c54598 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/fs.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 #define TRACE_GRAPH_INDENT     2
 
@@ -589,8 +590,11 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
-       if (ent->flags & TRACE_FLAG_CONT)
-               trace_seq_print_cont(s, iter);
+       /* Strip ending newline */
+       if (s->buffer[s->len - 1] == '\n') {
+               s->buffer[s->len - 1] = '\0';
+               s->len--;
+       }
 
        ret = trace_seq_printf(s, " */\n");
        if (!ret)
index 649df22..df21c1e 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/ds.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 
 #define SIZEOF_BTS (1 << 13)
index 7c2e326..62a78d9 100644 (file)
@@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr)
 
 static void __irqsoff_tracer_init(struct trace_array *tr)
 {
+       tracing_max_latency = 0;
        irqsoff_trace = tr;
        /* make sure that the tracer is visible */
        smp_wmb();
index fffcb06..ec78e24 100644 (file)
@@ -9,8 +9,10 @@
 #include <linux/kernel.h>
 #include <linux/mmiotrace.h>
 #include <linux/pci.h>
+#include <asm/atomic.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 struct header_iter {
        struct pci_dev *dev;
@@ -19,6 +21,7 @@ struct header_iter {
 static struct trace_array *mmio_trace_array;
 static bool overrun_detected;
 static unsigned long prev_overruns;
+static atomic_t dropped_count;
 
 static void mmio_reset_data(struct trace_array *tr)
 {
@@ -121,11 +124,11 @@ static void mmio_close(struct trace_iterator *iter)
 
 static unsigned long count_overruns(struct trace_iterator *iter)
 {
-       unsigned long cnt = 0;
+       unsigned long cnt = atomic_xchg(&dropped_count, 0);
        unsigned long over = ring_buffer_overruns(iter->tr->buffer);
 
        if (over > prev_overruns)
-               cnt = over - prev_overruns;
+               cnt += over - prev_overruns;
        prev_overruns = over;
        return cnt;
 }
@@ -181,21 +184,22 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
        switch (rw->opcode) {
        case MMIO_READ:
                ret = trace_seq_printf(s,
-                       "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
+                       "R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
                        rw->width, secs, usec_rem, rw->map_id,
                        (unsigned long long)rw->phys,
                        rw->value, rw->pc, 0);
                break;
        case MMIO_WRITE:
                ret = trace_seq_printf(s,
-                       "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
+                       "W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
                        rw->width, secs, usec_rem, rw->map_id,
                        (unsigned long long)rw->phys,
                        rw->value, rw->pc, 0);
                break;
        case MMIO_UNKNOWN_OP:
                ret = trace_seq_printf(s,
-                       "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n",
+                       "UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx,"
+                       "%02lx 0x%lx %d\n",
                        secs, usec_rem, rw->map_id,
                        (unsigned long long)rw->phys,
                        (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
@@ -227,14 +231,14 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
        switch (m->opcode) {
        case MMIO_PROBE:
                ret = trace_seq_printf(s,
-                       "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
+                       "MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
                        secs, usec_rem, m->map_id,
                        (unsigned long long)m->phys, m->virt, m->len,
                        0UL, 0);
                break;
        case MMIO_UNPROBE:
                ret = trace_seq_printf(s,
-                       "UNMAP %lu.%06lu %d 0x%lx %d\n",
+                       "UNMAP %u.%06lu %d 0x%lx %d\n",
                        secs, usec_rem, m->map_id, 0UL, 0);
                break;
        default:
@@ -258,13 +262,10 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
        int ret;
 
        /* The trailing newline must be in the message. */
-       ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
+       ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
-       if (entry->flags & TRACE_FLAG_CONT)
-               trace_seq_print_cont(s, iter);
-
        return TRACE_TYPE_HANDLED;
 }
 
@@ -310,8 +311,10 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
 
        event   = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
                                           &irq_flags);
-       if (!event)
+       if (!event) {
+               atomic_inc(&dropped_count);
                return;
+       }
        entry   = ring_buffer_event_data(event);
        tracing_generic_entry_update(&entry->ent, 0, preempt_count());
        entry->ent.type                 = TRACE_MMIO_RW;
@@ -338,8 +341,10 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
 
        event   = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
                                           &irq_flags);
-       if (!event)
+       if (!event) {
+               atomic_inc(&dropped_count);
                return;
+       }
        entry   = ring_buffer_event_data(event);
        tracing_generic_entry_update(&entry->ent, 0, preempt_count());
        entry->ent.type                 = TRACE_MMIO_MAP;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
new file mode 100644 (file)
index 0000000..1a4e144
--- /dev/null
@@ -0,0 +1,829 @@
+/*
+ * trace_output.c
+ *
+ * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ftrace.h>
+
+#include "trace_output.h"
+
+/*
+ * must be a power of 2: the bucket index is computed as
+ * type & (EVENT_HASHSIZE - 1)
+ */
+#define EVENT_HASHSIZE 128
+
+/* Taken by register_ftrace_event() and unregister_ftrace_event() */
+static DEFINE_MUTEX(trace_event_mutex);
+/* Registered trace_event structures, hashed by their type number */
+static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
+
+/* Next type number handed out to an event that registers with type 0 */
+static int next_event_type = __TRACE_LAST_TYPE + 1;
+
+/**
+ * trace_seq_printf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * Formats @fmt and its arguments into the free space at the end of
+ * the @s buffer. The write is all-or-nothing: when the formatted text
+ * does not fit, @s->len is not advanced.
+ *
+ * Returns 0 if the text could not be stored, otherwise a non-zero
+ * value (the room that was available in the buffer before the call).
+ */
+int
+trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+{
+       int room = (PAGE_SIZE - 1) - s->len;
+       va_list args;
+       int written;
+
+       if (room == 0)
+               return 0;
+
+       va_start(args, fmt);
+       written = vsnprintf(s->buffer + s->len, room, fmt, args);
+       va_end(args);
+
+       /* Truncated output is discarded by leaving s->len where it was */
+       if (written >= room)
+               return 0;
+
+       s->len += written;
+
+       return room;
+}
+
+/**
+ * trace_seq_puts - trace sequence printing of simple string
+ * @s: trace sequence descriptor
+ * @str: simple string to record
+ *
+ * Appends @str (without its terminating NUL) to the @s buffer.
+ * Nothing is copied when the whole string does not fit.
+ *
+ * Returns the number of bytes copied, or 0 if @str did not fit.
+ */
+int trace_seq_puts(struct trace_seq *s, const char *str)
+{
+       int n = strlen(str);
+
+       /* all or nothing: refuse strings larger than the room left */
+       if (n > ((PAGE_SIZE - 1) - s->len))
+               return 0;
+
+       memcpy(&s->buffer[s->len], str, n);
+       s->len += n;
+
+       return n;
+}
+
+/*
+ * Append the single byte @c to the @s buffer.
+ * Returns 1 if the byte was stored, 0 if the buffer is full.
+ */
+int trace_seq_putc(struct trace_seq *s, unsigned char c)
+{
+       if (s->len < (PAGE_SIZE - 1)) {
+               s->buffer[s->len++] = c;
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Append @len raw bytes starting at @mem to the @s buffer.
+ * Returns @len on success, 0 (copying nothing) if they do not fit.
+ */
+int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
+{
+       /* all or nothing: never store a partial field */
+       if (len > ((PAGE_SIZE - 1) - s->len))
+               return 0;
+
+       memcpy(&s->buffer[s->len], mem, len);
+       s->len += len;
+
+       return len;
+}
+
+/*
+ * Append the @len bytes at @mem to @s as hexadecimal digits followed
+ * by one space. Bytes are walked from the highest address down on
+ * little-endian and from the lowest address up on big-endian.
+ *
+ * @len must not exceed MAX_MEMHEX_BYTES: the on-stack staging buffer
+ * holds HEX_CHARS characters only. Larger requests trigger a one-time
+ * warning and are refused instead of overrunning the stack.
+ *
+ * Returns the number of characters stored, or 0 if nothing was stored.
+ */
+int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
+{
+       unsigned char hex[HEX_CHARS];
+       unsigned char *data = mem;
+       int i, j;
+
+       /* hex[] only has room for MAX_MEMHEX_BYTES bytes of input */
+       if (WARN_ON_ONCE(len > MAX_MEMHEX_BYTES))
+               return 0;
+
+#ifdef __BIG_ENDIAN
+       for (i = 0, j = 0; i < len; i++) {
+#else
+       for (i = len-1, j = 0; i >= 0; i--) {
+#endif
+               hex[j++] = hex_asc_hi(data[i]);
+               hex[j++] = hex_asc_lo(data[i]);
+       }
+       /* field separator */
+       hex[j++] = ' ';
+
+       return trace_seq_putmem(s, hex, j);
+}
+
+/*
+ * Append the path name of @path to the @s buffer.
+ *
+ * The name is produced by d_path() in the free part of the buffer and
+ * then passed through mangle_path() with "\n" as the escape set. If
+ * d_path() fails, a single '?' is stored instead.
+ *
+ * Returns 1 if something was stored, 0 otherwise.
+ */
+int trace_seq_path(struct trace_seq *s, struct path *path)
+{
+       unsigned char *p;
+
+       if (s->len >= (PAGE_SIZE - 1))
+               return 0;
+
+       p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
+       if (IS_ERR(p)) {
+               /* name lookup failed: leave a placeholder */
+               s->buffer[s->len++] = '?';
+               return 1;
+       }
+
+       p = mangle_path(s->buffer + s->len, p, "\n");
+       if (!p)
+               return 0;
+
+       /* mangle_path() returned the new end of the stored name */
+       s->len = p - s->buffer;
+       return 1;
+}
+
+/*
+ * Replace the kretprobe trampoline symbol name with a marker string.
+ * Without CONFIG_KRETPROBES the name is returned unchanged.
+ */
+#ifdef CONFIG_KRETPROBES
+static inline const char *kretprobed(const char *name)
+{
+       static const char tramp_name[] = "kretprobe_trampoline";
+
+       /* sizeof() counts the NUL, so only an exact match is replaced */
+       if (!strncmp(tramp_name, name, sizeof(tramp_name)))
+               return "[unknown/kretprobe'd]";
+
+       return name;
+}
+#else
+static inline const char *kretprobed(const char *name)
+{
+       return name;
+}
+#endif /* CONFIG_KRETPROBES */
+
+/*
+ * Print the symbol name found for @address through the caller
+ * supplied @fmt. Without CONFIG_KALLSYMS nothing is printed and 1 is
+ * returned; otherwise the trace_seq_printf() result is returned.
+ */
+static int
+seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
+{
+#ifdef CONFIG_KALLSYMS
+       char sym[KSYM_SYMBOL_LEN];
+
+       kallsyms_lookup(address, NULL, NULL, NULL, sym);
+
+       return trace_seq_printf(s, fmt, kretprobed(sym));
+#else
+       return 1;
+#endif
+}
+
+/*
+ * Print the sprint_symbol() rendering of @address through the caller
+ * supplied @fmt. Without CONFIG_KALLSYMS nothing is printed and 1 is
+ * returned; otherwise the trace_seq_printf() result is returned.
+ */
+static int
+seq_print_sym_offset(struct trace_seq *s, const char *fmt,
+                    unsigned long address)
+{
+#ifdef CONFIG_KALLSYMS
+       char sym[KSYM_SYMBOL_LEN];
+
+       sprint_symbol(sym, address);
+
+       return trace_seq_printf(s, fmt, kretprobed(sym));
+#else
+       return 1;
+#endif
+}
+
+/* printf format for an instruction pointer, zero padded to its full width */
+#ifdef CONFIG_64BIT
+# define IP_FMT "%016lx"
+#else
+# define IP_FMT "%08lx"
+#endif
+
+/*
+ * Print one user space instruction pointer.
+ *
+ * When @mm is given and the vma found for @ip is backed by a file,
+ * the file path and the offset of @ip from the vma start are printed.
+ * The raw address is printed as well when TRACE_ITER_SYM_ADDR is set
+ * in @sym_flags or when no backing file was found.
+ *
+ * Returns 0 as soon as one of the writes fails, non-zero otherwise.
+ */
+int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+                     unsigned long ip, unsigned long sym_flags)
+{
+       struct file *obj = NULL;
+       int ok = 1;
+
+       if (mm) {
+               const struct vm_area_struct *vma;
+               unsigned long base = 0;
+
+               /* the vma lookup is done with mmap_sem held for reading */
+               down_read(&mm->mmap_sem);
+               vma = find_vma(mm, ip);
+               if (vma) {
+                       obj = vma->vm_file;
+                       base = vma->vm_start;
+               }
+               if (obj) {
+                       ok = trace_seq_path(s, &obj->f_path);
+                       if (ok)
+                               ok = trace_seq_printf(s, "[+0x%lx]",
+                                                     ip - base);
+               }
+               up_read(&mm->mmap_sem);
+       }
+
+       if (!ok)
+               return ok;
+
+       if (!obj || (sym_flags & TRACE_ITER_SYM_ADDR))
+               ok = trace_seq_printf(s, " <" IP_FMT ">", ip);
+
+       return ok;
+}
+
+/*
+ * Print the user stack recorded in @entry as a " <- " separated list.
+ *
+ * Printing stops at the first ULONG_MAX slot or at the first failed
+ * write. A zero slot is printed as "??". When TRACE_ITER_SYM_USEROBJ
+ * is set, the mm of the task found for @entry->ent.tgid is used to
+ * resolve addresses.
+ * NOTE(review): that test reads the global trace_flags, not
+ * @sym_flags - confirm this is intended.
+ *
+ * Returns 0 if a write failed, non-zero otherwise.
+ */
+int
+seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+                     unsigned long sym_flags)
+{
+       struct mm_struct *mm = NULL;
+       unsigned int i;
+       int ret = 1;
+
+       if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
+               struct task_struct *leader;
+
+               /*
+                * Look the mm up through the thread group leader:
+                * the individual thread may be gone by now.
+                */
+               rcu_read_lock();
+               leader = find_task_by_vpid(entry->ent.tgid);
+               if (leader)
+                       mm = get_task_mm(leader);
+               rcu_read_unlock();
+       }
+
+       for (i = 0; ret && i < FTRACE_STACK_ENTRIES; i++) {
+               unsigned long ip = entry->caller[i];
+
+               /* ULONG_MAX marks the end of the recorded stack */
+               if (ip == ULONG_MAX)
+                       break;
+
+               if (i)
+                       ret = trace_seq_puts(s, " <- ");
+               if (!ret)
+                       break;
+
+               if (!ip) {
+                       ret = trace_seq_puts(s, "??");
+                       continue;
+               }
+
+               ret = seq_print_user_ip(s, mm, ip, sym_flags);
+       }
+
+       /* drop the reference taken by get_task_mm() */
+       if (mm)
+               mmput(mm);
+
+       return ret;
+}
+
+/*
+ * Print the kernel symbol for @ip.
+ *
+ * A zero @ip is printed as "0". TRACE_ITER_SYM_OFFSET in @sym_flags
+ * selects the symbol+offset form, and TRACE_ITER_SYM_ADDR appends the
+ * raw address.
+ *
+ * Returns 0 if a write failed, non-zero otherwise.
+ */
+int
+seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
+{
+       int ret;
+
+       if (!ip)
+               return trace_seq_printf(s, "0");
+
+       ret = (sym_flags & TRACE_ITER_SYM_OFFSET) ?
+               seq_print_sym_offset(s, "%s", ip) :
+               seq_print_sym_short(s, "%s", ip);
+
+       if (ret && (sym_flags & TRACE_ITER_SYM_ADDR))
+               ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+
+       return ret;
+}
+
+/* One character per task state, indexed by task_state_char() */
+static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
+
+/*
+ * Map a task state bitmask to a display character: index 0 for a
+ * zero state, otherwise one past the position of the lowest set bit.
+ * Indexes outside state_to_char[] yield '?'.
+ */
+static int task_state_char(unsigned long state)
+{
+       int bit = 0;
+
+       if (state)
+               bit = __ffs(state) + 1;
+
+       if (bit < sizeof(state_to_char) - 1)
+               return state_to_char[bit];
+
+       return '?';
+}
+
+/**
+ * ftrace_find_event - find a registered event
+ * @type: the type of event to look for
+ *
+ * Walks the hash bucket selected by the low bits of @type.
+ *
+ * Returns the registered event whose type is @type, or NULL if there
+ * is none.
+ */
+struct trace_event *ftrace_find_event(int type)
+{
+       struct hlist_head *bucket;
+       struct trace_event *ev;
+       struct hlist_node *pos;
+
+       bucket = &event_hash[type & (EVENT_HASHSIZE - 1)];
+
+       hlist_for_each_entry_rcu(ev, pos, bucket, node) {
+               if (ev->type == type)
+                       return ev;
+       }
+
+       return NULL;
+}
+
+/**
+ * register_ftrace_event - register output for an event type
+ * @event: the event type to register
+ *
+ * Event types are stored in a hash and this hash is used to
+ * find a way to print an event. If the @event->type is set
+ * then it will use that type, otherwise it will assign a
+ * type to use.
+ *
+ * If you assign your own type, please make sure it is added
+ * to the trace_type enum in trace.h, to avoid collisions
+ * with the dynamic types. A preset type above __TRACE_LAST_TYPE
+ * lies in the range used for dynamic types and is rejected.
+ *
+ * Returns the event type number or zero on error (preset type out
+ * of range, or type already registered).
+ */
+int register_ftrace_event(struct trace_event *event)
+{
+       unsigned key;
+       int ret = 0;
+
+       mutex_lock(&trace_event_mutex);
+
+       if (!event->type)
+               event->type = next_event_type++;
+       else if (event->type > __TRACE_LAST_TYPE) {
+               printk(KERN_WARNING "Need to add type to trace.h\n");
+               WARN_ON(1);
+               /* registering it could collide with a dynamic type */
+               goto out;
+       }
+
+       /* refuse to register the same type twice */
+       if (ftrace_find_event(event->type))
+               goto out;
+
+       key = event->type & (EVENT_HASHSIZE - 1);
+
+       hlist_add_head_rcu(&event->node, &event_hash[key]);
+
+       ret = event->type;
+ out:
+       mutex_unlock(&trace_event_mutex);
+
+       return ret;
+}
+
+/**
+ * unregister_ftrace_event - remove a no longer used event
+ * @event: the event to remove
+ *
+ * The hash is walked with hlist_for_each_entry_rcu() and filled with
+ * hlist_add_head_rcu(), so the entry is unlinked with the RCU
+ * variant, which keeps its forward pointer usable for a lookup that
+ * is traversing it at the same time.
+ * NOTE(review): no grace period is waited for here; the caller
+ * presumably must make sure no lookup can still reach @event before
+ * freeing it - confirm.
+ *
+ * Always returns 0.
+ */
+int unregister_ftrace_event(struct trace_event *event)
+{
+       mutex_lock(&trace_event_mutex);
+       hlist_del_rcu(&event->node);
+       mutex_unlock(&trace_event_mutex);
+
+       return 0;
+}
+
+/*
+ * Standard events
+ */
+
+/*
+ * Output callback that prints nothing and returns 0. Used below for
+ * the output modes an event has no dedicated printer for.
+ */
+int
+trace_nop_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       return 0;
+}
+
+/* TRACE_FN */
+
+/*
+ * Latency format of a function entry: "<ip> (<parent_ip>)\n".
+ * Returns 0 on success, TRACE_TYPE_PARTIAL_LINE if a write failed.
+ */
+static int
+trace_fn_latency(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct ftrace_entry *field;
+
+       trace_assign_type(field, entry);
+
+       /* evaluated left to right, stops at the first failed write */
+       if (!seq_print_ip_sym(s, field->ip, flags) ||
+           !trace_seq_puts(s, " (") ||
+           !seq_print_ip_sym(s, field->parent_ip, flags) ||
+           !trace_seq_puts(s, ")\n"))
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return 0;
+}
+
+/*
+ * Default format of a function entry: the function symbol, then
+ * " <-" and the parent symbol when TRACE_ITER_PRINT_PARENT is set in
+ * @flags and a parent ip was recorded, then a newline.
+ * Returns 0 on success, TRACE_TYPE_PARTIAL_LINE if a write failed.
+ */
+static int
+trace_fn_trace(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct ftrace_entry *field;
+
+       trace_assign_type(field, entry);
+
+       if (!seq_print_ip_sym(s, field->ip, flags))
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
+               if (!trace_seq_printf(s, " <-") ||
+                   !seq_print_ip_sym(s, field->parent_ip, flags))
+                       return TRACE_TYPE_PARTIAL_LINE;
+       }
+
+       if (!trace_seq_printf(s, "\n"))
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return 0;
+}
+
+/*
+ * Raw format of a function entry: ip and parent ip as hex numbers.
+ * Returns 0 on success, TRACE_TYPE_PARTIAL_LINE if the write failed.
+ */
+static int
+trace_fn_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct ftrace_entry *field;
+       int ok;
+
+       trace_assign_type(field, entry);
+
+       ok = trace_seq_printf(s, "%lx %lx\n", field->ip, field->parent_ip);
+
+       return ok ? 0 : TRACE_TYPE_PARTIAL_LINE;
+}
+
+/*
+ * Hex format of a function entry: ip then parent ip.
+ * SEQ_PUT_HEX_FIELD_RET() returns TRACE_TYPE_PARTIAL_LINE from this
+ * function when a field cannot be stored; otherwise 0 is returned.
+ */
+static int
+trace_fn_hex(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct ftrace_entry *field;
+
+       trace_assign_type(field, entry);
+
+       SEQ_PUT_HEX_FIELD_RET(s, field->ip);
+       SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
+
+       return 0;
+}
+
+/*
+ * Binary format of a function entry: the raw bytes of ip then of
+ * parent ip. SEQ_PUT_FIELD_RET() returns TRACE_TYPE_PARTIAL_LINE from
+ * this function when a field cannot be stored; otherwise 0 is returned.
+ */
+static int
+trace_fn_bin(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct ftrace_entry *field;
+
+       trace_assign_type(field, entry);
+
+       SEQ_PUT_FIELD_RET(s, field->ip);
+       SEQ_PUT_FIELD_RET(s, field->parent_ip);
+
+       return 0;
+}
+
+/* Output callbacks for TRACE_FN entries, one per output mode */
+static struct trace_event trace_fn_event = {
+       .type           = TRACE_FN,
+       .trace          = trace_fn_trace,
+       .latency_trace  = trace_fn_latency,
+       .raw            = trace_fn_raw,
+       .hex            = trace_fn_hex,
+       .binary         = trace_fn_bin,
+};
+
+/* TRACE_CTX and TRACE_WAKE */
+
+/*
+ * Common text printer for context switch and wakeup entries.
+ * @delim is printed between the previous and the next task.
+ * Returns 0 on success, TRACE_TYPE_PARTIAL_LINE if the write failed.
+ */
+static int
+trace_ctxwake_print(struct trace_seq *s, struct trace_entry *entry, int flags,
+                   char *delim)
+{
+       struct ctx_switch_entry *field;
+       int prev_c, next_c;
+       char *next_comm;
+       int ok;
+
+       trace_assign_type(field, entry);
+
+       next_c = task_state_char(field->next_state);
+       prev_c = task_state_char(field->prev_state);
+       next_comm = trace_find_cmdline(field->next_pid);
+
+       ok = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
+                             field->prev_pid, field->prev_prio,
+                             prev_c, delim,
+                             field->next_cpu,
+                             field->next_pid, field->next_prio,
+                             next_c, next_comm);
+       if (!ok)
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return 0;
+}
+
+/* Text printer for TRACE_CTX entries: uses "==>" as the delimiter */
+static int
+trace_ctx_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       return trace_ctxwake_print(s, entry, flags, "==>");
+}
+
+/* Text printer for TRACE_WAKE entries: uses "  +" as the delimiter */
+static int
+trace_wake_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       return trace_ctxwake_print(s, entry, flags, "  +");
+}
+
+/*
+ * Common raw printer for context switch and wakeup entries.
+ *
+ * @S is the character printed in the previous-task state column. When
+ * the caller passes 0, the character is derived from the recorded
+ * prev_state instead.
+ *
+ * Returns 0 on success, TRACE_TYPE_PARTIAL_LINE if the write failed.
+ */
+static int
+trace_ctxwake_raw(struct trace_seq *s, struct trace_entry *entry, int flags,
+                 char S)
+{
+       struct ctx_switch_entry *field;
+       int T;
+
+       trace_assign_type(field, entry);
+
+       /* no fixed marker given: show the real previous task state */
+       if (!S)
+               S = task_state_char(field->prev_state);
+       T = task_state_char(field->next_state);
+       if (!trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
+                             field->prev_pid,
+                             field->prev_prio,
+                             S,
+                             field->next_cpu,
+                             field->next_pid,
+                             field->next_prio,
+                             T))
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return 0;
+}
+
+/* Raw printer for TRACE_CTX entries: passes 0 as the state character */
+static int
+trace_ctx_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       return trace_ctxwake_raw(s, entry, flags, 0);
+}
+
+/* Raw printer for TRACE_WAKE entries: passes '+' as the state character */
+static int
+trace_wake_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       return trace_ctxwake_raw(s, entry, flags, '+');
+}
+
+
+/*
+ * Common hex printer for context switch and wakeup entries.
+ *
+ * @S is the character emitted in the previous-task state field. When
+ * the caller passes 0, the character is derived from the recorded
+ * prev_state instead.
+ *
+ * SEQ_PUT_HEX_FIELD_RET() returns TRACE_TYPE_PARTIAL_LINE from this
+ * function when a field cannot be stored; otherwise 0 is returned.
+ */
+static int
+trace_ctxwake_hex(struct trace_seq *s, struct trace_entry *entry, int flags,
+                 char S)
+{
+       struct ctx_switch_entry *field;
+       int T;
+
+       trace_assign_type(field, entry);
+
+       /* no fixed marker given: show the real previous task state */
+       if (!S)
+               S = task_state_char(field->prev_state);
+       T = task_state_char(field->next_state);
+
+       SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
+       SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
+       SEQ_PUT_HEX_FIELD_RET(s, S);
+       SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
+       SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
+       SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
+       SEQ_PUT_HEX_FIELD_RET(s, T);
+
+       return 0;
+}
+
+/* Hex printer for TRACE_CTX entries: passes 0 as the state character */
+static int
+trace_ctx_hex(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       return trace_ctxwake_hex(s, entry, flags, 0);
+}
+
+/* Hex printer for TRACE_WAKE entries: passes '+' as the state character */
+static int
+trace_wake_hex(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       return trace_ctxwake_hex(s, entry, flags, '+');
+}
+
+/*
+ * Binary printer shared by TRACE_CTX and TRACE_WAKE entries: emits the
+ * raw bytes of the prev/next pid, prio and state fields. Unlike the
+ * raw and hex printers, next_cpu is not emitted.
+ * SEQ_PUT_FIELD_RET() returns TRACE_TYPE_PARTIAL_LINE from this
+ * function when a field cannot be stored; otherwise 0 is returned.
+ */
+static int
+trace_ctxwake_bin(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct ctx_switch_entry *field;
+
+       trace_assign_type(field, entry);
+
+       SEQ_PUT_FIELD_RET(s, field->prev_pid);
+       SEQ_PUT_FIELD_RET(s, field->prev_prio);
+       SEQ_PUT_FIELD_RET(s, field->prev_state);
+       SEQ_PUT_FIELD_RET(s, field->next_pid);
+       SEQ_PUT_FIELD_RET(s, field->next_prio);
+       SEQ_PUT_FIELD_RET(s, field->next_state);
+
+       return 0;
+}
+
+/*
+ * Output callbacks for TRACE_CTX entries; the latency mode uses the
+ * same printer as the default mode.
+ */
+static struct trace_event trace_ctx_event = {
+       .type           = TRACE_CTX,
+       .trace          = trace_ctx_print,
+       .latency_trace  = trace_ctx_print,
+       .raw            = trace_ctx_raw,
+       .hex            = trace_ctx_hex,
+       .binary         = trace_ctxwake_bin,
+};
+
+/*
+ * Output callbacks for TRACE_WAKE entries; the latency mode uses the
+ * same printer as the default mode.
+ */
+static struct trace_event trace_wake_event = {
+       .type           = TRACE_WAKE,
+       .trace          = trace_wake_print,
+       .latency_trace  = trace_wake_print,
+       .raw            = trace_wake_raw,
+       .hex            = trace_wake_hex,
+       .binary         = trace_ctxwake_bin,
+};
+
+/* TRACE_SPECIAL */
+
+/*
+ * Text printer for special entries: "# arg1 arg2 arg3" in decimal.
+ * Returns 0 on success, TRACE_TYPE_PARTIAL_LINE if the write failed.
+ */
+static int
+trace_special_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct special_entry *field;
+       int ok;
+
+       trace_assign_type(field, entry);
+
+       ok = trace_seq_printf(s, "# %ld %ld %ld\n",
+                             field->arg1, field->arg2, field->arg3);
+
+       return ok ? 0 : TRACE_TYPE_PARTIAL_LINE;
+}
+
+/*
+ * Hex printer for special entries: arg1, arg2 then arg3.
+ * SEQ_PUT_HEX_FIELD_RET() returns TRACE_TYPE_PARTIAL_LINE from this
+ * function when a field cannot be stored; otherwise 0 is returned.
+ */
+static int
+trace_special_hex(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct special_entry *field;
+
+       trace_assign_type(field, entry);
+
+       SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
+       SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
+       SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
+
+       return 0;
+}
+
+/*
+ * Binary printer for special entries: raw bytes of arg1, arg2, arg3.
+ * SEQ_PUT_FIELD_RET() returns TRACE_TYPE_PARTIAL_LINE from this
+ * function when a field cannot be stored; otherwise 0 is returned.
+ */
+static int
+trace_special_bin(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct special_entry *field;
+
+       trace_assign_type(field, entry);
+
+       SEQ_PUT_FIELD_RET(s, field->arg1);
+       SEQ_PUT_FIELD_RET(s, field->arg2);
+       SEQ_PUT_FIELD_RET(s, field->arg3);
+
+       return 0;
+}
+
+/*
+ * Output callbacks for TRACE_SPECIAL entries; the default, latency
+ * and raw modes all use the same text printer.
+ */
+static struct trace_event trace_special_event = {
+       .type           = TRACE_SPECIAL,
+       .trace          = trace_special_print,
+       .latency_trace  = trace_special_print,
+       .raw            = trace_special_print,
+       .hex            = trace_special_hex,
+       .binary         = trace_special_bin,
+};
+
+/* TRACE_STACK */
+
+/*
+ * Text printer for kernel stack entries: all FTRACE_STACK_ENTRIES
+ * callers on one line, separated by " <= " and ended by a newline.
+ *
+ * Every slot is printed, including the first one; only the separator
+ * is skipped in front of the first slot.
+ *
+ * Returns 0 on success, TRACE_TYPE_PARTIAL_LINE if a write failed.
+ */
+static int
+trace_stack_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct stack_entry *field;
+       int i;
+
+       trace_assign_type(field, entry);
+
+       for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+               /* separator goes between callers, not before the first */
+               if (i) {
+                       if (!trace_seq_puts(s, " <= "))
+                               goto partial;
+               }
+
+               if (!seq_print_ip_sym(s, field->caller[i], flags))
+                       goto partial;
+       }
+
+       /* a single newline terminates the whole stack line */
+       if (!trace_seq_puts(s, "\n"))
+               goto partial;
+
+       return 0;
+
+ partial:
+       return TRACE_TYPE_PARTIAL_LINE;
+}
+
+/*
+ * Output callbacks for TRACE_STACK entries.
+ * NOTE(review): the raw, hex and binary modes reuse the TRACE_SPECIAL
+ * printers, which read the entry as a struct special_entry - confirm
+ * this is intended for stack entries.
+ */
+static struct trace_event trace_stack_event = {
+       .type           = TRACE_STACK,
+       .trace          = trace_stack_print,
+       .latency_trace  = trace_stack_print,
+       .raw            = trace_special_print,
+       .hex            = trace_special_hex,
+       .binary         = trace_special_bin,
+};
+
+/* TRACE_USER_STACK */
+
+/*
+ * Text printer for user stack entries: the caller list produced by
+ * seq_print_userip_objs() followed by a newline.
+ * Returns 0 on success, TRACE_TYPE_PARTIAL_LINE if a write failed.
+ */
+static int
+trace_user_stack_print(struct trace_seq *s, struct trace_entry *entry,
+                      int flags)
+{
+       struct userstack_entry *field;
+
+       trace_assign_type(field, entry);
+
+       /* the newline is only attempted once the list was written */
+       if (!seq_print_userip_objs(field, s, flags) ||
+           !trace_seq_putc(s, '\n'))
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return 0;
+}
+
+/*
+ * Output callbacks for TRACE_USER_STACK entries.
+ * NOTE(review): the raw, hex and binary modes reuse the TRACE_SPECIAL
+ * printers, which read the entry as a struct special_entry - confirm
+ * this is intended for user stack entries.
+ */
+static struct trace_event trace_user_stack_event = {
+       .type           = TRACE_USER_STACK,
+       .trace          = trace_user_stack_print,
+       .latency_trace  = trace_user_stack_print,
+       .raw            = trace_special_print,
+       .hex            = trace_special_hex,
+       .binary         = trace_special_bin,
+};
+
+/* TRACE_PRINT */
+
+/*
+ * Text printer for print entries: "<symbol of ip>: <message>".
+ * No newline is added after the message buffer.
+ * Returns 0 on success, TRACE_TYPE_PARTIAL_LINE if a write failed.
+ */
+static int
+trace_print_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct print_entry *field;
+
+       trace_assign_type(field, entry);
+
+       if (!seq_print_ip_sym(s, field->ip, flags) ||
+           !trace_seq_printf(s, ": %s", field->buf))
+               return TRACE_TYPE_PARTIAL_LINE;
+
+       return 0;
+}
+
+/*
+ * Raw printer for print entries: "# <ip in hex> <message>".
+ * Returns 0 on success, TRACE_TYPE_PARTIAL_LINE if the write failed.
+ */
+static int
+trace_print_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+       struct print_entry *field;
+       int ok;
+
+       trace_assign_type(field, entry);
+
+       ok = trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
+
+       return ok ? 0 : TRACE_TYPE_PARTIAL_LINE;
+}
+
+/*
+ * Output callbacks for TRACE_PRINT entries; the hex and binary modes
+ * use trace_nop_print() and so produce no output.
+ */
+static struct trace_event trace_print_event = {
+       .type           = TRACE_PRINT,
+       .trace          = trace_print_print,
+       .latency_trace  = trace_print_print,
+       .raw            = trace_print_raw,
+       .hex            = trace_nop_print,
+       .binary         = trace_nop_print,
+};
+
+/* NULL terminated list of the events registered by init_events() */
+static struct trace_event *events[] __initdata = {
+       &trace_fn_event,
+       &trace_ctx_event,
+       &trace_wake_event,
+       &trace_special_event,
+       &trace_stack_event,
+       &trace_user_stack_event,
+       &trace_print_event,
+       NULL
+};
+
+/*
+ * Register every event listed in events[]. A failed registration is
+ * reported with a warning and does not stop the loop.
+ * Always returns 0.
+ */
+__init static int init_events(void)
+{
+       struct trace_event **evp;
+
+       for (evp = events; *evp; evp++) {
+               /* register_ftrace_event() returns 0 on failure */
+               if (register_ftrace_event(*evp))
+                       continue;
+
+               printk(KERN_WARNING "event %d failed to register\n",
+                      (*evp)->type);
+               WARN_ON_ONCE(1);
+       }
+
+       return 0;
+}
+device_initcall(init_events);
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
new file mode 100644 (file)
index 0000000..1cbab5e
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef __TRACE_EVENTS_H
+#define __TRACE_EVENTS_H
+
+#include "trace.h"
+
+/*
+ * Signature of an event output callback: writes @entry into @s,
+ * with @flags passed down by the caller.
+ */
+typedef int (*trace_print_func)(struct trace_seq *s, struct trace_entry *entry,
+                               int flags);
+
+/*
+ * Output description of one trace entry type: its type number and one
+ * print callback per output mode.
+ */
+struct trace_event {
+       /* link in the event hash of trace_output.c */
+       struct hlist_node       node;
+       /* entry type; 0 asks register_ftrace_event() to assign one */
+       int                     type;
+       trace_print_func        trace;
+       trace_print_func        latency_trace;
+       trace_print_func        raw;
+       trace_print_func        hex;
+       trace_print_func        binary;
+};
+
+/*
+ * Helpers that append to a trace_seq buffer. The ones defined in
+ * trace_output.c return 0 when nothing could be stored.
+ */
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+       __attribute__ ((format (printf, 2, 3)));
+extern int
+seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
+               unsigned long sym_flags);
+extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+                                size_t cnt);
+int trace_seq_puts(struct trace_seq *s, const char *str);
+int trace_seq_putc(struct trace_seq *s, unsigned char c);
+int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len);
+int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len);
+int trace_seq_path(struct trace_seq *s, struct path *path);
+int seq_print_userip_objs(const struct userstack_entry *entry,
+                         struct trace_seq *s, unsigned long sym_flags);
+int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+                     unsigned long ip, unsigned long sym_flags);
+
+/* Lookup and (un)registration of per-type output callbacks */
+struct trace_event *ftrace_find_event(int type);
+int register_ftrace_event(struct trace_event *event);
+int unregister_ftrace_event(struct trace_event *event);
+
+/* Callback that prints nothing and returns 0 */
+int
+trace_nop_print(struct trace_seq *s, struct trace_entry *entry, int flags);
+
+/* Largest field, in bytes, that trace_seq_putmem_hex() can stage */
+#define MAX_MEMHEX_BYTES       8
+/* Two hex digits per byte plus the trailing separator */
+#define HEX_CHARS              (MAX_MEMHEX_BYTES*2 + 1)
+
+/*
+ * Store the raw bytes of @x in @s. On failure this RETURNS
+ * TRACE_TYPE_PARTIAL_LINE from the calling function.
+ */
+#define SEQ_PUT_FIELD_RET(s, x)                                \
+do {                                                   \
+       if (!trace_seq_putmem(s, &(x), sizeof(x)))      \
+               return TRACE_TYPE_PARTIAL_LINE;         \
+} while (0)
+
+/*
+ * Store @x in @s as hex digits. Fields larger than MAX_MEMHEX_BYTES
+ * break the build. On failure this RETURNS TRACE_TYPE_PARTIAL_LINE
+ * from the calling function.
+ */
+#define SEQ_PUT_HEX_FIELD_RET(s, x)                    \
+do {                                                   \
+       BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES);     \
+       if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))  \
+               return TRACE_TYPE_PARTIAL_LINE;         \
+} while (0)
+
+#endif
+
index 7bda248..faa6ab7 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 
 #include "trace.h"
+#include "trace_output.h"
 
 static struct trace_array *power_trace;
 static int __read_mostly trace_power_enabled;
index 43586b6..42ae1e7 100644 (file)
@@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr)
 
 static int wakeup_tracer_init(struct trace_array *tr)
 {
+       tracing_max_latency = 0;
        wakeup_trace = tr;
        start_wakeup_tracer(tr);
        return 0;
index 88c8eb7..5013812 100644 (file)
@@ -9,7 +9,6 @@ static inline int trace_valid_entry(struct trace_entry *entry)
        case TRACE_FN:
        case TRACE_CTX:
        case TRACE_WAKE:
-       case TRACE_CONT:
        case TRACE_STACK:
        case TRACE_PRINT:
        case TRACE_SPECIAL:
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
new file mode 100644 (file)
index 0000000..eae9cef
--- /dev/null
@@ -0,0 +1,319 @@
+/*
+ * Infrastructure for statistic tracing (histogram output).
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Based on the code from trace_branch.c which is
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+
+#include <linux/list.h>
+#include <linux/debugfs.h>
+#include "trace_stat.h"
+#include "trace.h"
+
+
+/*
+ * List of stat entries from a tracer: one node per value returned by
+ * the tracer's stat_start/stat_next callbacks.
+ */
+struct trace_stat_list {
+       struct list_head        list;
+       /* opaque entry, handed back to the tracer's stat_show callback */
+       void                    *stat;
+};
+
+/* A stat session is the stats output in one file */
+struct tracer_stat_session {
+       /* link in all_stat_sessions */
+       struct list_head        session_list;
+       /* the tracer callbacks this session belongs to */
+       struct tracer_stat      *ts;
+       /* sorted list of trace_stat_list nodes, rebuilt on each open */
+       struct list_head        stat_list;
+       /* protects stat_list */
+       struct mutex            stat_mutex;
+       /* the debugfs file of this session */
+       struct dentry           *file;
+};
+
+/* All of the sessions currently in use. Each stat file embeds one session */
+static LIST_HEAD(all_stat_sessions);
+/* Protects all_stat_sessions */
+static DEFINE_MUTEX(all_stat_sessions_mutex);
+
+/* The root directory for all stat files */
+static struct dentry           *stat_dir;
+
+
+/*
+ * Free every node of the session's stat list and leave the list
+ * empty. Only the list nodes are freed, not the stat entries they
+ * point to.
+ */
+static void reset_stat_session(struct tracer_stat_session *session)
+{
+       struct trace_stat_list *entry, *tmp;
+
+       list_for_each_entry_safe(entry, tmp, &session->stat_list, list)
+               kfree(entry);
+
+       /* the nodes were freed without unlinking: start over */
+       INIT_LIST_HEAD(&session->stat_list);
+}
+
+/*
+ * Tear a session down: remove its debugfs file, free its stat list,
+ * destroy its mutex and free the session itself.
+ */
+static void destroy_session(struct tracer_stat_session *session)
+{
+       debugfs_remove(session->file);
+       reset_stat_session(session);
+       mutex_destroy(&session->stat_mutex);
+       kfree(session);
+}
+
+/*
+ * For tracers that don't provide a stat_cmp callback.
+ * Always returns 1, so stat_seq_init() inserts each new entry right
+ * before the first list entry it is compared against, without
+ * walking the rest of the list.
+ */
+static int dummy_cmp(void *p1, void *p2)
+{
+       return 1;
+}
+
+/*
+ * Initialize the stat list at each trace_stat file opening.
+ * All of these copies and sorting are required on all opening
+ * since the stats could have changed between two file sessions.
+ *
+ * Returns 0 on success (the list stays empty when the tracer has no
+ * entry to report) or -ENOMEM if an allocation failed, in which case
+ * the list is left empty.
+ */
+static int stat_seq_init(struct tracer_stat_session *session)
+{
+       struct trace_stat_list *iter_entry, *new_entry;
+       struct tracer_stat *ts = session->ts;
+       void *prev_stat;
+       int ret = 0;
+       int i;
+
+       mutex_lock(&session->stat_mutex);
+       reset_stat_session(session);
+
+       if (!ts->stat_cmp)
+               ts->stat_cmp = dummy_cmp;
+
+       /*
+        * The first entry. Actually this is the second, but the first
+        * one (the stat_list head) is pointless.
+        */
+       new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
+       if (!new_entry) {
+               ret = -ENOMEM;
+               goto exit;
+       }
+
+       INIT_LIST_HEAD(&new_entry->list);
+       new_entry->stat = ts->stat_start();
+
+       /* Nothing to report: don't queue a NULL stat for stat_show */
+       if (!new_entry->stat) {
+               kfree(new_entry);
+               goto exit;
+       }
+
+       list_add(&new_entry->list, &session->stat_list);
+       prev_stat = new_entry->stat;
+
+       /*
+        * Iterate over the tracer stat entries and store them in a sorted
+        * list.
+        */
+       for (i = 1; ; i++) {
+               new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
+               if (!new_entry) {
+                       ret = -ENOMEM;
+                       goto exit_free_list;
+               }
+
+               INIT_LIST_HEAD(&new_entry->list);
+               new_entry->stat = ts->stat_next(prev_stat, i);
+
+               /* End of insertion: this node was never linked, free it */
+               if (!new_entry->stat) {
+                       kfree(new_entry);
+                       break;
+               }
+
+               list_for_each_entry(iter_entry, &session->stat_list, list) {
+
+                       /* Insertion in descending order */
+                       if (ts->stat_cmp(new_entry->stat,
+                                               iter_entry->stat) > 0) {
+
+                               list_add_tail(&new_entry->list,
+                                               &iter_entry->list);
+                               break;
+
+                       /* The current smallest value */
+                       } else if (list_is_last(&iter_entry->list,
+                                               &session->stat_list)) {
+                               list_add(&new_entry->list, &iter_entry->list);
+                               break;
+                       }
+               }
+
+               prev_stat = new_entry->stat;
+       }
+exit:
+       mutex_unlock(&session->stat_mutex);
+       return ret;
+
+exit_free_list:
+       reset_stat_session(session);
+       mutex_unlock(&session->stat_mutex);
+       return ret;
+}
+
+
+/*
+ * seq_file start callback: takes the session mutex (released in
+ * stat_seq_stop()), prints the tracer's headers when reading starts
+ * at position 0 and returns the list node at *pos.
+ */
+static void *stat_seq_start(struct seq_file *s, loff_t *pos)
+{
+       struct tracer_stat_session *session = s->private;
+
+       /* Prevent from tracer switch or stat_list modification */
+       mutex_lock(&session->stat_mutex);
+
+       /* Headers are optional and only wanted at the top of the file */
+       if (*pos == 0 && session->ts->stat_headers)
+               session->ts->stat_headers(s);
+
+       return seq_list_start(&session->stat_list, *pos);
+}
+
+/* seq_file next callback: steps to the next node of the stat list */
+static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
+{
+       struct tracer_stat_session *session = s->private;
+
+       return seq_list_next(p, &session->stat_list, pos);
+}
+
+/* seq_file stop callback: releases the mutex taken in stat_seq_start() */
+static void stat_seq_stop(struct seq_file *s, void *p)
+{
+       struct tracer_stat_session *session = s->private;
+       mutex_unlock(&session->stat_mutex);
+}
+
+/*
+ * seq_file show callback: hands the stat entry stored in the current
+ * list node to the tracer's stat_show callback.
+ */
+static int stat_seq_show(struct seq_file *s, void *v)
+{
+       struct tracer_stat_session *session = s->private;
+       struct trace_stat_list *node;
+
+       /* v points at the list member embedded in the node */
+       node = list_entry(v, struct trace_stat_list, list);
+
+       return session->ts->stat_show(s, node->stat);
+}
+
+/* seq_file iterator over a session's stat list */
+static const struct seq_operations trace_stat_seq_ops = {
+       .start          = stat_seq_start,
+       .next           = stat_seq_next,
+       .stop           = stat_seq_stop,
+       .show           = stat_seq_show
+};
+
+/*
+ * The session stat is refilled and resorted at each stat file opening.
+ *
+ * Returns 0 on success or the error of seq_open()/stat_seq_init().
+ * When building the stat list fails, the seq_file set up by seq_open()
+ * is released again here: the release callback is not expected to run
+ * for a failed open.
+ */
+static int tracing_stat_open(struct inode *inode, struct file *file)
+{
+       int ret;
+       struct seq_file *m;
+       struct tracer_stat_session *session = inode->i_private;
+
+       ret = seq_open(file, &trace_stat_seq_ops);
+       if (ret)
+               return ret;
+
+       m = file->private_data;
+       m->private = session;
+
+       ret = stat_seq_init(session);
+       if (ret)
+               seq_release(inode, file);
+
+       return ret;
+}
+
+/*
+ * Avoid consuming memory with our now useless list, then release the
+ * seq_file that tracing_stat_open() set up with seq_open().
+ *
+ * Returns the result of seq_release().
+ */
+static int tracing_stat_release(struct inode *i, struct file *f)
+{
+       struct tracer_stat_session *session = i->i_private;
+
+       mutex_lock(&session->stat_mutex);
+       reset_stat_session(session);
+       mutex_unlock(&session->stat_mutex);
+
+       return seq_release(i, f);
+}
+
+/* File operations of a stat file: seq_file reads over the session list */
+static const struct file_operations tracing_stat_fops = {
+       .open           = tracing_stat_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = tracing_stat_release
+};
+
+/*
+ * Create the "trace_stat" directory that holds all stat files.
+ *
+ * Returns 0 on success, -ENOMEM if the directory could not be
+ * created, so that init_stat_file() can refuse to create a stat file
+ * without its parent directory.
+ */
+static int tracing_stat_init(void)
+{
+       struct dentry *d_tracing;
+
+       d_tracing = tracing_init_dentry();
+
+       stat_dir = debugfs_create_dir("trace_stat", d_tracing);
+       if (!stat_dir) {
+               pr_warning("Could not create debugfs "
+                          "'trace_stat' entry\n");
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+/*
+ * Create the debugfs file of @session, named after its tracer, below
+ * the stat directory (which is set up on first use).
+ *
+ * Returns 0 on success, -ENODEV if setting up the stat directory
+ * reported an error, -ENOMEM if the file could not be created.
+ */
+static int init_stat_file(struct tracer_stat_session *session)
+{
+       struct dentry *entry;
+
+       /* the stat directory is created lazily */
+       if (!stat_dir) {
+               if (tracing_stat_init())
+                       return -ENODEV;
+       }
+
+       entry = debugfs_create_file(session->ts->name, 0644, stat_dir,
+                                   session, &tracing_stat_fops);
+       session->file = entry;
+
+       return entry ? 0 : -ENOMEM;
+}
+
+/*
+ * Create a stat session, and its stat file, for @trace.
+ *
+ * Returns 0 on success, -EINVAL if @trace is NULL, lacks one of the
+ * stat_start/stat_next/stat_show callbacks or is already registered,
+ * -ENOMEM if the session cannot be allocated, or the error reported
+ * by init_stat_file().
+ */
+int register_stat_tracer(struct tracer_stat *trace)
+{
+       struct tracer_stat_session *session, *node;
+       int registered = 0;
+       int ret;
+
+       if (!trace)
+               return -EINVAL;
+
+       /* these three callbacks are mandatory */
+       if (!trace->stat_start || !trace->stat_next || !trace->stat_show)
+               return -EINVAL;
+
+       /* Already registered? */
+       mutex_lock(&all_stat_sessions_mutex);
+       list_for_each_entry(node, &all_stat_sessions, session_list) {
+               if (node->ts == trace) {
+                       registered = 1;
+                       break;
+               }
+       }
+       mutex_unlock(&all_stat_sessions_mutex);
+
+       if (registered)
+               return -EINVAL;
+
+       /* Init the session */
+       session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
+       if (!session)
+               return -ENOMEM;
+
+       session->ts = trace;
+       session->file = NULL;
+       INIT_LIST_HEAD(&session->session_list);
+       INIT_LIST_HEAD(&session->stat_list);
+       mutex_init(&session->stat_mutex);
+
+       ret = init_stat_file(session);
+       if (ret) {
+               destroy_session(session);
+               return ret;
+       }
+
+       /* Register */
+       mutex_lock(&all_stat_sessions_mutex);
+       list_add_tail(&session->session_list, &all_stat_sessions);
+       mutex_unlock(&all_stat_sessions_mutex);
+
+       return 0;
+}
+
+/*
+ * Remove and destroy the session registered for @trace, if there is
+ * one. Does nothing when @trace is not registered.
+ */
+void unregister_stat_tracer(struct tracer_stat *trace)
+{
+       struct tracer_stat_session *session, *next;
+
+       mutex_lock(&all_stat_sessions_mutex);
+       list_for_each_entry_safe(session, next, &all_stat_sessions,
+                                session_list) {
+               if (session->ts != trace)
+                       continue;
+
+               /* a tracer has at most one session: stop at the match */
+               list_del(&session->session_list);
+               destroy_session(session);
+               break;
+       }
+       mutex_unlock(&all_stat_sessions_mutex);
+}
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
new file mode 100644 (file)
index 0000000..202274c
--- /dev/null
@@ -0,0 +1,31 @@
+#ifndef __TRACE_STAT_H
+#define __TRACE_STAT_H
+
+#include <linux/seq_file.h>
+
+/*
+ * If you want to provide a stat file (one-shot statistics), fill
+ * an iterator with the stat_start/stat_next and stat_show callbacks.
+ * register_stat_tracer() rejects a tracer_stat that lacks any of these
+ * three; the other callbacks are optional.
+ */
+struct tracer_stat {
+       /* The name of your stat file */
+       const char              *name;
+       /*
+        * Iteration over statistic entries.
+        * stat_start yields the first entry; stat_next is handed the
+        * previously returned entry in @prev.
+        * NOTE(review): the meaning of @idx and the end-of-iteration
+        * convention (NULL?) are defined by trace_stat.c - confirm there.
+        */
+       void                    *(*stat_start)(void);
+       void                    *(*stat_next)(void *prev, int idx);
+       /* Compare two entries for stats sorting (optional) */
+       int                     (*stat_cmp)(void *p1, void *p2);
+       /* Print a stat entry */
+       int                     (*stat_show)(struct seq_file *s, void *p);
+       /* Print the headers of your stat entries (optional) */
+       int                     (*stat_headers)(struct seq_file *s);
+};
+
+/*
+ * Destroy or create a stat file
+ *
+ * register_stat_tracer() returns 0 on success, -EINVAL for a NULL,
+ * incomplete (missing stat_start/stat_next/stat_show) or already
+ * registered tracer_stat, -ENOMEM when the session cannot be allocated,
+ * or the error returned while creating the stat file.
+ * unregister_stat_tracer() silently ignores a tracer_stat that is not
+ * registered.
+ */
+extern int register_stat_tracer(struct tracer_stat *trace);
+extern void unregister_stat_tracer(struct tracer_stat *trace);
+
+#endif /* __TRACE_STAT_H */
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
new file mode 100644 (file)
index 0000000..f8118d3
--- /dev/null
@@ -0,0 +1,287 @@
+/*
+ * Workqueue statistical tracer.
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ */
+
+
+#include <trace/workqueue.h>
+#include <linux/list.h>
+#include "trace_stat.h"
+#include "trace.h"
+
+
+/*
+ * A cpu workqueue thread.
+ * One entry is allocated per created workqueue thread and linked on the
+ * list of the cpu it was created for; it is freed by the destruction probe.
+ */
+struct cpu_workqueue_stats {
+/* Link in all_workqueue_stat[cpu].list, protected by that cpu's lock */
+       struct list_head            list;
+/*
+ * Useful to know if we print the cpu headers.
+ * Set when the entry is added to an empty per-cpu list.
+ */
+       bool                        first_entry;
+/* Cpu passed to the creation probe; selects the per-cpu list */
+       int                         cpu;
+/* Pid of the workqueue thread, used by the probes to find this entry */
+       pid_t                       pid;
+/* Can be inserted from interrupt or user context, need to be atomic */
+       atomic_t                    inserted;
+/*
+ *  Don't need to be atomic, works are serialized in a single workqueue thread
+ *  on a single CPU.
+ */
+       unsigned int                executed;
+};
+
+/* List of workqueue threads on one cpu */
+struct workqueue_global_stats {
+       /* Head of the cpu_workqueue_stats entries recorded for this cpu */
+       struct list_head        list;
+       /* Protects @list; always taken with interrupts disabled (irqsave) */
+       spinlock_t              lock;
+};
+
+/* Don't need a global lock because allocated before the workqueues, and
+ * never freed.
+ *
+ * Array indexed by cpu number, allocated by trace_workqueue_early_init().
+ */
+static struct workqueue_global_stats *all_workqueue_stat;
+
+/*
+ * Insertion of a work.
+ * Looks up the stat entry of @wq_thread on the list of the first cpu in
+ * its cpus_allowed mask and bumps its "inserted" counter.
+ */
+static void
+probe_workqueue_insertion(struct task_struct *wq_thread,
+                         struct work_struct *work)
+{
+       int cpu = cpumask_first(&wq_thread->cpus_allowed);
+       struct workqueue_global_stats *cpu_stats = &all_workqueue_stat[cpu];
+       struct cpu_workqueue_stats *cws;
+       unsigned long flags;
+       bool found = false;
+
+       spin_lock_irqsave(&cpu_stats->lock, flags);
+       list_for_each_entry(cws, &cpu_stats->list, list) {
+               if (cws->pid != wq_thread->pid)
+                       continue;
+
+               atomic_inc(&cws->inserted);
+               found = true;
+               break;
+       }
+       if (!found)
+               pr_debug("trace_workqueue: entry not found\n");
+       spin_unlock_irqrestore(&cpu_stats->lock, flags);
+}
+
+/*
+ * Execution of a work.
+ * Looks up the stat entry of @wq_thread on the list of the first cpu in
+ * its cpus_allowed mask and bumps its "executed" counter.
+ */
+static void
+probe_workqueue_execution(struct task_struct *wq_thread,
+                         struct work_struct *work)
+{
+       int cpu = cpumask_first(&wq_thread->cpus_allowed);
+       struct workqueue_global_stats *cpu_stats = &all_workqueue_stat[cpu];
+       struct cpu_workqueue_stats *cws;
+       unsigned long flags;
+       bool found = false;
+
+       spin_lock_irqsave(&cpu_stats->lock, flags);
+       list_for_each_entry(cws, &cpu_stats->list, list) {
+               if (cws->pid != wq_thread->pid)
+                       continue;
+
+               cws->executed++;
+               found = true;
+               break;
+       }
+       if (!found)
+               pr_debug("trace_workqueue: entry not found\n");
+       spin_unlock_irqrestore(&cpu_stats->lock, flags);
+}
+
+/*
+ * Creation of a cpu workqueue thread.
+ * Allocates a stat entry for @wq_thread and appends it to the list of @cpu.
+ * The event is dropped (with a warning) when @cpu is out of range or when
+ * the entry cannot be allocated.
+ */
+static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
+{
+       struct cpu_workqueue_stats *cws;
+       unsigned long flags;
+
+       /*
+        * all_workqueue_stat[] is indexed by cpu below: an out of range
+        * value must not only warn, it must also stop us from touching
+        * memory outside of the array.
+        */
+       if (WARN_ON(cpu < 0 || cpu >= num_possible_cpus()))
+               return;
+
+       /* Workqueues are sometimes created in atomic context */
+       cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
+       if (!cws) {
+               pr_warning("trace_workqueue: not enough memory\n");
+               return;
+       }
+       tracing_record_cmdline(wq_thread);
+
+       INIT_LIST_HEAD(&cws->list);
+       cws->cpu = cpu;
+
+       cws->pid = wq_thread->pid;
+
+       spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+       /* Remember whether this entry opens the list of its cpu */
+       if (list_empty(&all_workqueue_stat[cpu].list))
+               cws->first_entry = true;
+       list_add_tail(&cws->list, &all_workqueue_stat[cpu].list);
+       spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+}
+
+/*
+ * Destruction of a cpu workqueue thread.
+ * Unlinks and frees the stat entry whose pid matches @wq_thread.
+ */
+static void probe_workqueue_destruction(struct task_struct *wq_thread)
+{
+       /* Workqueue only execute on one cpu */
+       int cpu = cpumask_first(&wq_thread->cpus_allowed);
+       struct workqueue_global_stats *cpu_stats = &all_workqueue_stat[cpu];
+       struct cpu_workqueue_stats *cws, *victim = NULL;
+       unsigned long flags;
+
+       spin_lock_irqsave(&cpu_stats->lock, flags);
+       list_for_each_entry(cws, &cpu_stats->list, list) {
+               if (cws->pid == wq_thread->pid) {
+                       victim = cws;
+                       break;
+               }
+       }
+
+       if (victim) {
+               list_del(&victim->list);
+               kfree(victim);
+       } else {
+               pr_debug("trace_workqueue: don't find workqueue to destroy\n");
+       }
+       spin_unlock_irqrestore(&cpu_stats->lock, flags);
+}
+
+/* Return the first stat entry recorded for @cpu, or NULL if it has none */
+static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
+{
+       struct workqueue_global_stats *cpu_stats = &all_workqueue_stat[cpu];
+       struct cpu_workqueue_stats *first;
+       unsigned long flags;
+
+       spin_lock_irqsave(&cpu_stats->lock, flags);
+       if (list_empty(&cpu_stats->list))
+               first = NULL;
+       else
+               first = list_entry(cpu_stats->list.next,
+                                  struct cpu_workqueue_stats, list);
+       spin_unlock_irqrestore(&cpu_stats->lock, flags);
+
+       return first;
+}
+
+/*
+ * stat_start callback: first entry of the first possible cpu that has
+ * at least one workqueue thread recorded, or NULL when there is none.
+ */
+static void *workqueue_stat_start(void)
+{
+       void *entry = NULL;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               entry = workqueue_stat_start_cpu(cpu);
+               if (entry)
+                       break;
+       }
+
+       return entry;
+}
+
+/*
+ * stat_next callback: return the entry that follows @prev.
+ *
+ * The successor is the next entry on the list of @prev's cpu or, when
+ * @prev is the last one there, the first entry of the next cpu that has
+ * any. Returns NULL once every cpu has been walked. @idx is unused.
+ */
+static void *workqueue_stat_next(void *prev, int idx)
+{
+       struct cpu_workqueue_stats *prev_cws = prev;
+       int cpu = prev_cws->cpu;
+       unsigned long flags;
+       void *ret = NULL;
+
+       /*
+        * Fetch the successor while holding the per-cpu lock: the creation
+        * and destruction probes modify the list under this lock, so
+        * reading prev_cws->list.next after unlocking would race with them.
+        * NOTE(review): the returned entry can still be freed by the
+        * destruction probe once the lock is dropped.
+        */
+       spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+       if (!list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list))
+               ret = list_entry(prev_cws->list.next,
+                                struct cpu_workqueue_stats, list);
+       spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+
+       if (ret)
+               return ret;
+
+       /* End of this cpu's list: move on to the next non-empty cpu */
+       for (++cpu ; cpu < num_possible_cpus(); cpu++) {
+               ret = workqueue_stat_start_cpu(cpu);
+               if (ret)
+                       return ret;
+       }
+
+       return NULL;
+}
+
+/*
+ * stat_show callback: print one line (cpu, inserted, executed, name)
+ * for the entry @p. An extra blank line is emitted after the entry that
+ * currently heads the list of its cpu.
+ */
+static int workqueue_stat_show(struct seq_file *s, void *p)
+{
+       struct cpu_workqueue_stats *entry = p;
+       struct workqueue_global_stats *cpu_stats;
+       unsigned long flags;
+
+       cpu_stats = &all_workqueue_stat[entry->cpu];
+
+       seq_printf(s, "%3d %6d     %6u       %s\n", entry->cpu,
+                  atomic_read(&entry->inserted),
+                  entry->executed,
+                  trace_find_cmdline(entry->pid));
+
+       spin_lock_irqsave(&cpu_stats->lock, flags);
+       if (cpu_stats->list.next == &entry->list)
+               seq_printf(s, "\n");
+       spin_unlock_irqrestore(&cpu_stats->lock, flags);
+
+       return 0;
+}
+
+/* stat_headers callback: print the two-line column banner */
+static int workqueue_stat_headers(struct seq_file *s)
+{
+       seq_printf(s,
+                  "# CPU  INSERTED  EXECUTED   NAME\n"
+                  "# |      |         |          |\n\n");
+
+       return 0;
+}
+
+/*
+ * Stat tracer descriptor handed to register_stat_tracer() by
+ * stat_workqueue_init(). The stat file is named "workqueues"; no stat_cmp
+ * callback is provided.
+ */
+struct tracer_stat workqueue_stats __read_mostly = {
+       .name = "workqueues",
+       .stat_start = workqueue_stat_start,
+       .stat_next = workqueue_stat_next,
+       .stat_show = workqueue_stat_show,
+       .stat_headers = workqueue_stat_headers
+};
+
+
+/*
+ * Register the workqueue stat tracer.
+ * Returns 0 on success, 1 (after a warning) when registration fails.
+ */
+int __init stat_workqueue_init(void)
+{
+       int err = register_stat_tracer(&workqueue_stats);
+
+       if (!err)
+               return 0;
+
+       pr_warning("Unable to register workqueue stat tracer\n");
+       return 1;
+}
+fs_initcall(stat_workqueue_init);
+
+/*
+ * Workqueues are created very early, just after pre-smp initcalls.
+ * So we must register our tracepoints at this stage.
+ *
+ * The per-cpu lists are allocated and initialised before any probe is
+ * registered, so a probe can never run against a missing array, and
+ * every failure path undoes all of the registrations made so far.
+ *
+ * Returns 0 on success, 1 when the workqueues cannot be traced.
+ */
+int __init trace_workqueue_early_init(void)
+{
+       int ret, cpu;
+
+       /*
+        * The array is indexed by cpu number, so size it by nr_cpu_ids:
+        * num_possible_cpus() only counts the possible cpus and is too
+        * small when their numbering has holes.
+        */
+       all_workqueue_stat = kcalloc(nr_cpu_ids,
+                                    sizeof(struct workqueue_global_stats),
+                                    GFP_KERNEL);
+       if (!all_workqueue_stat) {
+               pr_warning("trace_workqueue: not enough memory\n");
+               goto out;
+       }
+
+       for_each_possible_cpu(cpu) {
+               spin_lock_init(&all_workqueue_stat[cpu].lock);
+               INIT_LIST_HEAD(&all_workqueue_stat[cpu].list);
+       }
+
+       ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
+       if (ret)
+               goto no_stats;
+
+       ret = register_trace_workqueue_execution(probe_workqueue_execution);
+       if (ret)
+               goto no_insertion;
+
+       ret = register_trace_workqueue_creation(probe_workqueue_creation);
+       if (ret)
+               goto no_execution;
+
+       ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
+       if (ret)
+               goto no_creation;
+
+       return 0;
+
+no_creation:
+       unregister_trace_workqueue_creation(probe_workqueue_creation);
+no_execution:
+       unregister_trace_workqueue_execution(probe_workqueue_execution);
+no_insertion:
+       unregister_trace_workqueue_insertion(probe_workqueue_insertion);
+no_stats:
+       /*
+        * NOTE(review): freeing right after unregistering assumes no probe
+        * can still be running, i.e. that this early initcall runs before
+        * the secondary cpus are brought up - confirm.
+        */
+       kfree(all_workqueue_stat);
+       all_workqueue_stat = NULL;
+out:
+       pr_warning("trace_workqueue: unable to trace workqueues\n");
+
+       return 1;
+}
+early_initcall(trace_workqueue_early_init);
index 2f44583..1fc2bc2 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
+#include <trace/workqueue.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -125,9 +126,13 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
        return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
+DEFINE_TRACE(workqueue_insertion);
+
 static void insert_work(struct cpu_workqueue_struct *cwq,
                        struct work_struct *work, struct list_head *head)
 {
+       trace_workqueue_insertion(cwq->thread, work);
+
        set_wq_data(work, cwq);
        /*
         * Ensure that we get the right work->data if we see the
@@ -259,6 +264,8 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
+DEFINE_TRACE(workqueue_execution);
+
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
        spin_lock_irq(&cwq->lock);
@@ -284,7 +291,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
                 */
                struct lockdep_map lockdep_map = work->lockdep_map;
 #endif
-
+               trace_workqueue_execution(cwq->thread, work);
                cwq->current_work = work;
                list_del_init(cwq->worklist.next);
                spin_unlock_irq(&cwq->lock);
@@ -765,6 +772,8 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
        return cwq;
 }
 
+DEFINE_TRACE(workqueue_creation);
+
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -787,6 +796,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
                sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
        cwq->thread = p;
 
+       trace_workqueue_creation(cwq->thread, cpu);
+
        return 0;
 }
 
@@ -868,6 +879,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 }
 EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
+DEFINE_TRACE(workqueue_destruction);
+
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 {
        /*
@@ -891,6 +904,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
         * checks list_empty(), and a "normal" queue_work() can't use
         * a dead CPU.
         */
+       trace_workqueue_destruction(cwq->thread);
        kthread_stop(cwq->thread);
        cwq->thread = NULL;
 }
index ddc41f3..dae716b 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
 #include       <linux/cpu.h>
 #include       <linux/sysctl.h>
 #include       <linux/module.h>
+#include       <trace/kmemtrace.h>
 #include       <linux/rcupdate.h>
 #include       <linux/string.h>
 #include       <linux/uaccess.h>
@@ -568,6 +569,14 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
 
 #endif
 
+#ifdef CONFIG_KMEMTRACE
+size_t slab_buffer_size(struct kmem_cache *cachep)
+{
+       return cachep->buffer_size;
+}
+EXPORT_SYMBOL(slab_buffer_size);
+#endif
+
 /*
  * Do not go above this order unless 0 objects fit into the slab.
  */
@@ -3550,10 +3559,23 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
  */
 void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
-       return __cache_alloc(cachep, flags, __builtin_return_address(0));
+       void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
+
+       kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+                            obj_size(cachep), cachep->buffer_size, flags);
+
+       return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
+{
+       return __cache_alloc(cachep, flags, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+#endif
+
 /**
  * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
  * @cachep: the cache we're checking against
@@ -3598,23 +3620,47 @@ out:
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
-       return __cache_alloc_node(cachep, flags, nodeid,
-                       __builtin_return_address(0));
+       void *ret = __cache_alloc_node(cachep, flags, nodeid,
+                                      __builtin_return_address(0));
+
+       kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+                                 obj_size(cachep), cachep->buffer_size,
+                                 flags, nodeid);
+
+       return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+                                   gfp_t flags,
+                                   int nodeid)
+{
+       return __cache_alloc_node(cachep, flags, nodeid,
+                                 __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+#endif
+
 static __always_inline void *
 __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
 {
        struct kmem_cache *cachep;
+       void *ret;
 
        cachep = kmem_find_general_cachep(size, flags);
        if (unlikely(ZERO_OR_NULL_PTR(cachep)))
                return cachep;
-       return kmem_cache_alloc_node(cachep, flags, node);
+       ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
+
+       kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+                                 (unsigned long) caller, ret,
+                                 size, cachep->buffer_size, flags, node);
+
+       return ret;
 }
 
-#ifdef CONFIG_DEBUG_SLAB
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
 void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
        return __do_kmalloc_node(size, flags, node,
@@ -3647,6 +3693,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
                                          void *caller)
 {
        struct kmem_cache *cachep;
+       void *ret;
 
        /* If you want to save a few bytes .text space: replace
         * __ with kmem_.
@@ -3656,11 +3703,17 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
        cachep = __find_general_cachep(size, flags);
        if (unlikely(ZERO_OR_NULL_PTR(cachep)))
                return cachep;
-       return __cache_alloc(cachep, flags, caller);
+       ret = __cache_alloc(cachep, flags, caller);
+
+       kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
+                            (unsigned long) caller, ret,
+                            size, cachep->buffer_size, flags);
+
+       return ret;
 }
 
 
-#ifdef CONFIG_DEBUG_SLAB
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
 void *__kmalloc(size_t size, gfp_t flags)
 {
        return __do_kmalloc(size, flags, __builtin_return_address(0));
@@ -3699,6 +3752,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
                debug_check_no_obj_freed(objp, obj_size(cachep));
        __cache_free(cachep, objp);
        local_irq_restore(flags);
+
+       kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, objp);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -3725,6 +3780,8 @@ void kfree(const void *objp)
        debug_check_no_obj_freed(objp, obj_size(c));
        __cache_free(c, (void *)objp);
        local_irq_restore(flags);
+
+       kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, objp);
 }
 EXPORT_SYMBOL(kfree);
 
index bf7e8fc..4d1c0fc 100644 (file)
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -65,6 +65,7 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
+#include <trace/kmemtrace.h>
 #include <asm/atomic.h>
 
 /*
@@ -463,27 +464,38 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
 {
        unsigned int *m;
        int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+       void *ret;
 
        if (size < PAGE_SIZE - align) {
                if (!size)
                        return ZERO_SIZE_PTR;
 
                m = slob_alloc(size + align, gfp, align, node);
+
                if (!m)
                        return NULL;
                *m = size;
-               return (void *)m + align;
+               ret = (void *)m + align;
+
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+                                         _RET_IP_, ret,
+                                         size, size + align, gfp, node);
        } else {
-               void *ret;
+               unsigned int order = get_order(size);
 
-               ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
+               ret = slob_new_page(gfp | __GFP_COMP, order, node);
                if (ret) {
                        struct page *page;
                        page = virt_to_page(ret);
                        page->private = size;
                }
-               return ret;
+
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+                                         _RET_IP_, ret,
+                                         size, PAGE_SIZE << order, gfp, node);
        }
+
+       return ret;
 }
 EXPORT_SYMBOL(__kmalloc_node);
 
@@ -501,6 +513,8 @@ void kfree(const void *block)
                slob_free(m, *m + align);
        } else
                put_page(&sp->page);
+
+       kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, block);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -569,10 +583,19 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 {
        void *b;
 
-       if (c->size < PAGE_SIZE)
+       if (c->size < PAGE_SIZE) {
                b = slob_alloc(c->size, flags, c->align, node);
-       else
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
+                                         _RET_IP_, b, c->size,
+                                         SLOB_UNITS(c->size) * SLOB_UNIT,
+                                         flags, node);
+       } else {
                b = slob_new_page(flags, get_order(c->size), node);
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
+                                         _RET_IP_, b, c->size,
+                                         PAGE_SIZE << get_order(c->size),
+                                         flags, node);
+       }
 
        if (c->ctor)
                c->ctor(b);
@@ -608,6 +631,8 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
        } else {
                __kmem_cache_free(b, c->size);
        }
+
+       kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, b);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
index 6392ae5..f657c88 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <trace/kmemtrace.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/mempolicy.h>
@@ -1623,18 +1624,46 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 {
-       return slab_alloc(s, gfpflags, -1, _RET_IP_);
+       void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
+
+       kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+                            s->objsize, s->size, gfpflags);
+
+       return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+       return slab_alloc(s, gfpflags, -1, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+#endif
+
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
-       return slab_alloc(s, gfpflags, node, _RET_IP_);
+       void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
+
+       kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+                                 s->objsize, s->size, gfpflags, node);
+
+       return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
 
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+                                   gfp_t gfpflags,
+                                   int node)
+{
+       return slab_alloc(s, gfpflags, node, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+#endif
+
 /*
  * Slow patch handling. This may still be called frequently since objects
  * have a longer lifetime than the cpu slabs in most processing loads.
@@ -1742,6 +1771,8 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
        page = virt_to_head_page(x);
 
        slab_free(s, page, x, _RET_IP_);
+
+       kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -2657,6 +2688,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
 void *__kmalloc(size_t size, gfp_t flags)
 {
        struct kmem_cache *s;
+       void *ret;
 
        if (unlikely(size > PAGE_SIZE))
                return kmalloc_large(size, flags);
@@ -2666,7 +2698,12 @@ void *__kmalloc(size_t size, gfp_t flags)
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
 
-       return slab_alloc(s, flags, -1, _RET_IP_);
+       ret = slab_alloc(s, flags, -1, _RET_IP_);
+
+       kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
+                            size, s->size, flags);
+
+       return ret;
 }
 EXPORT_SYMBOL(__kmalloc);
 
@@ -2685,16 +2722,30 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
        struct kmem_cache *s;
+       void *ret;
 
-       if (unlikely(size > PAGE_SIZE))
-               return kmalloc_large_node(size, flags, node);
+       if (unlikely(size > PAGE_SIZE)) {
+               ret = kmalloc_large_node(size, flags, node);
+
+               kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+                                         _RET_IP_, ret,
+                                         size, PAGE_SIZE << get_order(size),
+                                         flags, node);
+
+               return ret;
+       }
 
        s = get_slab(size, flags);
 
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
 
-       return slab_alloc(s, flags, node, _RET_IP_);
+       ret = slab_alloc(s, flags, node, _RET_IP_);
+
+       kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
+                                 size, s->size, flags, node);
+
+       return ret;
 }
 EXPORT_SYMBOL(__kmalloc_node);
 #endif
@@ -2752,6 +2803,8 @@ void kfree(const void *x)
                return;
        }
        slab_free(page->slab, page, object, _RET_IP_);
+
+       kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -3221,6 +3274,7 @@ static struct notifier_block __cpuinitdata slab_notifier = {
 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 {
        struct kmem_cache *s;
+       void *ret;
 
        if (unlikely(size > PAGE_SIZE))
                return kmalloc_large(size, gfpflags);
@@ -3230,13 +3284,20 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
 
-       return slab_alloc(s, gfpflags, -1, caller);
+       ret = slab_alloc(s, gfpflags, -1, caller);
+
+       /* Honor the call site pointer we received. */
+       kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size,
+                            s->size, gfpflags);
+
+       return ret;
 }
 
 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
                                        int node, unsigned long caller)
 {
        struct kmem_cache *s;
+       void *ret;
 
        if (unlikely(size > PAGE_SIZE))
                return kmalloc_large_node(size, gfpflags, node);
@@ -3246,7 +3307,13 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
 
-       return slab_alloc(s, gfpflags, node, caller);
+       ret = slab_alloc(s, gfpflags, node, caller);
+
+       /* Honor the call site pointer we received. */
+       kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret,
+                                 size, s->size, gfpflags, node);
+
+       return ret;
 }
 
 #ifdef CONFIG_SLUB_DEBUG
index 5d90030..b5efa98 100644 (file)
@@ -112,13 +112,13 @@ endif
 # ---------------------------------------------------------------------------
 
 # Default is built-in, unless we know otherwise
-modkern_cflags := $(CFLAGS_KERNEL)
+modkern_cflags = $(if $(part-of-module), $(CFLAGS_MODULE), $(CFLAGS_KERNEL))
 quiet_modtag := $(empty)   $(empty)
 
-$(real-objs-m)        : modkern_cflags := $(CFLAGS_MODULE)
-$(real-objs-m:.o=.i)  : modkern_cflags := $(CFLAGS_MODULE)
-$(real-objs-m:.o=.s)  : modkern_cflags := $(CFLAGS_MODULE)
-$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE)
+$(real-objs-m)        : part-of-module := y
+$(real-objs-m:.o=.i)  : part-of-module := y
+$(real-objs-m:.o=.s)  : part-of-module := y
+$(real-objs-m:.o=.lst): part-of-module := y
 
 $(real-objs-m)        : quiet_modtag := [M]
 $(real-objs-m:.o=.i)  : quiet_modtag := [M]
@@ -215,7 +215,8 @@ endif
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
 cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
        "$(if $(CONFIG_64BIT),64,32)" \
-       "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
+       "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
+       "$(if $(part-of-module),1,0)" "$(@)";
 endif
 
 define rule_cc_o_c
index fe83141..2ded5c8 100755 (executable)
@@ -100,14 +100,19 @@ $P =~ s@.*/@@g;
 
 my $V = '0.1';
 
-if ($#ARGV < 6) {
-       print "usage: $P arch objdump objcopy cc ld nm rm mv inputfile\n";
+# All eleven positional arguments unpacked from @ARGV below are required.
+if ($#ARGV < 10) {
+       print "usage: $P arch bits objdump objcopy cc ld nm rm mv is_module inputfile\n";
        print "version: $V\n";
        exit(1);
 }
 
 my ($arch, $bits, $objdump, $objcopy, $cc,
-    $ld, $nm, $rm, $mv, $inputfile) = @ARGV;
+    $ld, $nm, $rm, $mv, $is_module, $inputfile) = @ARGV;
+
+# This file refers to mcount and shouldn't be ftraced, so let's ignore it
+if ($inputfile eq "kernel/trace/ftrace.o") {
+    exit(0);
+}
 
 # Acceptable sections to record.
 my %text_sections = (
@@ -201,6 +206,13 @@ if ($arch eq "x86_64") {
     $alignment = 2;
     $section_type = '%progbits';
 
+} elsif ($arch eq "ia64") {
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
+    $type = "data8";
+
+    if ($is_module eq "0") {
+        $cc .= " -mconstant-gp";
+    }
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }