Merge commit 'v2.6.32-rc6' into perf/core
Ingo Molnar [Wed, 4 Nov 2009 10:54:15 +0000 (11:54 +0100)]
Conflicts:
tools/perf/Makefile

Merge reason: Resolve the conflict, merge to upstream and merge in
              perf fixes so we can add a dependent patch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

99 files changed:
Documentation/kernel-parameters.txt
Documentation/trace/ftrace-design.txt
arch/s390/kernel/ftrace.c
arch/x86/include/asm/perf_event.h
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/ftrace.c
arch/x86/mm/testmmiotrace.c
include/linux/ftrace_event.h
include/linux/perf_counter.h
include/linux/perf_event.h
include/linux/smp_lock.h
include/trace/events/bkl.h [new file with mode: 0644]
include/trace/events/irq.h
include/trace/events/power.h
include/trace/events/sched.h
include/trace/events/timer.h
include/trace/ftrace.h
include/trace/syscall.h
kernel/perf_event.c
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_export.c
kernel/trace/trace_syscalls.c
lib/kernel_lock.c
scripts/recordmcount.pl
tools/perf/Documentation/perf-timechart.txt
tools/perf/Makefile
tools/perf/builtin-annotate.c
tools/perf/builtin-help.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-stat.c
tools/perf/builtin-timechart.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/perf.c
tools/perf/perf.h
tools/perf/util/cache.h
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/color.h
tools/perf/util/data_map.c [new file with mode: 0644]
tools/perf/util/data_map.h [new file with mode: 0644]
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/event.c [new file with mode: 0644]
tools/perf/util/event.h
tools/perf/util/exec_cmd.h
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/help.h
tools/perf/util/hist.c [new file with mode: 0644]
tools/perf/util/hist.h [new file with mode: 0644]
tools/perf/util/include/asm/asm-offsets.h [new file with mode: 0644]
tools/perf/util/include/asm/bitops.h [new file with mode: 0644]
tools/perf/util/include/asm/byteorder.h [new file with mode: 0644]
tools/perf/util/include/asm/swab.h [new file with mode: 0644]
tools/perf/util/include/asm/uaccess.h [new file with mode: 0644]
tools/perf/util/include/linux/bitmap.h [new file with mode: 0644]
tools/perf/util/include/linux/bitops.h [new file with mode: 0644]
tools/perf/util/include/linux/compiler.h [new file with mode: 0644]
tools/perf/util/include/linux/ctype.h [new file with mode: 0644]
tools/perf/util/include/linux/kernel.h
tools/perf/util/include/linux/string.h [new file with mode: 0644]
tools/perf/util/include/linux/types.h [new file with mode: 0644]
tools/perf/util/levenshtein.h
tools/perf/util/map.c
tools/perf/util/module.c [deleted file]
tools/perf/util/module.h [deleted file]
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-options.h
tools/perf/util/quote.h
tools/perf/util/run-command.h
tools/perf/util/sigchain.h
tools/perf/util/sort.c [new file with mode: 0644]
tools/perf/util/sort.h [new file with mode: 0644]
tools/perf/util/strbuf.h
tools/perf/util/string.c
tools/perf/util/string.h
tools/perf/util/strlist.h
tools/perf/util/svghelper.h
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/trace-event-info.c
tools/perf/util/trace-event-parse.c
tools/perf/util/trace-event-read.c
tools/perf/util/trace-event.h
tools/perf/util/types.h
tools/perf/util/values.h

index 9107b38..c8d1b2b 100644 (file)
@@ -779,6 +779,13 @@ and is between 256 and 4096 characters. It is defined in the file
                        by the set_ftrace_notrace file in the debugfs
                        tracing directory.
 
+       ftrace_graph_filter=[function-list]
+                       [FTRACE] Limit the top-level caller functions traced
+                       by the function graph tracer at boot up.
+                       function-list is a comma-separated list of functions
+                       that can be changed at run time by the
+                       set_graph_function file in the debugfs tracing directory.
+
        gamecon.map[2|3]=
                        [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
                        support via parallel port (up to 5 devices per port)
index 7003e10..641a1ef 100644 (file)
@@ -213,10 +213,19 @@ If you can't trace NMI functions, then skip this option.
 <details to be filled>
 
 
-HAVE_FTRACE_SYSCALLS
+HAVE_SYSCALL_TRACEPOINTS
 ---------------------
 
-<details to be filled>
+You need very few things to get syscall tracing working in an arch.
+
+- Have a NR_syscalls variable in <asm/unistd.h> that provides the number
+  of syscalls supported by the arch.
+- Implement arch_syscall_addr() that resolves a syscall address from a
+  syscall number.
+- Support the TIF_SYSCALL_TRACEPOINT thread flag.
+- Call the trace_sys_enter() and trace_sys_exit() tracepoints from the
+  ptrace syscall tracing path.
+- Tag this arch as HAVE_SYSCALL_TRACEPOINTS.
 
 
 HAVE_FTRACE_MCOUNT_RECORD
index f5fe34d..5a82bc6 100644 (file)
@@ -203,73 +203,10 @@ out:
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 
-extern unsigned long __start_syscalls_metadata[];
-extern unsigned long __stop_syscalls_metadata[];
 extern unsigned int sys_call_table[];
 
-static struct syscall_metadata **syscalls_metadata;
-
-struct syscall_metadata *syscall_nr_to_meta(int nr)
-{
-       if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
-               return NULL;
-
-       return syscalls_metadata[nr];
-}
-
-int syscall_name_to_nr(char *name)
-{
-       int i;
-
-       if (!syscalls_metadata)
-               return -1;
-       for (i = 0; i < NR_syscalls; i++)
-               if (syscalls_metadata[i])
-                       if (!strcmp(syscalls_metadata[i]->name, name))
-                               return i;
-       return -1;
-}
-
-void set_syscall_enter_id(int num, int id)
-{
-       syscalls_metadata[num]->enter_id = id;
-}
-
-void set_syscall_exit_id(int num, int id)
+unsigned long __init arch_syscall_addr(int nr)
 {
-       syscalls_metadata[num]->exit_id = id;
-}
-
-static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
-{
-       struct syscall_metadata *start;
-       struct syscall_metadata *stop;
-       char str[KSYM_SYMBOL_LEN];
-
-       start = (struct syscall_metadata *)__start_syscalls_metadata;
-       stop = (struct syscall_metadata *)__stop_syscalls_metadata;
-       kallsyms_lookup(syscall, NULL, NULL, NULL, str);
-
-       for ( ; start < stop; start++) {
-               if (start->name && !strcmp(start->name + 3, str + 3))
-                       return start;
-       }
-       return NULL;
-}
-
-static int __init arch_init_ftrace_syscalls(void)
-{
-       struct syscall_metadata *meta;
-       int i;
-       syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls,
-                                   GFP_KERNEL);
-       if (!syscalls_metadata)
-               return -ENOMEM;
-       for (i = 0; i < NR_syscalls; i++) {
-               meta = find_syscall_meta((unsigned long)sys_call_table[i]);
-               syscalls_metadata[i] = meta;
-       }
-       return 0;
+       return (unsigned long)sys_call_table[nr];
 }
-arch_initcall(arch_init_ftrace_syscalls);
 #endif
index ad7ce3f..8d9f854 100644 (file)
  */
 #define ARCH_PERFMON_EVENT_MASK                                    0xffff
 
+/*
+ * Filter mask used to validate fixed counter events.
+ * Events using any of these filters cannot run on a fixed counter:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
+ */
+#define ARCH_PERFMON_EVENT_FILTER_MASK                 0xff840000
+
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL                0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK                (0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX                 0
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX                         0
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
                (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
 
index b5801c3..2e20bca 100644 (file)
@@ -77,6 +77,18 @@ struct cpu_hw_events {
        struct debug_store      *ds;
 };
 
+struct event_constraint {
+       unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       int             code;
+};
+
+#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
+#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }
+
+#define for_each_event_constraint(e, c) \
+       for ((e) = (c); (e)->idxmsk[0]; (e)++)
+
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -102,6 +114,8 @@ struct x86_pmu {
        u64             intel_ctrl;
        void            (*enable_bts)(u64 config);
        void            (*disable_bts)(void);
+       int             (*get_event_idx)(struct cpu_hw_events *cpuc,
+                                        struct hw_perf_event *hwc);
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -110,6 +124,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
        .enabled = 1,
 };
 
+static const struct event_constraint *event_constraints;
+
 /*
  * Not sure about some of these
  */
@@ -155,6 +171,16 @@ static u64 p6_pmu_raw_event(u64 hw_event)
        return hw_event & P6_EVNTSEL_MASK;
 }
 
+static const struct event_constraint intel_p6_event_constraints[] =
+{
+       EVENT_CONSTRAINT(0xc1, 0x1),    /* FLOPS */
+       EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
+       EVENT_CONSTRAINT(0x11, 0x1),    /* FP_ASSIST */
+       EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
+       EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
+       EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
+       EVENT_CONSTRAINT_END
+};
 
 /*
  * Intel PerfMon v3. Used on Core2 and later.
@@ -170,6 +196,35 @@ static const u64 intel_perfmon_event_map[] =
   [PERF_COUNT_HW_BUS_CYCLES]           = 0x013c,
 };
 
+static const struct event_constraint intel_core_event_constraints[] =
+{
+       EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
+       EVENT_CONSTRAINT(0x11, 0x2),    /* FP_ASSIST */
+       EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
+       EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
+       EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
+       EVENT_CONSTRAINT(0x18, 0x1),    /* IDLE_DURING_DIV */
+       EVENT_CONSTRAINT(0x19, 0x2),    /* DELAYED_BYPASS */
+       EVENT_CONSTRAINT(0xa1, 0x1),    /* RS_UOPS_DISPATCH_CYCLES */
+       EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED */
+       EVENT_CONSTRAINT_END
+};
+
+static const struct event_constraint intel_nehalem_event_constraints[] =
+{
+       EVENT_CONSTRAINT(0x40, 0x3),    /* L1D_CACHE_LD */
+       EVENT_CONSTRAINT(0x41, 0x3),    /* L1D_CACHE_ST */
+       EVENT_CONSTRAINT(0x42, 0x3),    /* L1D_CACHE_LOCK */
+       EVENT_CONSTRAINT(0x43, 0x3),    /* L1D_ALL_REF */
+       EVENT_CONSTRAINT(0x4e, 0x3),    /* L1D_PREFETCH */
+       EVENT_CONSTRAINT(0x4c, 0x3),    /* LOAD_HIT_PRE */
+       EVENT_CONSTRAINT(0x51, 0x3),    /* L1D */
+       EVENT_CONSTRAINT(0x52, 0x3),    /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
+       EVENT_CONSTRAINT(0x53, 0x3),    /* L1D_CACHE_LOCK_FB_HIT */
+       EVENT_CONSTRAINT(0xc5, 0x3),    /* CACHE_LOCK_CYCLES */
+       EVENT_CONSTRAINT_END
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
        return intel_perfmon_event_map[hw_event];
@@ -469,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event)
 #define CORE_EVNTSEL_UNIT_MASK         0x0000FF00ULL
 #define CORE_EVNTSEL_EDGE_MASK         0x00040000ULL
 #define CORE_EVNTSEL_INV_MASK          0x00800000ULL
-#define CORE_EVNTSEL_REG_MASK  0xFF000000ULL
+#define CORE_EVNTSEL_REG_MASK          0xFF000000ULL
 
 #define CORE_EVNTSEL_MASK              \
        (CORE_EVNTSEL_EVENT_MASK |      \
@@ -932,6 +987,8 @@ static int __hw_perf_event_init(struct perf_event *event)
         */
        hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
+       hwc->idx = -1;
+
        /*
         * Count user and OS events unless requested not to.
         */
@@ -1334,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
                x86_pmu_enable_event(hwc, idx);
 }
 
-static int
-fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
+static int fixed_mode_idx(struct hw_perf_event *hwc)
 {
        unsigned int hw_event;
 
@@ -1349,6 +1405,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
        if (!x86_pmu.num_events_fixed)
                return -1;
 
+       /*
+        * fixed counters do not take all possible filters
+        */
+       if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
+               return -1;
+
        if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
                return X86_PMC_IDX_FIXED_INSTRUCTIONS;
        if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
@@ -1360,22 +1422,57 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
 }
 
 /*
- * Find a PMC slot for the freshly enabled / scheduled in event:
+ * generic counter allocator: get next free counter
  */
-static int x86_pmu_enable(struct perf_event *event)
+static int
+gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+       int idx;
+
+       idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
+       return idx == x86_pmu.num_events ? -1 : idx;
+}
+
+/*
+ * intel-specific counter allocator: check event constraints
+ */
+static int
+intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+       const struct event_constraint *event_constraint;
+       int i, code;
+
+       if (!event_constraints)
+               goto skip;
+
+       code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
+
+       for_each_event_constraint(event_constraint, event_constraints) {
+               if (code == event_constraint->code) {
+                       for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
+                               if (!test_and_set_bit(i, cpuc->used_mask))
+                                       return i;
+                       }
+                       return -1;
+               }
+       }
+skip:
+       return gen_get_event_idx(cpuc, hwc);
+}
+
+static int
+x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
 {
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
        int idx;
 
-       idx = fixed_mode_idx(event, hwc);
+       idx = fixed_mode_idx(hwc);
        if (idx == X86_PMC_IDX_FIXED_BTS) {
                /* BTS is already occupied. */
                if (test_and_set_bit(idx, cpuc->used_mask))
                        return -EAGAIN;
 
                hwc->config_base        = 0;
-               hwc->event_base = 0;
+               hwc->event_base         = 0;
                hwc->idx                = idx;
        } else if (idx >= 0) {
                /*
@@ -1396,20 +1493,35 @@ static int x86_pmu_enable(struct perf_event *event)
        } else {
                idx = hwc->idx;
                /* Try to get the previous generic event again */
-               if (test_and_set_bit(idx, cpuc->used_mask)) {
+               if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
 try_generic:
-                       idx = find_first_zero_bit(cpuc->used_mask,
-                                                 x86_pmu.num_events);
-                       if (idx == x86_pmu.num_events)
+                       idx = x86_pmu.get_event_idx(cpuc, hwc);
+                       if (idx == -1)
                                return -EAGAIN;
 
                        set_bit(idx, cpuc->used_mask);
                        hwc->idx = idx;
                }
-               hwc->config_base  = x86_pmu.eventsel;
-               hwc->event_base = x86_pmu.perfctr;
+               hwc->config_base = x86_pmu.eventsel;
+               hwc->event_base  = x86_pmu.perfctr;
        }
 
+       return idx;
+}
+
+/*
+ * Find a PMC slot for the freshly enabled / scheduled in event:
+ */
+static int x86_pmu_enable(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx;
+
+       idx = x86_schedule_event(cpuc, hwc);
+       if (idx < 0)
+               return idx;
+
        perf_events_lapic_init();
 
        x86_pmu.disable(hwc, idx);
@@ -1877,6 +1989,7 @@ static struct x86_pmu p6_pmu = {
         */
        .event_bits             = 32,
        .event_mask             = (1ULL << 32) - 1,
+       .get_event_idx          = intel_get_event_idx,
 };
 
 static struct x86_pmu intel_pmu = {
@@ -1900,6 +2013,7 @@ static struct x86_pmu intel_pmu = {
        .max_period             = (1ULL << 31) - 1,
        .enable_bts             = intel_pmu_enable_bts,
        .disable_bts            = intel_pmu_disable_bts,
+       .get_event_idx          = intel_get_event_idx,
 };
 
 static struct x86_pmu amd_pmu = {
@@ -1920,6 +2034,7 @@ static struct x86_pmu amd_pmu = {
        .apic                   = 1,
        /* use highest bit to detect overflow */
        .max_period             = (1ULL << 47) - 1,
+       .get_event_idx          = gen_get_event_idx,
 };
 
 static int p6_pmu_init(void)
@@ -1932,10 +2047,12 @@ static int p6_pmu_init(void)
        case 7:
        case 8:
        case 11: /* Pentium III */
+               event_constraints = intel_p6_event_constraints;
                break;
        case 9:
        case 13:
                /* Pentium M */
+               event_constraints = intel_p6_event_constraints;
                break;
        default:
                pr_cont("unsupported p6 CPU model %d ",
@@ -2007,12 +2124,14 @@ static int intel_pmu_init(void)
                       sizeof(hw_cache_event_ids));
 
                pr_cont("Core2 events, ");
+               event_constraints = intel_core_event_constraints;
                break;
        default:
        case 26:
                memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
 
+               event_constraints = intel_nehalem_event_constraints;
                pr_cont("Nehalem/Corei7 events, ");
                break;
        case 28:
@@ -2105,11 +2224,47 @@ static const struct pmu pmu = {
        .unthrottle     = x86_pmu_unthrottle,
 };
 
+/*
+ * Dry-run the scheduling of @event against the scratch counter state @cpuc.
+ *
+ * A copy of event->hw is scheduled, so the event itself is never modified;
+ * only cpuc->used_mask accumulates the counters claimed so far.
+ *
+ * Returns non-zero if the event fits, 0 if it does not.  validate_group()
+ * tests the result with '!', so this has to be a boolean rather than the
+ * raw x86_schedule_event() value: that value is a counter index on success
+ * (and 0 is a valid index) and a negative errno on failure.
+ */
+static int
+validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+       struct hw_perf_event fake_event = event->hw;
+
+       /*
+        * Events owned by another pmu never occupy an x86 counter, so they
+        * cannot make the group unschedulable.
+        *
+        * NOTE(review): the event currently being initialised presumably has
+        * no ->pmu assigned yet (hw_perf_event_init() is what returns it) -
+        * confirm against the caller.  A NULL pmu is therefore treated as
+        * ours so that the new event is still validated.
+        */
+       if (event->pmu && event->pmu != &pmu)
+               return 1;
+
+       return x86_schedule_event(cpuc, &fake_event) >= 0;
+}
+
+static int validate_group(struct perf_event *event)
+{
+       struct perf_event *sibling, *leader = event->group_leader;
+       struct cpu_hw_events fake_pmu;
+
+       memset(&fake_pmu, 0, sizeof(fake_pmu));
+
+       if (!validate_event(&fake_pmu, leader))
+               return -ENOSPC;
+
+       list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+               if (!validate_event(&fake_pmu, sibling))
+                       return -ENOSPC;
+       }
+
+       if (!validate_event(&fake_pmu, event))
+               return -ENOSPC;
+
+       return 0;
+}
+
 const struct pmu *hw_perf_event_init(struct perf_event *event)
 {
        int err;
 
        err = __hw_perf_event_init(event);
+       if (!err) {
+               if (event->group_leader != event)
+                       err = validate_group(event);
+       }
        if (err) {
                if (event->destroy)
                        event->destroy(event);
index c097e7d..7d52e9d 100644 (file)
@@ -1185,17 +1185,14 @@ END(ftrace_graph_caller)
 
 .globl return_to_handler
 return_to_handler:
-       pushl $0
        pushl %eax
-       pushl %ecx
        pushl %edx
        movl %ebp, %eax
        call ftrace_return_to_handler
-       movl %eax, 0xc(%esp)
+       movl %eax, %ecx
        popl %edx
-       popl %ecx
        popl %eax
-       ret
+       jmp *%ecx
 #endif
 
 .section .rodata,"a"
index b5c061f..bd5bbdd 100644 (file)
@@ -155,11 +155,11 @@ GLOBAL(return_to_handler)
 
        call ftrace_return_to_handler
 
-       movq %rax, 16(%rsp)
+       movq %rax, %rdi
        movq 8(%rsp), %rdx
        movq (%rsp), %rax
-       addq $16, %rsp
-       retq
+       addq $24, %rsp
+       jmp *%rdi
 #endif
 
 
index 9dbb527..5a1b975 100644 (file)
@@ -9,6 +9,8 @@
  * the dangers of modifying code on the run.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
 #include <linux/uaccess.h>
@@ -336,15 +338,15 @@ int __init ftrace_dyn_arch_init(void *data)
 
        switch (faulted) {
        case 0:
-               pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
+               pr_info("converting mcount calls to 0f 1f 44 00 00\n");
                memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
                break;
        case 1:
-               pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
+               pr_info("converting mcount calls to 66 66 66 66 90\n");
                memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
                break;
        case 2:
-               pr_info("ftrace: converting mcount calls to jmp . + 5\n");
+               pr_info("converting mcount calls to jmp . + 5\n");
                memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
                break;
        }
@@ -468,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 
-extern unsigned long __start_syscalls_metadata[];
-extern unsigned long __stop_syscalls_metadata[];
 extern unsigned long *sys_call_table;
 
-static struct syscall_metadata **syscalls_metadata;
-
-static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
-{
-       struct syscall_metadata *start;
-       struct syscall_metadata *stop;
-       char str[KSYM_SYMBOL_LEN];
-
-
-       start = (struct syscall_metadata *)__start_syscalls_metadata;
-       stop = (struct syscall_metadata *)__stop_syscalls_metadata;
-       kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
-
-       for ( ; start < stop; start++) {
-               if (start->name && !strcmp(start->name, str))
-                       return start;
-       }
-       return NULL;
-}
-
-struct syscall_metadata *syscall_nr_to_meta(int nr)
-{
-       if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
-               return NULL;
-
-       return syscalls_metadata[nr];
-}
-
-int syscall_name_to_nr(char *name)
+unsigned long __init arch_syscall_addr(int nr)
 {
-       int i;
-
-       if (!syscalls_metadata)
-               return -1;
-
-       for (i = 0; i < NR_syscalls; i++) {
-               if (syscalls_metadata[i]) {
-                       if (!strcmp(syscalls_metadata[i]->name, name))
-                               return i;
-               }
-       }
-       return -1;
-}
-
-void set_syscall_enter_id(int num, int id)
-{
-       syscalls_metadata[num]->enter_id = id;
-}
-
-void set_syscall_exit_id(int num, int id)
-{
-       syscalls_metadata[num]->exit_id = id;
-}
-
-static int __init arch_init_ftrace_syscalls(void)
-{
-       int i;
-       struct syscall_metadata *meta;
-       unsigned long **psys_syscall_table = &sys_call_table;
-
-       syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
-                                       NR_syscalls, GFP_KERNEL);
-       if (!syscalls_metadata) {
-               WARN_ON(1);
-               return -ENOMEM;
-       }
-
-       for (i = 0; i < NR_syscalls; i++) {
-               meta = find_syscall_meta(psys_syscall_table[i]);
-               syscalls_metadata[i] = meta;
-       }
-       return 0;
+       return (unsigned long)(&sys_call_table)[nr];
 }
-arch_initcall(arch_init_ftrace_syscalls);
 #endif
index 427fd1b..8565d94 100644 (file)
@@ -1,12 +1,13 @@
 /*
  * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi>
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/mmiotrace.h>
 
-#define MODULE_NAME "testmmiotrace"
-
 static unsigned long mmio_address;
 module_param(mmio_address, ulong, 0);
 MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
@@ -30,7 +31,7 @@ static unsigned v32(unsigned i)
 static void do_write_test(void __iomem *p)
 {
        unsigned int i;
-       pr_info(MODULE_NAME ": write test.\n");
+       pr_info("write test.\n");
        mmiotrace_printk("Write test.\n");
 
        for (i = 0; i < 256; i++)
@@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p)
 {
        unsigned int i;
        unsigned errs[3] = { 0 };
-       pr_info(MODULE_NAME ": read test.\n");
+       pr_info("read test.\n");
        mmiotrace_printk("Read test.\n");
 
        for (i = 0; i < 256; i++)
@@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p)
 
 static void do_read_far_test(void __iomem *p)
 {
-       pr_info(MODULE_NAME ": read far test.\n");
+       pr_info("read far test.\n");
        mmiotrace_printk("Read far test.\n");
 
        ioread32(p + read_far);
@@ -78,7 +79,7 @@ static void do_test(unsigned long size)
 {
        void __iomem *p = ioremap_nocache(mmio_address, size);
        if (!p) {
-               pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
+               pr_err("could not ioremap, aborting.\n");
                return;
        }
        mmiotrace_printk("ioremap returned %p.\n", p);
@@ -94,24 +95,22 @@ static int __init init(void)
        unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
 
        if (mmio_address == 0) {
-               pr_err(MODULE_NAME ": you have to use the module argument "
-                                                       "mmio_address.\n");
-               pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
-                               " YOU REALLY KNOW WHAT YOU ARE DOING!\n");
+               pr_err("you have to use the module argument mmio_address.\n");
+               pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n");
                return -ENXIO;
        }
 
-       pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI "
-               "address space, and writing 16 kB of rubbish in there.\n",
-                size >> 10, mmio_address);
+       pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, "
+                  "and writing 16 kB of rubbish in there.\n",
+                  size >> 10, mmio_address);
        do_test(size);
-       pr_info(MODULE_NAME ": All done.\n");
+       pr_info("All done.\n");
        return 0;
 }
 
 static void __exit cleanup(void)
 {
-       pr_debug(MODULE_NAME ": unloaded.\n");
+       pr_debug("unloaded.\n");
 }
 
 module_init(init);
index 4ec5e67..d117704 100644 (file)
@@ -144,7 +144,7 @@ extern char                 *trace_profile_buf_nmi;
 #define MAX_FILTER_STR_VAL     256     /* Should handle KSYM_SYMBOL_LEN */
 
 extern void destroy_preds(struct ftrace_event_call *call);
-extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern int filter_match_preds(struct event_filter *filter, void *rec);
 extern int filter_current_check_discard(struct ring_buffer *buffer,
                                        struct ftrace_event_call *call,
                                        void *rec,
@@ -186,4 +186,13 @@ do {                                                                       \
                __trace_printk(ip, fmt, ##args);                        \
 } while (0)
 
+#ifdef CONFIG_EVENT_PROFILE
+struct perf_event;
+extern int ftrace_profile_enable(int event_id);
+extern void ftrace_profile_disable(int event_id);
+extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+                                    char *filter_str);
+extern void ftrace_profile_free_filter(struct perf_event *event);
+#endif
+
 #endif /* _LINUX_FTRACE_EVENT_H */
index 7b7fbf4..91a2b43 100644 (file)
@@ -225,6 +225,7 @@ struct perf_counter_attr {
 #define PERF_COUNTER_IOC_RESET         _IO ('$', 3)
 #define PERF_COUNTER_IOC_PERIOD                _IOW('$', 4, u64)
 #define PERF_COUNTER_IOC_SET_OUTPUT    _IO ('$', 5)
+#define PERF_COUNTER_IOC_SET_FILTER    _IOW('$', 6, char *)
 
 enum perf_counter_ioc_flags {
        PERF_IOC_FLAG_GROUP             = 1U << 0,
index 9e70126..45b56fa 100644 (file)
@@ -221,6 +221,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_RESET           _IO ('$', 3)
 #define PERF_EVENT_IOC_PERIOD          _IOW('$', 4, u64)
 #define PERF_EVENT_IOC_SET_OUTPUT      _IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER      _IOW('$', 6, char *)
 
 enum perf_event_ioc_flags {
        PERF_IOC_FLAG_GROUP             = 1U << 0,
@@ -633,7 +634,12 @@ struct perf_event {
 
        struct pid_namespace            *ns;
        u64                             id;
+
+#ifdef CONFIG_EVENT_PROFILE
+       struct event_filter             *filter;
 #endif
+
+#endif /* CONFIG_PERF_EVENTS */
 };
 
 /**
index 813be59..2ea1dd1 100644 (file)
@@ -24,8 +24,21 @@ static inline int reacquire_kernel_lock(struct task_struct *task)
        return 0;
 }
 
-extern void __lockfunc lock_kernel(void)       __acquires(kernel_lock);
-extern void __lockfunc unlock_kernel(void)     __releases(kernel_lock);
+extern void __lockfunc
+_lock_kernel(const char *func, const char *file, int line)
+__acquires(kernel_lock);
+
+extern void __lockfunc
+_unlock_kernel(const char *func, const char *file, int line)
+__releases(kernel_lock);
+
+#define lock_kernel() do {                                     \
+       _lock_kernel(__func__, __FILE__, __LINE__);             \
+} while (0)
+
+#define unlock_kernel()        do {                                    \
+       _unlock_kernel(__func__, __FILE__, __LINE__);           \
+} while (0)
 
 /*
  * Various legacy drivers don't really need the BKL in a specific
@@ -41,8 +54,8 @@ static inline void cycle_kernel_lock(void)
 
 #else
 
-#define lock_kernel()                          do { } while(0)
-#define unlock_kernel()                                do { } while(0)
+#define lock_kernel()
+#define unlock_kernel()
 #define release_kernel_lock(task)              do { } while(0)
 #define cycle_kernel_lock()                    do { } while(0)
 #define reacquire_kernel_lock(task)            0
diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h
new file mode 100644 (file)
index 0000000..1af72dc
--- /dev/null
@@ -0,0 +1,61 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bkl
+
+#if !defined(_TRACE_BKL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BKL_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(lock_kernel,
+
+       TP_PROTO(const char *func, const char *file, int line),
+
+       TP_ARGS(func, file, line),
+
+       TP_STRUCT__entry(
+               __field(        int,            depth                   )
+               __field_ext(    const char *,   func, FILTER_PTR_STRING )
+               __field_ext(    const char *,   file, FILTER_PTR_STRING )
+               __field(        int,            line                    )
+       ),
+
+       TP_fast_assign(
+               /* We want to record the lock_depth after lock is acquired */
+               __entry->depth = current->lock_depth + 1;
+               __entry->func = func;
+               __entry->file = file;
+               __entry->line = line;
+       ),
+
+       TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth,
+                 __entry->file, __entry->line, __entry->func)
+);
+
+/*
+ * Records the func/file/line triple handed to the tracepoint together with
+ * current->lock_depth sampled when the event fires.
+ *
+ * The string pointers are declared with FILTER_PTR_STRING, matching the
+ * lock_kernel event above, so both events expose func and file with the
+ * same filter type.
+ */
+TRACE_EVENT(unlock_kernel,
+
+       TP_PROTO(const char *func, const char *file, int line),
+
+       TP_ARGS(func, file, line),
+
+       TP_STRUCT__entry(
+               __field(        int,            depth                   )
+               __field_ext(    const char *,   func, FILTER_PTR_STRING )
+               __field_ext(    const char *,   file, FILTER_PTR_STRING )
+               __field(        int,            line                    )
+       ),
+
+       TP_fast_assign(
+               __entry->depth = current->lock_depth;
+               __entry->func = func;
+               __entry->file = file;
+               __entry->line = line;
+       ),
+
+       TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth,
+                 __entry->file, __entry->line, __entry->func)
+);
+
+#endif /* _TRACE_BKL_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index b89f9db..dcfcd44 100644 (file)
@@ -48,7 +48,7 @@ TRACE_EVENT(irq_handler_entry,
                __assign_str(name, action->name);
        ),
 
-       TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
+       TP_printk("irq=%d name=%s", __entry->irq, __get_str(name))
 );
 
 /**
@@ -78,7 +78,7 @@ TRACE_EVENT(irq_handler_exit,
                __entry->ret    = ret;
        ),
 
-       TP_printk("irq=%d return=%s",
+       TP_printk("irq=%d ret=%s",
                  __entry->irq, __entry->ret ? "handled" : "unhandled")
 );
 
@@ -107,7 +107,7 @@ TRACE_EVENT(softirq_entry,
                __entry->vec = (int)(h - vec);
        ),
 
-       TP_printk("softirq=%d action=%s", __entry->vec,
+       TP_printk("vec=%d [action=%s]", __entry->vec,
                  show_softirq_name(__entry->vec))
 );
 
@@ -136,7 +136,7 @@ TRACE_EVENT(softirq_exit,
                __entry->vec = (int)(h - vec);
        ),
 
-       TP_printk("softirq=%d action=%s", __entry->vec,
+       TP_printk("vec=%d [action=%s]", __entry->vec,
                  show_softirq_name(__entry->vec))
 );
 
index ea6d579..9bb96e5 100644 (file)
@@ -16,8 +16,6 @@ enum {
 };
 #endif
 
-
-
 TRACE_EVENT(power_start,
 
        TP_PROTO(unsigned int type, unsigned int state),
index 4069c43..b50b985 100644 (file)
@@ -26,7 +26,7 @@ TRACE_EVENT(sched_kthread_stop,
                __entry->pid    = t->pid;
        ),
 
-       TP_printk("task %s:%d", __entry->comm, __entry->pid)
+       TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
 );
 
 /*
@@ -46,7 +46,7 @@ TRACE_EVENT(sched_kthread_stop_ret,
                __entry->ret    = ret;
        ),
 
-       TP_printk("ret %d", __entry->ret)
+       TP_printk("ret=%d", __entry->ret)
 );
 
 /*
@@ -73,7 +73,7 @@ TRACE_EVENT(sched_wait_task,
                __entry->prio   = p->prio;
        ),
 
-       TP_printk("task %s:%d [%d]",
+       TP_printk("comm=%s pid=%d prio=%d",
                  __entry->comm, __entry->pid, __entry->prio)
 );
 
@@ -94,7 +94,7 @@ TRACE_EVENT(sched_wakeup,
                __field(        pid_t,  pid                     )
                __field(        int,    prio                    )
                __field(        int,    success                 )
-               __field(        int,    cpu                     )
+               __field(        int,    target_cpu              )
        ),
 
        TP_fast_assign(
@@ -102,12 +102,12 @@ TRACE_EVENT(sched_wakeup,
                __entry->pid            = p->pid;
                __entry->prio           = p->prio;
                __entry->success        = success;
-               __entry->cpu            = task_cpu(p);
+               __entry->target_cpu     = task_cpu(p);
        ),
 
-       TP_printk("task %s:%d [%d] success=%d [%03d]",
+       TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
                  __entry->comm, __entry->pid, __entry->prio,
-                 __entry->success, __entry->cpu)
+                 __entry->success, __entry->target_cpu)
 );
 
 /*
@@ -127,7 +127,7 @@ TRACE_EVENT(sched_wakeup_new,
                __field(        pid_t,  pid                     )
                __field(        int,    prio                    )
                __field(        int,    success                 )
-               __field(        int,    cpu                     )
+               __field(        int,    target_cpu              )
        ),
 
        TP_fast_assign(
@@ -135,12 +135,12 @@ TRACE_EVENT(sched_wakeup_new,
                __entry->pid            = p->pid;
                __entry->prio           = p->prio;
                __entry->success        = success;
-               __entry->cpu            = task_cpu(p);
+               __entry->target_cpu     = task_cpu(p);
        ),
 
-       TP_printk("task %s:%d [%d] success=%d [%03d]",
+       TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
                  __entry->comm, __entry->pid, __entry->prio,
-                 __entry->success, __entry->cpu)
+                 __entry->success, __entry->target_cpu)
 );
 
 /*
@@ -176,7 +176,7 @@ TRACE_EVENT(sched_switch,
                __entry->next_prio      = next->prio;
        ),
 
-       TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]",
+       TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
                __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
                __entry->prev_state ?
                  __print_flags(__entry->prev_state, "|",
@@ -211,7 +211,7 @@ TRACE_EVENT(sched_migrate_task,
                __entry->dest_cpu       = dest_cpu;
        ),
 
-       TP_printk("task %s:%d [%d] from: %d  to: %d",
+       TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
                  __entry->comm, __entry->pid, __entry->prio,
                  __entry->orig_cpu, __entry->dest_cpu)
 );
@@ -237,7 +237,7 @@ TRACE_EVENT(sched_process_free,
                __entry->prio           = p->prio;
        ),
 
-       TP_printk("task %s:%d [%d]",
+       TP_printk("comm=%s pid=%d prio=%d",
                  __entry->comm, __entry->pid, __entry->prio)
 );
 
@@ -262,7 +262,7 @@ TRACE_EVENT(sched_process_exit,
                __entry->prio           = p->prio;
        ),
 
-       TP_printk("task %s:%d [%d]",
+       TP_printk("comm=%s pid=%d prio=%d",
                  __entry->comm, __entry->pid, __entry->prio)
 );
 
@@ -287,7 +287,7 @@ TRACE_EVENT(sched_process_wait,
                __entry->prio           = current->prio;
        ),
 
-       TP_printk("task %s:%d [%d]",
+       TP_printk("comm=%s pid=%d prio=%d",
                  __entry->comm, __entry->pid, __entry->prio)
 );
 
@@ -314,7 +314,7 @@ TRACE_EVENT(sched_process_fork,
                __entry->child_pid      = child->pid;
        ),
 
-       TP_printk("parent %s:%d  child %s:%d",
+       TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
                __entry->parent_comm, __entry->parent_pid,
                __entry->child_comm, __entry->child_pid)
 );
@@ -340,7 +340,7 @@ TRACE_EVENT(sched_signal_send,
                __entry->sig    = sig;
        ),
 
-       TP_printk("sig: %d  task %s:%d",
+       TP_printk("sig=%d comm=%s pid=%d",
                  __entry->sig, __entry->comm, __entry->pid)
 );
 
@@ -374,7 +374,7 @@ TRACE_EVENT(sched_stat_wait,
                __perf_count(delay);
        ),
 
-       TP_printk("task: %s:%d wait: %Lu [ns]",
+       TP_printk("comm=%s pid=%d delay=%Lu [ns]",
                        __entry->comm, __entry->pid,
                        (unsigned long long)__entry->delay)
 );
@@ -406,7 +406,7 @@ TRACE_EVENT(sched_stat_runtime,
                __perf_count(runtime);
        ),
 
-       TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]",
+       TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
                        __entry->comm, __entry->pid,
                        (unsigned long long)__entry->runtime,
                        (unsigned long long)__entry->vruntime)
@@ -437,7 +437,7 @@ TRACE_EVENT(sched_stat_sleep,
                __perf_count(delay);
        ),
 
-       TP_printk("task: %s:%d sleep: %Lu [ns]",
+       TP_printk("comm=%s pid=%d delay=%Lu [ns]",
                        __entry->comm, __entry->pid,
                        (unsigned long long)__entry->delay)
 );
@@ -467,7 +467,7 @@ TRACE_EVENT(sched_stat_iowait,
                __perf_count(delay);
        ),
 
-       TP_printk("task: %s:%d iowait: %Lu [ns]",
+       TP_printk("comm=%s pid=%d delay=%Lu [ns]",
                        __entry->comm, __entry->pid,
                        (unsigned long long)__entry->delay)
 );
index 1844c48..e5ce87a 100644 (file)
@@ -26,7 +26,7 @@ TRACE_EVENT(timer_init,
                __entry->timer  = timer;
        ),
 
-       TP_printk("timer %p", __entry->timer)
+       TP_printk("timer=%p", __entry->timer)
 );
 
 /**
@@ -54,7 +54,7 @@ TRACE_EVENT(timer_start,
                __entry->now            = jiffies;
        ),
 
-       TP_printk("timer %p: func %pf, expires %lu, timeout %ld",
+       TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]",
                  __entry->timer, __entry->function, __entry->expires,
                  (long)__entry->expires - __entry->now)
 );
@@ -81,7 +81,7 @@ TRACE_EVENT(timer_expire_entry,
                __entry->now            = jiffies;
        ),
 
-       TP_printk("timer %p: now %lu", __entry->timer, __entry->now)
+       TP_printk("timer=%p now=%lu", __entry->timer, __entry->now)
 );
 
 /**
@@ -108,7 +108,7 @@ TRACE_EVENT(timer_expire_exit,
                __entry->timer  = timer;
        ),
 
-       TP_printk("timer %p", __entry->timer)
+       TP_printk("timer=%p", __entry->timer)
 );
 
 /**
@@ -129,7 +129,7 @@ TRACE_EVENT(timer_cancel,
                __entry->timer  = timer;
        ),
 
-       TP_printk("timer %p", __entry->timer)
+       TP_printk("timer=%p", __entry->timer)
 );
 
 /**
@@ -140,24 +140,24 @@ TRACE_EVENT(timer_cancel,
  */
 TRACE_EVENT(hrtimer_init,
 
-       TP_PROTO(struct hrtimer *timer, clockid_t clockid,
+       TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid,
                 enum hrtimer_mode mode),
 
-       TP_ARGS(timer, clockid, mode),
+       TP_ARGS(hrtimer, clockid, mode),
 
        TP_STRUCT__entry(
-               __field( void *,                timer           )
+               __field( void *,                hrtimer         )
                __field( clockid_t,             clockid         )
                __field( enum hrtimer_mode,     mode            )
        ),
 
        TP_fast_assign(
-               __entry->timer          = timer;
+               __entry->hrtimer        = hrtimer;
                __entry->clockid        = clockid;
                __entry->mode           = mode;
        ),
 
-       TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer,
+       TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer,
                  __entry->clockid == CLOCK_REALTIME ?
                        "CLOCK_REALTIME" : "CLOCK_MONOTONIC",
                  __entry->mode == HRTIMER_MODE_ABS ?
@@ -170,26 +170,26 @@ TRACE_EVENT(hrtimer_init,
  */
 TRACE_EVENT(hrtimer_start,
 
-       TP_PROTO(struct hrtimer *timer),
+       TP_PROTO(struct hrtimer *hrtimer),
 
-       TP_ARGS(timer),
+       TP_ARGS(hrtimer),
 
        TP_STRUCT__entry(
-               __field( void *,        timer           )
+               __field( void *,        hrtimer         )
                __field( void *,        function        )
                __field( s64,           expires         )
                __field( s64,           softexpires     )
        ),
 
        TP_fast_assign(
-               __entry->timer          = timer;
-               __entry->function       = timer->function;
-               __entry->expires        = hrtimer_get_expires(timer).tv64;
-               __entry->softexpires    = hrtimer_get_softexpires(timer).tv64;
+               __entry->hrtimer        = hrtimer;
+               __entry->function       = hrtimer->function;
+               __entry->expires        = hrtimer_get_expires(hrtimer).tv64;
+               __entry->softexpires    = hrtimer_get_softexpires(hrtimer).tv64;
        ),
 
-       TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu",
-                 __entry->timer, __entry->function,
+       TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu",
+                 __entry->hrtimer, __entry->function,
                  (unsigned long long)ktime_to_ns((ktime_t) {
                                  .tv64 = __entry->expires }),
                  (unsigned long long)ktime_to_ns((ktime_t) {
@@ -206,23 +206,22 @@ TRACE_EVENT(hrtimer_start,
  */
 TRACE_EVENT(hrtimer_expire_entry,
 
-       TP_PROTO(struct hrtimer *timer, ktime_t *now),
+       TP_PROTO(struct hrtimer *hrtimer, ktime_t *now),
 
-       TP_ARGS(timer, now),
+       TP_ARGS(hrtimer, now),
 
        TP_STRUCT__entry(
-               __field( void *,        timer   )
+               __field( void *,        hrtimer )
                __field( s64,           now     )
        ),
 
        TP_fast_assign(
-               __entry->timer  = timer;
-               __entry->now    = now->tv64;
+               __entry->hrtimer        = hrtimer;
+               __entry->now            = now->tv64;
        ),
 
-       TP_printk("hrtimer %p, now %llu", __entry->timer,
-                 (unsigned long long)ktime_to_ns((ktime_t) {
-                                 .tv64 = __entry->now }))
+       TP_printk("hrtimer=%p now=%llu", __entry->hrtimer,
+                 (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
  );
 
 /**
@@ -234,40 +233,40 @@ TRACE_EVENT(hrtimer_expire_entry,
  */
 TRACE_EVENT(hrtimer_expire_exit,
 
-       TP_PROTO(struct hrtimer *timer),
+       TP_PROTO(struct hrtimer *hrtimer),
 
-       TP_ARGS(timer),
+       TP_ARGS(hrtimer),
 
        TP_STRUCT__entry(
-               __field( void *,        timer   )
+               __field( void *,        hrtimer )
        ),
 
        TP_fast_assign(
-               __entry->timer  = timer;
+               __entry->hrtimer        = hrtimer;
        ),
 
-       TP_printk("hrtimer %p", __entry->timer)
+       TP_printk("hrtimer=%p", __entry->hrtimer)
 );
 
 /**
  * hrtimer_cancel - called when the hrtimer is canceled
- * @timer:     pointer to struct hrtimer
+ * @hrtimer:   pointer to struct hrtimer
  */
 TRACE_EVENT(hrtimer_cancel,
 
-       TP_PROTO(struct hrtimer *timer),
+       TP_PROTO(struct hrtimer *hrtimer),
 
-       TP_ARGS(timer),
+       TP_ARGS(hrtimer),
 
        TP_STRUCT__entry(
-               __field( void *,        timer   )
+               __field( void *,        hrtimer )
        ),
 
        TP_fast_assign(
-               __entry->timer  = timer;
+               __entry->hrtimer        = hrtimer;
        ),
 
-       TP_printk("hrtimer %p", __entry->timer)
+       TP_printk("hrtimer=%p", __entry->hrtimer)
 );
 
 /**
@@ -302,7 +301,7 @@ TRACE_EVENT(itimer_state,
                __entry->interval_usec  = value->it_interval.tv_usec;
        ),
 
-       TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu",
+       TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu",
                  __entry->which, __entry->expires,
                  __entry->value_sec, __entry->value_usec,
                  __entry->interval_sec, __entry->interval_usec)
@@ -332,7 +331,7 @@ TRACE_EVENT(itimer_expire,
                __entry->pid    = pid_nr(pid);
        ),
 
-           TP_printk("which %d, pid %d, now %lu", __entry->which,
+           TP_printk("which=%d pid=%d now=%lu", __entry->which,
                      (int) __entry->pid, __entry->now)
 );
 
index cc0d966..c9bbcab 100644 (file)
 #undef __field
 #define __field(type, item)                                    \
        ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
-                              "offset:%u;\tsize:%u;\n",                \
+                              "offset:%u;\tsize:%u;\tsigned:%u;\n",    \
                               (unsigned int)offsetof(typeof(field), item), \
-                              (unsigned int)sizeof(field.item));       \
+                              (unsigned int)sizeof(field.item),        \
+                              (unsigned int)is_signed_type(type));     \
        if (!ret)                                                       \
                return 0;
 
 #undef __array
 #define __array(type, item, len)                                               \
        ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"    \
-                              "offset:%u;\tsize:%u;\n",                \
+                              "offset:%u;\tsize:%u;\tsigned:%u;\n",    \
                               (unsigned int)offsetof(typeof(field), item), \
-                              (unsigned int)sizeof(field.item));       \
+                              (unsigned int)sizeof(field.item),        \
+                              (unsigned int)is_signed_type(type));     \
        if (!ret)                                                       \
                return 0;
 
 #undef __dynamic_array
 #define __dynamic_array(type, item, len)                                      \
        ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\
-                              "offset:%u;\tsize:%u;\n",                       \
+                              "offset:%u;\tsize:%u;\tsigned:%u;\n",           \
                               (unsigned int)offsetof(typeof(field),           \
                                        __data_loc_##item),                    \
-                              (unsigned int)sizeof(field.__data_loc_##item)); \
+                              (unsigned int)sizeof(field.__data_loc_##item), \
+                              (unsigned int)is_signed_type(type));     \
        if (!ret)                                                              \
                return 0;
 
index 5dc283b..e972f0a 100644 (file)
@@ -33,7 +33,7 @@ struct syscall_metadata {
 };
 
 #ifdef CONFIG_FTRACE_SYSCALLS
-extern struct syscall_metadata *syscall_nr_to_meta(int nr);
+extern unsigned long arch_syscall_addr(int nr);
 extern int syscall_name_to_nr(char *name);
 void set_syscall_enter_id(int num, int id);
 void set_syscall_exit_id(int num, int id);
index 7f29643..a69d4ed 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
+#include <linux/ftrace_event.h>
 
 #include <asm/irq_regs.h>
 
@@ -1658,6 +1659,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
        return ERR_PTR(err);
 }
 
+static void perf_event_free_filter(struct perf_event *event);
+
 static void free_event_rcu(struct rcu_head *head)
 {
        struct perf_event *event;
@@ -1665,6 +1668,7 @@ static void free_event_rcu(struct rcu_head *head)
        event = container_of(head, struct perf_event, rcu_head);
        if (event->ns)
                put_pid_ns(event->ns);
+       perf_event_free_filter(event);
        kfree(event);
 }
 
@@ -1974,7 +1978,8 @@ unlock:
        return ret;
 }
 
-int perf_event_set_output(struct perf_event *event, int output_fd);
+static int perf_event_set_output(struct perf_event *event, int output_fd);
+static int perf_event_set_filter(struct perf_event *event, void __user *arg);
 
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -2002,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case PERF_EVENT_IOC_SET_OUTPUT:
                return perf_event_set_output(event, arg);
 
+       case PERF_EVENT_IOC_SET_FILTER:
+               return perf_event_set_filter(event, (void __user *)arg);
+
        default:
                return -ENOTTY;
        }
@@ -3806,9 +3814,14 @@ static int perf_swevent_is_counting(struct perf_event *event)
        return 1;
 }
 
+static int perf_tp_event_match(struct perf_event *event,
+                               struct perf_sample_data *data);
+
 static int perf_swevent_match(struct perf_event *event,
                                enum perf_type_id type,
-                               u32 event_id, struct pt_regs *regs)
+                               u32 event_id,
+                               struct perf_sample_data *data,
+                               struct pt_regs *regs)
 {
        if (!perf_swevent_is_counting(event))
                return 0;
@@ -3826,6 +3839,10 @@ static int perf_swevent_match(struct perf_event *event,
                        return 0;
        }
 
+       if (event->attr.type == PERF_TYPE_TRACEPOINT &&
+           !perf_tp_event_match(event, data))
+               return 0;
+
        return 1;
 }
 
@@ -3842,7 +3859,7 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
 
        rcu_read_lock();
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-               if (perf_swevent_match(event, type, event_id, regs))
+               if (perf_swevent_match(event, type, event_id, data, regs))
                        perf_swevent_add(event, nr, nmi, data, regs);
        }
        rcu_read_unlock();
@@ -4108,6 +4125,7 @@ static const struct pmu perf_ops_task_clock = {
 };
 
 #ifdef CONFIG_EVENT_PROFILE
+
 void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
                          int entry_size)
 {
@@ -4131,8 +4149,15 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
 
-extern int ftrace_profile_enable(int);
-extern void ftrace_profile_disable(int);
+static int perf_tp_event_match(struct perf_event *event,
+                               struct perf_sample_data *data)
+{
+       void *record = data->raw->data; /* raw sample payload handed to the filter */
+
+       if (likely(!event->filter) || filter_match_preds(event->filter, record)) /* no filter: match */
+               return 1;
+       return 0;       /* a filter is attached and rejected this record */
+}
 
 static void tp_perf_event_destroy(struct perf_event *event)
 {
@@ -4157,12 +4182,53 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
 
        return &perf_ops_generic;
 }
+
+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
+{
+       char *filter_str;
+       int ret;
+
+       if (event->attr.type != PERF_TYPE_TRACEPOINT)   /* only tracepoint events accept a filter */
+               return -EINVAL;
+
+       filter_str = strndup_user(arg, PAGE_SIZE);      /* kernel copy of the user string, bounded by PAGE_SIZE */
+       if (IS_ERR(filter_str))
+               return PTR_ERR(filter_str);
+
+       ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
+
+       kfree(filter_str);      /* released whether or not the filter was accepted */
+       return ret;
+}
+
+static void perf_event_free_filter(struct perf_event *event)
+{
+       ftrace_profile_free_filter(event);
+}
+
 #else
+
+static int perf_tp_event_match(struct perf_event *event,
+                               struct perf_sample_data *data)
+{
+       return 1;
+}
+
 static const struct pmu *tp_perf_event_init(struct perf_event *event)
 {
        return NULL;
 }
-#endif
+
+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
+{
+       return -ENOENT;
+}
+
+static void perf_event_free_filter(struct perf_event *event)
+{
+}
+
+#endif /* CONFIG_EVENT_PROFILE */
 
 atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
@@ -4416,7 +4482,7 @@ err_size:
        goto out;
 }
 
-int perf_event_set_output(struct perf_event *event, int output_fd)
+static int perf_event_set_output(struct perf_event *event, int output_fd)
 {
        struct perf_event *output_event = NULL;
        struct file *output_file = NULL;
index 9c451a1..1ed514f 100644 (file)
@@ -60,6 +60,13 @@ static int last_ftrace_enabled;
 /* Quick disabling of function tracer. */
 int function_trace_stop;
 
+/* List for set_ftrace_pid's pids. */
+LIST_HEAD(ftrace_pids);
+struct ftrace_pid {
+       struct list_head list;
+       struct pid *pid;
+};
+
 /*
  * ftrace_disabled is set when an anomaly is discovered.
  * ftrace_disabled is much stronger than ftrace_enabled.
@@ -78,6 +85,10 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
+#endif
+
 static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
 {
        struct ftrace_ops *op = ftrace_list;
@@ -155,7 +166,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
                else
                        func = ftrace_list_func;
 
-               if (ftrace_pid_trace) {
+               if (!list_empty(&ftrace_pids)) {
                        set_ftrace_pid_function(func);
                        func = ftrace_pid_func;
                }
@@ -203,7 +214,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
                if (ftrace_list->next == &ftrace_list_end) {
                        ftrace_func_t func = ftrace_list->func;
 
-                       if (ftrace_pid_trace) {
+                       if (!list_empty(&ftrace_pids)) {
                                set_ftrace_pid_function(func);
                                func = ftrace_pid_func;
                        }
@@ -231,7 +242,7 @@ static void ftrace_update_pid_func(void)
        func = __ftrace_trace_function;
 #endif
 
-       if (ftrace_pid_trace) {
+       if (!list_empty(&ftrace_pids)) {
                set_ftrace_pid_function(func);
                func = ftrace_pid_func;
        } else {
@@ -821,8 +832,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 }
 #endif /* CONFIG_FUNCTION_PROFILER */
 
-/* set when tracing only a pid */
-struct pid *ftrace_pid_trace;
 static struct pid * const ftrace_swapper_pid = &init_struct_pid;
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -1261,12 +1270,34 @@ static int ftrace_update_code(struct module *mod)
                ftrace_new_addrs = p->newlist;
                p->flags = 0L;
 
-               /* convert record (i.e, patch mcount-call with NOP) */
-               if (ftrace_code_disable(mod, p)) {
-                       p->flags |= FTRACE_FL_CONVERTED;
-                       ftrace_update_cnt++;
-               } else
+               /*
+                * Do the initial record conversion from mcount jump
+                * to the NOP instructions.
+                */
+               if (!ftrace_code_disable(mod, p)) {
                        ftrace_free_rec(p);
+                       continue;
+               }
+
+               p->flags |= FTRACE_FL_CONVERTED;
+               ftrace_update_cnt++;
+
+               /*
+                * If the tracing is enabled, go ahead and enable the record.
+                *
+                * The reason not to enable the record immediately is the
+                * inherent check of ftrace_make_nop/ftrace_make_call for
+                * correct previous instructions.  Making first the NOP
+                * conversion puts the module to the correct state, thus
+                * passing the ftrace_make_call check.
+                */
+               if (ftrace_start_up) {
+                       int failed = __ftrace_replace_code(p, 1);
+                       if (failed) {
+                               ftrace_bug(failed, p->ip);
+                               ftrace_free_rec(p);
+                       }
+               }
        }
 
        stop = ftrace_now(raw_smp_processor_id());
@@ -1656,60 +1687,6 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
        return ret;
 }
 
-enum {
-       MATCH_FULL,
-       MATCH_FRONT_ONLY,
-       MATCH_MIDDLE_ONLY,
-       MATCH_END_ONLY,
-};
-
-/*
- * (static function - no need for kernel doc)
- *
- * Pass in a buffer containing a glob and this function will
- * set search to point to the search part of the buffer and
- * return the type of search it is (see enum above).
- * This does modify buff.
- *
- * Returns enum type.
- *  search returns the pointer to use for comparison.
- *  not returns 1 if buff started with a '!'
- *     0 otherwise.
- */
-static int
-ftrace_setup_glob(char *buff, int len, char **search, int *not)
-{
-       int type = MATCH_FULL;
-       int i;
-
-       if (buff[0] == '!') {
-               *not = 1;
-               buff++;
-               len--;
-       } else
-               *not = 0;
-
-       *search = buff;
-
-       for (i = 0; i < len; i++) {
-               if (buff[i] == '*') {
-                       if (!i) {
-                               *search = buff + 1;
-                               type = MATCH_END_ONLY;
-                       } else {
-                               if (type == MATCH_END_ONLY)
-                                       type = MATCH_MIDDLE_ONLY;
-                               else
-                                       type = MATCH_FRONT_ONLY;
-                               buff[i] = 0;
-                               break;
-                       }
-               }
-       }
-
-       return type;
-}
-
 static int ftrace_match(char *str, char *regex, int len, int type)
 {
        int matched = 0;
@@ -1758,7 +1735,7 @@ static void ftrace_match_records(char *buff, int len, int enable)
        int not;
 
        flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
-       type = ftrace_setup_glob(buff, len, &search, &not);
+       type = filter_parse_regex(buff, len, &search, &not);
 
        search_len = strlen(search);
 
@@ -1826,7 +1803,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable)
        }
 
        if (strlen(buff)) {
-               type = ftrace_setup_glob(buff, strlen(buff), &search, &not);
+               type = filter_parse_regex(buff, strlen(buff), &search, &not);
                search_len = strlen(search);
        }
 
@@ -1991,7 +1968,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
        int count = 0;
        char *search;
 
-       type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
+       type = filter_parse_regex(glob, strlen(glob), &search, &not);
        len = strlen(search);
 
        /* we do not support '!' for function probes */
@@ -2068,7 +2045,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
        else if (glob) {
                int not;
 
-               type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
+               type = filter_parse_regex(glob, strlen(glob), &search, &not);
                len = strlen(search);
 
                /* we do not support '!' for function probes */
@@ -2297,6 +2274,7 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset)
 #define FTRACE_FILTER_SIZE             COMMAND_LINE_SIZE
 static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
 static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
+static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
 
 static int __init set_ftrace_notrace(char *str)
 {
@@ -2312,6 +2290,31 @@ static int __init set_ftrace_filter(char *str)
 }
 __setup("ftrace_filter=", set_ftrace_filter);
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int __init set_graph_function(char *str)
+{
+       strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
+       return 1;
+}
+__setup("ftrace_graph_filter=", set_graph_function);
+
+static void __init set_ftrace_early_graph(char *buf)
+{
+       int ret;
+       char *func;
+
+       while (buf) {
+               func = strsep(&buf, ",");
+               /* we allow only one expression at a time */
+               ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count,
+                                     func);
+               if (ret)
+                       printk(KERN_DEBUG "ftrace: function %s not "
+                                         "traceable\n", func);
+       }
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 static void __init set_ftrace_early_filter(char *buf, int enable)
 {
        char *func;
@@ -2328,6 +2331,10 @@ static void __init set_ftrace_early_filters(void)
                set_ftrace_early_filter(ftrace_filter_buf, 1);
        if (ftrace_notrace_buf[0])
                set_ftrace_early_filter(ftrace_notrace_buf, 0);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+       if (ftrace_graph_buf[0])
+               set_ftrace_early_graph(ftrace_graph_buf);
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 }
 
 static int
@@ -2513,7 +2520,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
                return -ENODEV;
 
        /* decode regex */
-       type = ftrace_setup_glob(buffer, strlen(buffer), &search, &not);
+       type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
        if (not)
                return -EINVAL;
 
@@ -2624,7 +2631,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
        return 0;
 }
 
-static int ftrace_convert_nops(struct module *mod,
+static int ftrace_process_locs(struct module *mod,
                               unsigned long *start,
                               unsigned long *end)
 {
@@ -2684,7 +2691,7 @@ static void ftrace_init_module(struct module *mod,
 {
        if (ftrace_disabled || start == end)
                return;
-       ftrace_convert_nops(mod, start, end);
+       ftrace_process_locs(mod, start, end);
 }
 
 static int ftrace_module_notify(struct notifier_block *self,
@@ -2745,7 +2752,7 @@ void __init ftrace_init(void)
 
        last_ftrace_enabled = ftrace_enabled = 1;
 
-       ret = ftrace_convert_nops(NULL,
+       ret = ftrace_process_locs(NULL,
                                  __start_mcount_loc,
                                  __stop_mcount_loc);
 
@@ -2778,23 +2785,6 @@ static inline void ftrace_startup_enable(int command) { }
 # define ftrace_shutdown_sysctl()      do { } while (0)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
-static ssize_t
-ftrace_pid_read(struct file *file, char __user *ubuf,
-                      size_t cnt, loff_t *ppos)
-{
-       char buf[64];
-       int r;
-
-       if (ftrace_pid_trace == ftrace_swapper_pid)
-               r = sprintf(buf, "swapper tasks\n");
-       else if (ftrace_pid_trace)
-               r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace));
-       else
-               r = sprintf(buf, "no pid\n");
-
-       return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
 static void clear_ftrace_swapper(void)
 {
        struct task_struct *p;
@@ -2845,14 +2835,12 @@ static void set_ftrace_pid(struct pid *pid)
        rcu_read_unlock();
 }
 
-static void clear_ftrace_pid_task(struct pid **pid)
+static void clear_ftrace_pid_task(struct pid *pid)
 {
-       if (*pid == ftrace_swapper_pid)
+       if (pid == ftrace_swapper_pid)
                clear_ftrace_swapper();
        else
-               clear_ftrace_pid(*pid);
-
-       *pid = NULL;
+               clear_ftrace_pid(pid);
 }
 
 static void set_ftrace_pid_task(struct pid *pid)
@@ -2863,11 +2851,140 @@ static void set_ftrace_pid_task(struct pid *pid)
                set_ftrace_pid(pid);
 }
 
+/*
+ * ftrace_pid_add - add one pid to the ftrace_pids list
+ * @p: pid number; 0 selects ftrace_swapper_pid instead of a lookup
+ *
+ * Runs entirely under ftrace_lock.
+ *
+ * Returns 0 when the pid was added or was already on the list, -EINVAL
+ * when no struct pid could be obtained for @p, and -ENOMEM when the list
+ * entry could not be allocated.
+ */
+static int ftrace_pid_add(int p)
+{
+       struct pid *pid;
+       struct ftrace_pid *fpid;
+       int ret = -EINVAL;
+
+       mutex_lock(&ftrace_lock);
+
+       if (!p)
+               pid = ftrace_swapper_pid;
+       else
+               pid = find_get_pid(p);
+
+       if (!pid)
+               goto out;
+
+       /* a pid that is already listed is reported as success */
+       ret = 0;
+
+       list_for_each_entry(fpid, &ftrace_pids, list)
+               if (fpid->pid == pid)
+                       goto out_put;
+
+       ret = -ENOMEM;
+
+       fpid = kmalloc(sizeof(*fpid), GFP_KERNEL);
+       if (!fpid)
+               goto out_put;
+
+       list_add(&fpid->list, &ftrace_pids);
+       fpid->pid = pid;
+
+       set_ftrace_pid_task(pid);
+
+       ftrace_update_pid_func();
+       ftrace_startup_enable(0);
+
+       mutex_unlock(&ftrace_lock);
+       return 0;
+
+out_put:
+       /* only pids obtained through find_get_pid() are put back */
+       if (pid != ftrace_swapper_pid)
+               put_pid(pid);
+
+out:
+       mutex_unlock(&ftrace_lock);
+       return ret;
+}
+
+/*
+ * ftrace_pid_reset - empty the ftrace_pids list
+ *
+ * Under ftrace_lock: for every list entry call clear_ftrace_pid_task() on
+ * its pid, unlink the entry and free it, then call ftrace_update_pid_func()
+ * and ftrace_startup_enable(0) once the list is empty.
+ */
+static void ftrace_pid_reset(void)
+{
+       struct ftrace_pid *fpid, *safe;
+
+       mutex_lock(&ftrace_lock);
+       /* _safe variant: entries are deleted while walking the list */
+       list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) {
+               struct pid *pid = fpid->pid;
+
+               clear_ftrace_pid_task(pid);
+
+               list_del(&fpid->list);
+               kfree(fpid);
+       }
+
+       ftrace_update_pid_func();
+       ftrace_startup_enable(0);
+
+       mutex_unlock(&ftrace_lock);
+}
+
+/*
+ * seq_file ->start: takes ftrace_lock (dropped again in fpid_stop()) and
+ * positions the iterator at *pos in ftrace_pids.  When the list is empty
+ * and reading starts at position 0, the sentinel (void *)1 is returned so
+ * that fpid_show() can print its "no pid" line.
+ */
+static void *fpid_start(struct seq_file *m, loff_t *pos)
+{
+       mutex_lock(&ftrace_lock);
+
+       if (*pos == 0 && list_empty(&ftrace_pids))
+               return (void *)1;
+
+       return seq_list_start(&ftrace_pids, *pos);
+}
+
+/*
+ * seq_file ->next: advance to the following ftrace_pids entry.  The
+ * (void *)1 sentinel handed out by fpid_start() has no successor, so the
+ * iteration ends right after it (and *pos is left untouched).
+ */
+static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       return (v == (void *)1) ? NULL : seq_list_next(v, &ftrace_pids, pos);
+}
+
+/* seq_file ->stop: release the ftrace_lock taken in fpid_start(). */
+static void fpid_stop(struct seq_file *m, void *p)
+{
+       mutex_unlock(&ftrace_lock);
+}
+
+/*
+ * seq_file ->show: print one line per iterator element.
+ *
+ *   sentinel (void *)1      -> "no pid"
+ *   ftrace_swapper_pid      -> "swapper tasks"
+ *   any other entry         -> its pid number as seen through pid_vnr()
+ *
+ * Always returns 0.
+ */
+static int fpid_show(struct seq_file *m, void *v)
+{
+       const struct ftrace_pid *entry;
+
+       if (v == (void *)1) {
+               seq_printf(m, "no pid\n");
+               return 0;
+       }
+
+       entry = list_entry(v, struct ftrace_pid, list);
+
+       if (entry->pid != ftrace_swapper_pid)
+               seq_printf(m, "%u\n", pid_vnr(entry->pid));
+       else
+               seq_printf(m, "swapper tasks\n");
+
+       return 0;
+}
+
+/*
+ * seq_file iterator over ftrace_pids, installed by ftrace_pid_open() for
+ * readers.  ->start takes ftrace_lock and ->stop releases it.
+ */
+static const struct seq_operations ftrace_pid_sops = {
+       .start = fpid_start,
+       .next = fpid_next,
+       .stop = fpid_stop,
+       .show = fpid_show,
+};
+
+/*
+ * ->open: a file opened for writing with O_TRUNC first drops every pid
+ * via ftrace_pid_reset(); a file opened for reading gets the seq_file
+ * iterator attached.  Returns the seq_open() result for readers and 0
+ * otherwise.
+ */
+static int
+ftrace_pid_open(struct inode *inode, struct file *file)
+{
+       if (file->f_mode & FMODE_WRITE) {
+               if (file->f_flags & O_TRUNC)
+                       ftrace_pid_reset();
+       }
+
+       if (!(file->f_mode & FMODE_READ))
+               return 0;
+
+       return seq_open(file, &ftrace_pid_sops);
+}
+
 static ssize_t
 ftrace_pid_write(struct file *filp, const char __user *ubuf,
                   size_t cnt, loff_t *ppos)
 {
-       struct pid *pid;
        char buf[64];
        long val;
        int ret;
@@ -2880,57 +2997,38 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
 
        buf[cnt] = 0;
 
+       /*
+        * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid"
+        * to clean the filter quietly.
+        */
+       strstrip(buf);
+       if (strlen(buf) == 0)
+               return 1;
+
        ret = strict_strtol(buf, 10, &val);
        if (ret < 0)
                return ret;
 
-       mutex_lock(&ftrace_lock);
-       if (val < 0) {
-               /* disable pid tracing */
-               if (!ftrace_pid_trace)
-                       goto out;
-
-               clear_ftrace_pid_task(&ftrace_pid_trace);
-
-       } else {
-               /* swapper task is special */
-               if (!val) {
-                       pid = ftrace_swapper_pid;
-                       if (pid == ftrace_pid_trace)
-                               goto out;
-               } else {
-                       pid = find_get_pid(val);
+       ret = ftrace_pid_add(val);
 
-                       if (pid == ftrace_pid_trace) {
-                               put_pid(pid);
-                               goto out;
-                       }
-               }
-
-               if (ftrace_pid_trace)
-                       clear_ftrace_pid_task(&ftrace_pid_trace);
-
-               if (!pid)
-                       goto out;
-
-               ftrace_pid_trace = pid;
-
-               set_ftrace_pid_task(ftrace_pid_trace);
-       }
-
-       /* update the function call */
-       ftrace_update_pid_func();
-       ftrace_startup_enable(0);
+       return ret ? ret : cnt;
+}
 
- out:
-       mutex_unlock(&ftrace_lock);
+static int
+ftrace_pid_release(struct inode *inode, struct file *file)
+{
+       if (file->f_mode & FMODE_READ)
+               seq_release(inode, file);
 
-       return cnt;
+       return 0;
 }
 
 static const struct file_operations ftrace_pid_fops = {
-       .read = ftrace_pid_read,
-       .write = ftrace_pid_write,
+       .open           = ftrace_pid_open,
+       .write          = ftrace_pid_write,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = ftrace_pid_release,
 };
 
 static __init int ftrace_init_debugfs(void)
index 3ffa502..63446f1 100644 (file)
@@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s)
        int ret;
 
        ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
-                              "offset:0;\tsize:%u;\n",
-                              (unsigned int)sizeof(field.time_stamp));
+                              "offset:0;\tsize:%u;\tsigned:%u;\n",
+                              (unsigned int)sizeof(field.time_stamp),
+                              (unsigned int)is_signed_type(u64));
 
        ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
-                              "offset:%u;\tsize:%u;\n",
+                              "offset:%u;\tsize:%u;\tsigned:%u;\n",
                               (unsigned int)offsetof(typeof(field), commit),
-                              (unsigned int)sizeof(field.commit));
+                              (unsigned int)sizeof(field.commit),
+                              (unsigned int)is_signed_type(long));
 
        ret = trace_seq_printf(s, "\tfield: char data;\t"
-                              "offset:%u;\tsize:%u;\n",
+                              "offset:%u;\tsize:%u;\tsigned:%u;\n",
                               (unsigned int)offsetof(typeof(field), data),
-                              (unsigned int)BUF_PAGE_SIZE);
+                              (unsigned int)BUF_PAGE_SIZE,
+                              (unsigned int)is_signed_type(char));
 
        return ret;
 }
index b20d3ec..9d3067a 100644 (file)
@@ -129,7 +129,7 @@ static int tracing_set_tracer(const char *buf);
 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
 static char *default_bootup_tracer;
 
-static int __init set_ftrace(char *str)
+static int __init set_cmdline_ftrace(char *str)
 {
        strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
        default_bootup_tracer = bootup_tracer_buf;
@@ -137,7 +137,7 @@ static int __init set_ftrace(char *str)
        ring_buffer_expanded = 1;
        return 1;
 }
-__setup("ftrace=", set_ftrace);
+__setup("ftrace=", set_cmdline_ftrace);
 
 static int __init set_ftrace_dump_on_oops(char *str)
 {
index 405cb85..4959ada 100644 (file)
@@ -483,10 +483,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
        return 0;
 }
 #else
-static inline int ftrace_trace_addr(unsigned long addr)
-{
-       return 1;
-}
 static inline int ftrace_graph_addr(unsigned long addr)
 {
        return 1;
@@ -500,12 +496,12 @@ print_graph_function(struct trace_iterator *iter)
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-extern struct pid *ftrace_pid_trace;
+extern struct list_head ftrace_pids;
 
 #ifdef CONFIG_FUNCTION_TRACER
 static inline int ftrace_trace_task(struct task_struct *task)
 {
-       if (!ftrace_pid_trace)
+       if (list_empty(&ftrace_pids))
                return 1;
 
        return test_tsk_trace_trace(task);
@@ -687,7 +683,6 @@ struct event_filter {
        int                     n_preds;
        struct filter_pred      **preds;
        char                    *filter_string;
-       bool                    no_reset;
 };
 
 struct event_subsystem {
@@ -699,22 +694,40 @@ struct event_subsystem {
 };
 
 struct filter_pred;
+struct regex;
 
 typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
                                 int val1, int val2);
 
+typedef int (*regex_match_func)(char *str, struct regex *r, int len);
+
+/*
+ * Kind of match selected by filter_parse_regex() from the position of the
+ * '*' wildcards in the pattern:
+ *   MATCH_FULL        - no '*' found
+ *   MATCH_FRONT_ONLY  - first '*' comes after some text ("foo*")
+ *   MATCH_MIDDLE_ONLY - a leading '*' and a later one ("*foo*")
+ *   MATCH_END_ONLY    - only a leading '*' ("*foo")
+ */
+enum regex_type {
+       MATCH_FULL = 0,
+       MATCH_FRONT_ONLY,
+       MATCH_MIDDLE_ONLY,
+       MATCH_END_ONLY,
+};
+
+/* Pattern and matcher attached to a string filter predicate. */
+struct regex {
+       /* text to compare against; filled from the filter operand */
+       char                    pattern[MAX_FILTER_STR_VAL];
+       /* strlen() of pattern */
+       int                     len;
+       /* length handed to ->match for static-string and char * fields */
+       int                     field_len;
+       /* comparison callback; returns 1 on match, 0 otherwise */
+       regex_match_func        match;
+};
+
 struct filter_pred {
-       filter_pred_fn_t fn;
-       u64 val;
-       char str_val[MAX_FILTER_STR_VAL];
-       int str_len;
-       char *field_name;
-       int offset;
-       int not;
-       int op;
-       int pop_n;
+       filter_pred_fn_t        fn;
+       u64                     val;
+       struct regex            regex;
+       char                    *field_name;
+       int                     offset;
+       int                     not;
+       int                     op;
+       int                     pop_n;
 };
 
+extern enum regex_type
+filter_parse_regex(char *buff, int len, char **search, int *not);
 extern void print_event_filter(struct ftrace_event_call *call,
                               struct trace_seq *s);
 extern int apply_event_filter(struct ftrace_event_call *call,
@@ -730,7 +743,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
                     struct ring_buffer *buffer,
                     struct ring_buffer_event *event)
 {
-       if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
+       if (unlikely(call->filter_active) &&
+           !filter_match_preds(call->filter, rec)) {
                ring_buffer_discard_commit(buffer, event);
                return 1;
        }
index d128f65..7c18d15 100644 (file)
@@ -507,7 +507,7 @@ extern char *__bad_type_size(void);
 #define FIELD(type, name)                                              \
        sizeof(type) != sizeof(field.name) ? __bad_type_size() :        \
        #type, "common_" #name, offsetof(typeof(field), name),          \
-               sizeof(field.name)
+               sizeof(field.name), is_signed_type(type)
 
 static int trace_write_header(struct trace_seq *s)
 {
@@ -515,17 +515,17 @@ static int trace_write_header(struct trace_seq *s)
 
        /* struct trace_entry */
        return trace_seq_printf(s,
-                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                               "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                               "\n",
-                               FIELD(unsigned short, type),
-                               FIELD(unsigned char, flags),
-                               FIELD(unsigned char, preempt_count),
-                               FIELD(int, pid),
-                               FIELD(int, lock_depth));
+                       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+                       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+                       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+                       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+                       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
+                       "\n",
+                       FIELD(unsigned short, type),
+                       FIELD(unsigned char, flags),
+                       FIELD(unsigned char, preempt_count),
+                       FIELD(int, pid),
+                       FIELD(int, lock_depth));
 }
 
 static ssize_t
@@ -878,9 +878,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
                           "'%s/filter' entry\n", name);
        }
 
-       entry = trace_create_file("enable", 0644, system->entry,
-                                 (void *)system->name,
-                                 &ftrace_system_enable_fops);
+       trace_create_file("enable", 0644, system->entry,
+                         (void *)system->name,
+                         &ftrace_system_enable_fops);
 
        return system->entry;
 }
@@ -892,7 +892,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
                 const struct file_operations *filter,
                 const struct file_operations *format)
 {
-       struct dentry *entry;
        int ret;
 
        /*
@@ -910,12 +909,12 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
        }
 
        if (call->regfunc)
-               entry = trace_create_file("enable", 0644, call->dir, call,
-                                         enable);
+               trace_create_file("enable", 0644, call->dir, call,
+                                 enable);
 
        if (call->id && call->profile_enable)
-               entry = trace_create_file("id", 0444, call->dir, call,
-                                         id);
+               trace_create_file("id", 0444, call->dir, call,
+                                 id);
 
        if (call->define_fields) {
                ret = call->define_fields(call);
@@ -924,16 +923,16 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
                                   " events/%s\n", call->name);
                        return ret;
                }
-               entry = trace_create_file("filter", 0644, call->dir, call,
-                                         filter);
+               trace_create_file("filter", 0644, call->dir, call,
+                                 filter);
        }
 
        /* A trace may not want to export its format */
        if (!call->show_format)
                return 0;
 
-       entry = trace_create_file("format", 0444, call->dir, call,
-                                 format);
+       trace_create_file("format", 0444, call->dir, call,
+                         format);
 
        return 0;
 }
index 98a6cc5..50504cb 100644 (file)
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  */
 
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/mutex.h>
+#include <linux/perf_event.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -31,6 +30,7 @@ enum filter_op_ids
 {
        OP_OR,
        OP_AND,
+       OP_GLOB,
        OP_NE,
        OP_EQ,
        OP_LT,
@@ -48,16 +48,17 @@ struct filter_op {
 };
 
 static struct filter_op filter_ops[] = {
-       { OP_OR, "||", 1 },
-       { OP_AND, "&&", 2 },
-       { OP_NE, "!=", 4 },
-       { OP_EQ, "==", 4 },
-       { OP_LT, "<", 5 },
-       { OP_LE, "<=", 5 },
-       { OP_GT, ">", 5 },
-       { OP_GE, ">=", 5 },
-       { OP_NONE, "OP_NONE", 0 },
-       { OP_OPEN_PAREN, "(", 0 },
+       { OP_OR,        "||",           1 },
+       { OP_AND,       "&&",           2 },
+       { OP_GLOB,      "~",            4 },
+       { OP_NE,        "!=",           4 },
+       { OP_EQ,        "==",           4 },
+       { OP_LT,        "<",            5 },
+       { OP_LE,        "<=",           5 },
+       { OP_GT,        ">",            5 },
+       { OP_GE,        ">=",           5 },
+       { OP_NONE,      "OP_NONE",      0 },
+       { OP_OPEN_PAREN, "(",           0 },
 };
 
 enum {
@@ -197,9 +198,9 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
        char *addr = (char *)(event + pred->offset);
        int cmp, match;
 
-       cmp = strncmp(addr, pred->str_val, pred->str_len);
+       cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
 
-       match = (!cmp) ^ pred->not;
+       match = cmp ^ pred->not;
 
        return match;
 }
@@ -211,9 +212,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
        char **addr = (char **)(event + pred->offset);
        int cmp, match;
 
-       cmp = strncmp(*addr, pred->str_val, pred->str_len);
+       cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
 
-       match = (!cmp) ^ pred->not;
+       match = cmp ^ pred->not;
 
        return match;
 }
@@ -237,9 +238,9 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
        char *addr = (char *)(event + str_loc);
        int cmp, match;
 
-       cmp = strncmp(addr, pred->str_val, str_len);
+       cmp = pred->regex.match(addr, &pred->regex, str_len);
 
-       match = (!cmp) ^ pred->not;
+       match = cmp ^ pred->not;
 
        return match;
 }
@@ -250,10 +251,121 @@ static int filter_pred_none(struct filter_pred *pred, void *event,
        return 0;
 }
 
+/* Basic regex callbacks */
+/*
+ * Matcher used for patterns without wildcards: @str matches when its first
+ * @len characters compare equal to r->pattern.  Returns 1 on match, else 0.
+ */
+static int regex_match_full(char *str, struct regex *r, int len)
+{
+       return strncmp(str, r->pattern, len) == 0;
+}
+
+/*
+ * regex_match_front - match strings that start with the pattern ("foo*")
+ * @str: string to test
+ * @r:   regex holding the prefix in r->pattern and its length in r->len
+ * @len: length of the field; not needed for a prefix comparison
+ *
+ * Only the first r->len characters are compared.  Comparing @len
+ * characters instead would also compare whatever follows the prefix in
+ * @str against the pattern's terminating NUL, which turns the prefix match
+ * into a full match ("foobar" would not match "foo*").
+ *
+ * Returns 1 on match, 0 otherwise.
+ */
+static int regex_match_front(char *str, struct regex *r, int len)
+{
+       if (strncmp(str, r->pattern, r->len) == 0)
+               return 1;
+       return 0;
+}
+
+/*
+ * Matcher for "*foo*" style patterns: @str matches when r->pattern occurs
+ * anywhere inside it.  @len is not used.  Returns 1 on match, else 0.
+ */
+static int regex_match_middle(char *str, struct regex *r, int len)
+{
+       return strstr(str, r->pattern) != NULL;
+}
+
+/*
+ * regex_match_end - match strings that end with the pattern ("*foo")
+ * @str: NUL-terminated string to test
+ * @r:   regex holding the suffix in r->pattern and its length in r->len
+ * @len: unused; the end of @str is located with strlen()
+ *
+ * Compares the last r->len bytes of @str with the pattern.  Looking at the
+ * first strstr() hit only is not sufficient: for "bar_foo_foo" and the
+ * pattern "foo" the first hit is followed by '_', so a string that really
+ * ends in "foo" would be rejected.
+ *
+ * Returns 1 on match, 0 otherwise.  An empty pattern matches any string.
+ */
+static int regex_match_end(char *str, struct regex *r, int len)
+{
+       int slen = strlen(str);
+
+       if (slen >= r->len &&
+           memcmp(str + slen - r->len, r->pattern, r->len) == 0)
+               return 1;
+       return 0;
+}
+
+/**
+ * filter_parse_regex - classify a simple glob expression
+ * @buff:   the expression; modified in place
+ * @len:    number of characters of @buff to examine
+ * @search: set to the start of the text that has to be compared
+ * @not:    set to 1 when @buff begins with '!', to 0 otherwise
+ *
+ * A leading '!' is skipped and reported through @not.  A '*' in the very
+ * first position after that is skipped as well (@search then points just
+ * behind it).  The first '*' found at any later position is overwritten
+ * with a NUL byte, which terminates the text @search points to, and ends
+ * the scan.
+ *
+ * Returns the kind of match the expression asks for, see enum regex_type.
+ */
+enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
+{
+       enum regex_type kind = MATCH_FULL;
+       int pos;
+
+       *not = (buff[0] == '!');
+       if (*not) {
+               buff++;
+               len--;
+       }
+
+       *search = buff;
+
+       for (pos = 0; pos < len; pos++) {
+               if (buff[pos] != '*')
+                       continue;
+
+               if (pos == 0) {
+                       /* leading wildcard: compare text starts behind it */
+                       *search = buff + 1;
+                       kind = MATCH_END_ONLY;
+                       continue;
+               }
+
+               /* trailing wildcard: cut the text here and stop scanning */
+               if (kind == MATCH_END_ONLY)
+                       kind = MATCH_MIDDLE_ONLY;
+               else
+                       kind = MATCH_FRONT_ONLY;
+               buff[pos] = 0;
+               break;
+       }
+
+       return kind;
+}
+
+/*
+ * Prepare pred->regex for matching.
+ *
+ * For the glob operator the pattern is run through filter_parse_regex(),
+ * the resulting compare text is moved to the start of the pattern buffer
+ * and regex.len is updated to its length.  For every other operator the
+ * pattern is left alone and a full match is used.  A '!' prefix found by
+ * the parser toggles pred->not.
+ */
+static void filter_build_regex(struct filter_pred *pred)
+{
+       struct regex *re = &pred->regex;
+       enum regex_type kind = MATCH_FULL;
+       char *text;
+       int invert = 0;
+
+       if (pred->op == OP_GLOB) {
+               kind = filter_parse_regex(re->pattern, re->len, &text, &invert);
+               re->len = strlen(text);
+               /* regions overlap; the +1 carries the terminator along */
+               memmove(re->pattern, text, re->len + 1);
+       }
+
+       if (kind == MATCH_FULL)
+               re->match = regex_match_full;
+       else if (kind == MATCH_FRONT_ONLY)
+               re->match = regex_match_front;
+       else if (kind == MATCH_MIDDLE_ONLY)
+               re->match = regex_match_middle;
+       else if (kind == MATCH_END_ONLY)
+               re->match = regex_match_end;
+
+       pred->not ^= invert;
+}
+
 /* return 1 if event matches, 0 otherwise (discard) */
-int filter_match_preds(struct ftrace_event_call *call, void *rec)
+int filter_match_preds(struct event_filter *filter, void *rec)
 {
-       struct event_filter *filter = call->filter;
        int match, top = 0, val1 = 0, val2 = 0;
        int stack[MAX_FILTER_PRED];
        struct filter_pred *pred;
@@ -396,7 +508,7 @@ static void filter_clear_pred(struct filter_pred *pred)
 {
        kfree(pred->field_name);
        pred->field_name = NULL;
-       pred->str_len = 0;
+       pred->regex.len = 0;
 }
 
 static int filter_set_pred(struct filter_pred *dest,
@@ -426,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call)
                filter->preds[i]->fn = filter_pred_none;
 }
 
-void destroy_preds(struct ftrace_event_call *call)
+static void __free_preds(struct event_filter *filter)
 {
-       struct event_filter *filter = call->filter;
        int i;
 
        if (!filter)
@@ -441,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call)
        kfree(filter->preds);
        kfree(filter->filter_string);
        kfree(filter);
+}
+
+void destroy_preds(struct ftrace_event_call *call)
+{
+       __free_preds(call->filter);
        call->filter = NULL;
+       call->filter_active = 0;
 }
 
-static int init_preds(struct ftrace_event_call *call)
+static struct event_filter *__alloc_preds(void)
 {
        struct event_filter *filter;
        struct filter_pred *pred;
        int i;
 
-       if (call->filter)
-               return 0;
-
-       filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-       if (!call->filter)
-               return -ENOMEM;
+       filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+       if (!filter)
+               return ERR_PTR(-ENOMEM);
 
        filter->n_preds = 0;
 
@@ -471,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call)
                filter->preds[i] = pred;
        }
 
-       return 0;
+       return filter;
 
 oom:
-       destroy_preds(call);
+       __free_preds(filter);
+       return ERR_PTR(-ENOMEM);
+}
 
-       return -ENOMEM;
+/*
+ * init_preds - make sure @call has an event filter allocated
+ * @call: event to set up
+ *
+ * Does nothing when a filter is already attached.  Otherwise clears
+ * filter_active and attaches a freshly allocated filter.
+ *
+ * The result of __alloc_preds() is checked in a local variable before it
+ * is stored, so that call->filter never holds an ERR_PTR() value: the
+ * "if (call->filter)" test above and destroy_preds() both treat any
+ * non-NULL value as a valid filter.
+ *
+ * Returns 0 on success or the negative error from __alloc_preds().
+ */
+static int init_preds(struct ftrace_event_call *call)
+{
+       struct event_filter *filter;
+
+       if (call->filter)
+               return 0;
+
+       call->filter_active = 0;
+
+       filter = __alloc_preds();
+       if (IS_ERR(filter))
+               return PTR_ERR(filter);
+
+       call->filter = filter;
+
+       return 0;
+}
 
 static int init_subsystem_preds(struct event_subsystem *system)
@@ -499,14 +625,7 @@ static int init_subsystem_preds(struct event_subsystem *system)
        return 0;
 }
 
-enum {
-       FILTER_DISABLE_ALL,
-       FILTER_INIT_NO_RESET,
-       FILTER_SKIP_NO_RESET,
-};
-
-static void filter_free_subsystem_preds(struct event_subsystem *system,
-                                       int flag)
+static void filter_free_subsystem_preds(struct event_subsystem *system)
 {
        struct ftrace_event_call *call;
 
@@ -517,14 +636,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
                if (strcmp(call->system, system->name) != 0)
                        continue;
 
-               if (flag == FILTER_INIT_NO_RESET) {
-                       call->filter->no_reset = false;
-                       continue;
-               }
-
-               if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset)
-                       continue;
-
                filter_disable_preds(call);
                remove_filter_string(call->filter);
        }
@@ -532,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system,
 
 static int filter_add_pred_fn(struct filter_parse_state *ps,
                              struct ftrace_event_call *call,
+                             struct event_filter *filter,
                              struct filter_pred *pred,
                              filter_pred_fn_t fn)
 {
-       struct event_filter *filter = call->filter;
        int idx, err;
 
        if (filter->n_preds == MAX_FILTER_PRED) {
@@ -550,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
                return err;
 
        filter->n_preds++;
-       call->filter_active = 1;
 
        return 0;
 }
@@ -575,7 +685,10 @@ static bool is_string_field(struct ftrace_event_field *field)
 
 static int is_legal_op(struct ftrace_event_field *field, int op)
 {
-       if (is_string_field(field) && (op != OP_EQ && op != OP_NE))
+       if (is_string_field(field) &&
+           (op != OP_EQ && op != OP_NE && op != OP_GLOB))
+               return 0;
+       if (!is_string_field(field) && op == OP_GLOB)
                return 0;
 
        return 1;
@@ -626,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
 
 static int filter_add_pred(struct filter_parse_state *ps,
                           struct ftrace_event_call *call,
+                          struct event_filter *filter,
                           struct filter_pred *pred,
                           bool dry_run)
 {
@@ -660,21 +774,22 @@ static int filter_add_pred(struct filter_parse_state *ps,
        }
 
        if (is_string_field(field)) {
-               pred->str_len = field->size;
+               filter_build_regex(pred);
 
-               if (field->filter_type == FILTER_STATIC_STRING)
+               if (field->filter_type == FILTER_STATIC_STRING) {
                        fn = filter_pred_string;
-               else if (field->filter_type == FILTER_DYN_STRING)
+                       pred->regex.field_len = field->size;
+               } else if (field->filter_type == FILTER_DYN_STRING)
                        fn = filter_pred_strloc;
                else {
                        fn = filter_pred_pchar;
-                       pred->str_len = strlen(pred->str_val);
+                       pred->regex.field_len = strlen(pred->regex.pattern);
                }
        } else {
                if (field->is_signed)
-                       ret = strict_strtoll(pred->str_val, 0, &val);
+                       ret = strict_strtoll(pred->regex.pattern, 0, &val);
                else
-                       ret = strict_strtoull(pred->str_val, 0, &val);
+                       ret = strict_strtoull(pred->regex.pattern, 0, &val);
                if (ret) {
                        parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
                        return -EINVAL;
@@ -694,45 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
 
 add_pred_fn:
        if (!dry_run)
-               return filter_add_pred_fn(ps, call, pred, fn);
-       return 0;
-}
-
-static int filter_add_subsystem_pred(struct filter_parse_state *ps,
-                                    struct event_subsystem *system,
-                                    struct filter_pred *pred,
-                                    char *filter_string,
-                                    bool dry_run)
-{
-       struct ftrace_event_call *call;
-       int err = 0;
-       bool fail = true;
-
-       list_for_each_entry(call, &ftrace_events, list) {
-
-               if (!call->define_fields)
-                       continue;
-
-               if (strcmp(call->system, system->name))
-                       continue;
-
-               if (call->filter->no_reset)
-                       continue;
-
-               err = filter_add_pred(ps, call, pred, dry_run);
-               if (err)
-                       call->filter->no_reset = true;
-               else
-                       fail = false;
-
-               if (!dry_run)
-                       replace_filter_string(call->filter, filter_string);
-       }
-
-       if (fail) {
-               parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
-               return err;
-       }
+               return filter_add_pred_fn(ps, call, filter, pred, fn);
        return 0;
 }
 
@@ -1045,8 +1122,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
                return NULL;
        }
 
-       strcpy(pred->str_val, operand2);
-       pred->str_len = strlen(operand2);
+       strcpy(pred->regex.pattern, operand2);
+       pred->regex.len = strlen(pred->regex.pattern);
 
        pred->op = op;
 
@@ -1090,8 +1167,8 @@ static int check_preds(struct filter_parse_state *ps)
        return 0;
 }
 
-static int replace_preds(struct event_subsystem *system,
-                        struct ftrace_event_call *call,
+static int replace_preds(struct ftrace_event_call *call,
+                        struct event_filter *filter,
                         struct filter_parse_state *ps,
                         char *filter_string,
                         bool dry_run)
@@ -1138,11 +1215,7 @@ static int replace_preds(struct event_subsystem *system,
 add_pred:
                if (!pred)
                        return -ENOMEM;
-               if (call)
-                       err = filter_add_pred(ps, call, pred, false);
-               else
-                       err = filter_add_subsystem_pred(ps, system, pred,
-                                               filter_string, dry_run);
+               err = filter_add_pred(ps, call, filter, pred, dry_run);
                filter_free_pred(pred);
                if (err)
                        return err;
@@ -1153,10 +1226,50 @@ add_pred:
        return 0;
 }
 
-int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
+/*
+ * Apply @filter_string, already parsed into @ps, to the events of @system.
+ *
+ * Events without a define_fields callback, or whose system name differs
+ * from @system->name, are ignored.  Every remaining event is first tried
+ * with a dry run of replace_preds(); events for which the dry run fails
+ * are skipped.
+ *
+ * Returns 0 if the dry run succeeded for at least one event.  Otherwise
+ * records FILT_ERR_BAD_SUBSYS_FILTER in @ps and returns -EINVAL.
+ */
+static int replace_system_preds(struct event_subsystem *system,
+                               struct filter_parse_state *ps,
+                               char *filter_string)
 {
+       struct ftrace_event_call *call;
+       bool fail = true;
        int err;
 
+       list_for_each_entry(call, &ftrace_events, list) {
+               struct event_filter *filter = call->filter;
+
+               if (!call->define_fields)
+                       continue;
+
+               if (strcmp(call->system, system->name) != 0)
+                       continue;
+
+               /* try to see if the filter can be applied */
+               err = replace_preds(call, filter, ps, filter_string, true);
+               if (err)
+                       continue;
+
+               /* really apply the filter */
+               filter_disable_preds(call);
+               err = replace_preds(call, filter, ps, filter_string, false);
+               if (err)
+                       filter_disable_preds(call);
+               else {
+                       call->filter_active = 1;
+                       replace_filter_string(filter, filter_string);
+               }
+               /* the dry run passed, so the filter fits at least this event */
+               fail = false;
+       }
+
+       if (fail) {
+               parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
+{
+       int err;
        struct filter_parse_state *ps;
 
        mutex_lock(&event_mutex);
@@ -1168,8 +1281,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
        if (!strcmp(strstrip(filter_string), "0")) {
                filter_disable_preds(call);
                remove_filter_string(call->filter);
-               mutex_unlock(&event_mutex);
-               return 0;
+               goto out_unlock;
        }
 
        err = -ENOMEM;
@@ -1187,10 +1299,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
                goto out;
        }
 
-       err = replace_preds(NULL, call, ps, filter_string, false);
+       err = replace_preds(call, call->filter, ps, filter_string, false);
        if (err)
                append_filter_err(ps, call->filter);
-
+       else
+               call->filter_active = 1;
 out:
        filter_opstack_clear(ps);
        postfix_clear(ps);
@@ -1205,7 +1318,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
                                 char *filter_string)
 {
        int err;
-
        struct filter_parse_state *ps;
 
        mutex_lock(&event_mutex);
@@ -1215,10 +1327,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
                goto out_unlock;
 
        if (!strcmp(strstrip(filter_string), "0")) {
-               filter_free_subsystem_preds(system, FILTER_DISABLE_ALL);
+               filter_free_subsystem_preds(system);
                remove_filter_string(system->filter);
-               mutex_unlock(&event_mutex);
-               return 0;
+               goto out_unlock;
        }
 
        err = -ENOMEM;
@@ -1235,31 +1346,87 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
                goto out;
        }
 
-       filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET);
-
-       /* try to see the filter can be applied to which events */
-       err = replace_preds(system, NULL, ps, filter_string, true);
-       if (err) {
+       err = replace_system_preds(system, ps, filter_string);
+       if (err)
                append_filter_err(ps, system->filter);
-               goto out;
+
+out:
+       filter_opstack_clear(ps);
+       postfix_clear(ps);
+       kfree(ps);
+out_unlock:
+       mutex_unlock(&event_mutex);
+
+       return err;
+}
+
+#ifdef CONFIG_EVENT_PROFILE
+
+/*
+ * Detach the filter from the perf @event and release it with __free_preds().
+ * event->filter is cleared before the filter is freed.
+ */
+void ftrace_profile_free_filter(struct perf_event *event)
+{
+       struct event_filter *filter = event->filter;
+
+       event->filter = NULL;
+       __free_preds(filter);
+}
+
+/*
+ * Parse @filter_str and attach the resulting filter to the perf @event.
+ * The predicates are built by replace_preds() for the ftrace event whose
+ * id is @event_id.
+ *
+ * Returns 0 on success, -EINVAL if no ftrace event has id @event_id,
+ * -EEXIST if @event already has a filter, -ENOMEM if the parse state
+ * cannot be allocated, or the error reported by __alloc_preds(),
+ * filter_parse() or replace_preds().  On failure the newly allocated
+ * filter is freed and @event->filter is not assigned.
+ */
+int ftrace_profile_set_filter(struct perf_event *event, int event_id,
+                             char *filter_str)
+{
+       int err;
+       struct event_filter *filter;
+       struct filter_parse_state *ps;
+       struct ftrace_event_call *call = NULL;
+
+       mutex_lock(&event_mutex);
+
+       list_for_each_entry(call, &ftrace_events, list) {
+               if (call->id == event_id)
+                       break;
        }
 
-       filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET);
+       /*
+        * list_for_each_entry() never leaves the cursor NULL: when the loop
+        * runs to completion, @call is the container of the list head
+        * itself, so that is what "not found" has to be tested against.
+        */
+       err = -EINVAL;
+       if (&call->list == &ftrace_events)
+               goto out_unlock;
 
-       /* really apply the filter to the events */
-       err = replace_preds(system, NULL, ps, filter_string, false);
-       if (err) {
-               append_filter_err(ps, system->filter);
-               filter_free_subsystem_preds(system, 2);
+       err = -EEXIST;
+       if (event->filter)
+               goto out_unlock;
+
+       filter = __alloc_preds();
+       if (IS_ERR(filter)) {
+               err = PTR_ERR(filter);
+               goto out_unlock;
        }
 
-out:
+       err = -ENOMEM;
+       ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+       if (!ps)
+               goto free_preds;
+
+       parse_init(ps, filter_ops, filter_str);
+       err = filter_parse(ps);
+       if (err)
+               goto free_ps;
+
+       err = replace_preds(call, filter, ps, filter_str, false);
+       if (!err)
+               event->filter = filter;
+
+free_ps:
        filter_opstack_clear(ps);
        postfix_clear(ps);
        kfree(ps);
+
+free_preds:
+       if (err)
+               __free_preds(filter);
+
 out_unlock:
        mutex_unlock(&event_mutex);
 
        return err;
 }
 
+#endif /* CONFIG_EVENT_PROFILE */
+
index 9753fcc..31da218 100644 (file)
@@ -66,44 +66,47 @@ static void __used ____ftrace_check_##name(void)            \
 #undef __field
 #define __field(type, item)                                            \
        ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
-                              "offset:%zu;\tsize:%zu;\n",              \
+                              "offset:%zu;\tsize:%zu;\tsigned:%u;\n",  \
                               offsetof(typeof(field), item),           \
-                              sizeof(field.item));                     \
+                              sizeof(field.item), is_signed_type(type)); \
        if (!ret)                                                       \
                return 0;
 
 #undef __field_desc
 #define __field_desc(type, container, item)                            \
        ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
-                              "offset:%zu;\tsize:%zu;\n",              \
+                              "offset:%zu;\tsize:%zu;\tsigned:%u;\n",  \
                               offsetof(typeof(field), container.item), \
-                              sizeof(field.container.item));           \
+                              sizeof(field.container.item),            \
+                              is_signed_type(type));                   \
        if (!ret)                                                       \
                return 0;
 
 #undef __array
 #define __array(type, item, len)                                       \
        ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
-                              "offset:%zu;\tsize:%zu;\n",              \
-                              offsetof(typeof(field), item),   \
-                              sizeof(field.item));             \
+                              "offset:%zu;\tsize:%zu;\tsigned:%u;\n",  \
+                              offsetof(typeof(field), item),           \
+                              sizeof(field.item), is_signed_type(type)); \
        if (!ret)                                                       \
                return 0;
 
 #undef __array_desc
 #define __array_desc(type, container, item, len)                       \
        ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
-                              "offset:%zu;\tsize:%zu;\n",              \
+                              "offset:%zu;\tsize:%zu;\tsigned:%u;\n",  \
                               offsetof(typeof(field), container.item), \
-                              sizeof(field.container.item));           \
+                              sizeof(field.container.item),            \
+                              is_signed_type(type));                   \
        if (!ret)                                                       \
                return 0;
 
 #undef __dynamic_array
 #define __dynamic_array(type, item)                                    \
        ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
-                              "offset:%zu;\tsize:0;\n",                \
-                              offsetof(typeof(field), item));          \
+                              "offset:%zu;\tsize:0;\tsigned:%u;\n",    \
+                              offsetof(typeof(field), item),           \
+                              is_signed_type(type));                   \
        if (!ret)                                                       \
                return 0;
 
index 527e17e..d00d1a8 100644 (file)
@@ -14,6 +14,69 @@ static int sys_refcount_exit;
 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
 
+extern unsigned long __start_syscalls_metadata[];
+extern unsigned long __stop_syscalls_metadata[];
+
+static struct syscall_metadata **syscalls_metadata;
+
+/*
+ * Find the syscall metadata entry for the syscall handler at address
+ * @syscall: the address is resolved to a symbol name, which is matched
+ * against the names of the entries between __start_syscalls_metadata and
+ * __stop_syscalls_metadata.
+ *
+ * Returns the matching entry, or NULL if the address cannot be resolved to
+ * a usable name or no entry matches.
+ */
+static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
+{
+       struct syscall_metadata *start;
+       struct syscall_metadata *stop;
+       char str[KSYM_SYMBOL_LEN];
+
+
+       start = (struct syscall_metadata *)__start_syscalls_metadata;
+       stop = (struct syscall_metadata *)__stop_syscalls_metadata;
+
+       /*
+        * Without a resolved name @str is not a string we may compare, and
+        * the comparison below skips its first three characters, so a
+        * shorter name cannot be matched either.
+        */
+       if (!kallsyms_lookup(syscall, NULL, NULL, NULL, str) ||
+           strlen(str) < 3)
+               return NULL;
+
+       for ( ; start < stop; start++) {
+               /*
+                * Only compare after the "sys" prefix. Archs that use
+                * syscall wrappers may have syscalls symbols aliases prefixed
+                * with "SyS" instead of "sys", leading to an unwanted
+                * mismatch.
+                */
+               if (start->name && !strcmp(start->name + 3, str + 3))
+                       return start;
+       }
+       return NULL;
+}
+
+/*
+ * Return the metadata recorded for syscall number @nr, or NULL if the
+ * syscalls_metadata table has not been allocated, @nr is outside
+ * [0, NR_syscalls), or the slot for @nr holds no metadata.
+ */
+static struct syscall_metadata *syscall_nr_to_meta(int nr)
+{
+       if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
+               return NULL;
+
+       return syscalls_metadata[nr];
+}
+
+/*
+ * Return the number of the first syscall whose metadata name equals @name,
+ * or -1 if the syscalls_metadata table has not been allocated or no entry
+ * has that name.
+ */
+int syscall_name_to_nr(char *name)
+{
+       int i;
+
+       if (!syscalls_metadata)
+               return -1;
+
+       for (i = 0; i < NR_syscalls; i++) {
+               /* slots without metadata are NULL and are skipped */
+               if (syscalls_metadata[i]) {
+                       if (!strcmp(syscalls_metadata[i]->name, name))
+                               return i;
+               }
+       }
+       return -1;
+}
+
+/*
+ * Record @id as the enter_id of syscall @num.  Nothing is validated here:
+ * syscalls_metadata must be allocated and hold an entry for @num.
+ */
+void set_syscall_enter_id(int num, int id)
+{
+       syscalls_metadata[num]->enter_id = id;
+}
+
+/*
+ * Record @id as the exit_id of syscall @num.  Nothing is validated here:
+ * syscalls_metadata must be allocated and hold an entry for @num.
+ */
+void set_syscall_exit_id(int num, int id)
+{
+       syscalls_metadata[num]->exit_id = id;
+}
+
 enum print_line_t
 print_syscall_enter(struct trace_iterator *iter, int flags)
 {
@@ -103,7 +166,8 @@ extern char *__bad_type_size(void);
 #define SYSCALL_FIELD(type, name)                                      \
        sizeof(type) != sizeof(trace.name) ?                            \
                __bad_type_size() :                                     \
-               #type, #name, offsetof(typeof(trace), name), sizeof(trace.name)
+               #type, #name, offsetof(typeof(trace), name),            \
+               sizeof(trace.name), is_signed_type(type)
 
 int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
 {
@@ -120,7 +184,8 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
        if (!entry)
                return 0;
 
-       ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
+       ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+                              "\tsigned:%u;\n",
                               SYSCALL_FIELD(int, nr));
        if (!ret)
                return 0;
@@ -130,8 +195,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
                                        entry->args[i]);
                if (!ret)
                        return 0;
-               ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset,
-                                      sizeof(unsigned long));
+               ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
+                                      "\tsigned:%u;\n", offset,
+                                      sizeof(unsigned long),
+                                      is_signed_type(unsigned long));
                if (!ret)
                        return 0;
                offset += sizeof(unsigned long);
@@ -163,8 +230,10 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
        struct syscall_trace_exit trace;
 
        ret = trace_seq_printf(s,
-                              "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
-                              "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n",
+                              "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+                              "\tsigned:%u;\n"
+                              "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
+                              "\tsigned:%u;\n",
                               SYSCALL_FIELD(int, nr),
                               SYSCALL_FIELD(long, ret));
        if (!ret)
@@ -212,7 +281,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
        if (ret)
                return ret;
 
-       ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0,
+       ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
                                 FILTER_OTHER);
 
        return ret;
@@ -375,6 +444,29 @@ struct trace_event event_syscall_exit = {
        .trace                  = print_syscall_exit,
 };
 
+/*
+ * Allocate the syscalls_metadata table (NR_syscalls zeroed slots) and fill
+ * slot i with the metadata found for the address arch_syscall_addr(i).
+ *
+ * Returns 0 on success, or -ENOMEM (after a WARN_ON) if the table cannot
+ * be allocated.
+ */
+int __init init_ftrace_syscalls(void)
+{
+       struct syscall_metadata *meta;
+       unsigned long addr;
+       int i;
+
+       syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
+                                       NR_syscalls, GFP_KERNEL);
+       if (!syscalls_metadata) {
+               WARN_ON(1);
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < NR_syscalls; i++) {
+               addr = arch_syscall_addr(i);
+               /* meta is NULL when no metadata matches; the slot stays NULL */
+               meta = find_syscall_meta(addr);
+               syscalls_metadata[i] = meta;
+       }
+
+       return 0;
+}
+core_initcall(init_ftrace_syscalls);
+
 #ifdef CONFIG_EVENT_PROFILE
 
 static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
index 39f1029..4ebfa5a 100644 (file)
@@ -5,10 +5,13 @@
  * relegated to obsolescence, but used by various less
  * important (or lazy) subsystems.
  */
-#include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/semaphore.h>
+#include <linux/smp_lock.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/bkl.h>
 
 /*
  * The 'big kernel lock'
@@ -113,21 +116,26 @@ static inline void __unlock_kernel(void)
  * This cannot happen asynchronously, so we only need to
  * worry about other CPU's.
  */
-void __lockfunc lock_kernel(void)
+void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 {
-       int depth = current->lock_depth+1;
+       int depth = current->lock_depth + 1;
+
+       /* traced on every call, nested or not, before the lock is taken */
+       trace_lock_kernel(func, file, line);
+
        if (likely(!depth))
                __lock_kernel();
        current->lock_depth = depth;
 }
 
-void __lockfunc unlock_kernel(void)
+void __lockfunc _unlock_kernel(const char *func, const char *file, int line)
 {
        BUG_ON(current->lock_depth < 0);
        if (likely(--current->lock_depth < 0))
                __unlock_kernel();
+
+       /* traced on every call, after the depth has been decremented */
+       trace_unlock_kernel(func, file, line);
 }
 
-EXPORT_SYMBOL(lock_kernel);
-EXPORT_SYMBOL(unlock_kernel);
+EXPORT_SYMBOL(_lock_kernel);
+EXPORT_SYMBOL(_unlock_kernel);
 
index 090d300..bfb8b2c 100755 (executable)
@@ -119,6 +119,7 @@ my %text_sections = (
      ".sched.text" => 1,
      ".spinlock.text" => 1,
      ".irqentry.text" => 1,
+     ".text.unlikely" => 1,
 );
 
 $objdump = "objdump" if ((length $objdump) == 0);
index a791009..4b17883 100644 (file)
@@ -31,9 +31,12 @@ OPTIONS
 -w::
 --width=::
         Select the width of the SVG file (default: 1000)
--p::
+-P::
 --power-only::
         Only output the CPU power section of the diagram
+-p::
+--process::
+        Select the processes to display, by name or PID
 
 
 SEE ALSO
index 7e190d5..542b29e 100644 (file)
@@ -200,7 +200,14 @@ EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wold-style-definition
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-prototypes
 EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wdeclaration-after-statement
 
-CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -fstack-protector-all -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS)
+ifeq ("$(origin DEBUG)", "command line")
+  PERF_DEBUG = $(DEBUG)
+endif
+ifndef PERF_DEBUG
+  CFLAGS_OPTIMIZE = -O6
+endif
+
+CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -fstack-protector-all -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS)
 LDFLAGS = -lpthread -lrt -lelf -lm
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
@@ -327,8 +334,26 @@ LIB_FILE=libperf.a
 LIB_H += ../../include/linux/perf_event.h
 LIB_H += ../../include/linux/rbtree.h
 LIB_H += ../../include/linux/list.h
+LIB_H += util/include/linux/bitmap.h
+LIB_H += util/include/linux/bitops.h
+LIB_H += util/include/linux/compiler.h
+LIB_H += util/include/linux/ctype.h
+LIB_H += util/include/linux/kernel.h
 LIB_H += util/include/linux/list.h
+LIB_H += util/include/linux/module.h
+LIB_H += util/include/linux/poison.h
+LIB_H += util/include/linux/prefetch.h
+LIB_H += util/include/linux/rbtree.h
+LIB_H += util/include/linux/string.h
+LIB_H += util/include/linux/types.h
+LIB_H += util/include/asm/asm-offsets.h
+LIB_H += util/include/asm/bitops.h
+LIB_H += util/include/asm/byteorder.h
+LIB_H += util/include/asm/swab.h
+LIB_H += util/include/asm/system.h
+LIB_H += util/include/asm/uaccess.h
 LIB_H += perf.h
+LIB_H += util/event.h
 LIB_H += util/types.h
 LIB_H += util/levenshtein.h
 LIB_H += util/parse-options.h
@@ -342,15 +367,19 @@ LIB_H += util/strlist.h
 LIB_H += util/run-command.h
 LIB_H += util/sigchain.h
 LIB_H += util/symbol.h
-LIB_H += util/module.h
 LIB_H += util/color.h
 LIB_H += util/values.h
+LIB_H += util/sort.h
+LIB_H += util/hist.h
+LIB_H += util/thread.h
+LIB_H += util/data_map.h
 
 LIB_OBJS += util/abspath.o
 LIB_OBJS += util/alias.o
 LIB_OBJS += util/config.o
 LIB_OBJS += util/ctype.o
 LIB_OBJS += util/environment.o
+LIB_OBJS += util/event.o
 LIB_OBJS += util/exec_cmd.o
 LIB_OBJS += util/help.o
 LIB_OBJS += util/levenshtein.o
@@ -358,6 +387,9 @@ LIB_OBJS += util/parse-options.o
 LIB_OBJS += util/parse-events.o
 LIB_OBJS += util/path.o
 LIB_OBJS += util/rbtree.o
+LIB_OBJS += util/bitmap.o
+LIB_OBJS += util/hweight.o
+LIB_OBJS += util/find_next_bit.o
 LIB_OBJS += util/run-command.o
 LIB_OBJS += util/quote.o
 LIB_OBJS += util/strbuf.o
@@ -367,7 +399,6 @@ LIB_OBJS += util/usage.o
 LIB_OBJS += util/wrapper.o
 LIB_OBJS += util/sigchain.o
 LIB_OBJS += util/symbol.o
-LIB_OBJS += util/module.o
 LIB_OBJS += util/color.o
 LIB_OBJS += util/pager.o
 LIB_OBJS += util/header.o
@@ -380,6 +411,9 @@ LIB_OBJS += util/trace-event-parse.o
 LIB_OBJS += util/trace-event-read.o
 LIB_OBJS += util/trace-event-info.o
 LIB_OBJS += util/svghelper.o
+LIB_OBJS += util/sort.o
+LIB_OBJS += util/hist.o
+LIB_OBJS += util/data_map.o
 
 BUILTIN_OBJS += builtin-annotate.o
 BUILTIN_OBJS += builtin-help.o
@@ -787,6 +821,19 @@ util/config.o: util/config.c PERF-CFLAGS
 util/rbtree.o: ../../lib/rbtree.c PERF-CFLAGS
        $(QUIET_CC)$(CC) -o util/rbtree.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
+# Some perf warning policies don't suit lib/bitmap.c, e.g. it triggers variable
+# shadowing warnings from <string.h>, which comes in through the kernel headers wrapping.
+KBITMAP_FLAGS=`echo $(ALL_CFLAGS) | sed s/-Wshadow// | sed s/-Wswitch-default// | sed s/-Wextra//`
+
+util/bitmap.o: ../../lib/bitmap.c PERF-CFLAGS
+       $(QUIET_CC)$(CC) -o util/bitmap.o -c $(KBITMAP_FLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+util/hweight.o: ../../lib/hweight.c PERF-CFLAGS
+       $(QUIET_CC)$(CC) -o util/hweight.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS
+       $(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
 perf-%$X: %.o $(PERFLIBS)
        $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
 
index 1ec7416..77d50a6 100644 (file)
 #include "util/parse-options.h"
 #include "util/parse-events.h"
 #include "util/thread.h"
+#include "util/sort.h"
+#include "util/hist.h"
 
 static char            const *input_name = "perf.data";
 
-static char            default_sort_order[] = "comm,symbol";
-static char            *sort_order = default_sort_order;
-
 static int             force;
 static int             input;
-static int             show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
 static int             full_paths;
 
@@ -39,9 +37,10 @@ static int           print_line;
 static unsigned long   page_size;
 static unsigned long   mmap_window = 32;
 
-static struct rb_root  threads;
-static struct thread   *last_match;
-
+/* Per-symbol hit histogram, allocated and zeroed in symbol_filter(). */
+struct sym_hist {
+       u64             sum;    /* incremented by hist_hit() for each counted hit */
+       u64             ip[0];  /* hit counts, indexed by ip - sym->start */
+};
 
 struct sym_ext {
        struct rb_node  node;
@@ -49,247 +48,33 @@ struct sym_ext {
        char            *path;
 };
 
-/*
- * histogram, sorted on item, collects counts
- */
-
-static struct rb_root hist;
-
-struct hist_entry {
-       struct rb_node   rb_node;
-
-       struct thread    *thread;
-       struct map       *map;
-       struct dso       *dso;
-       struct symbol    *sym;
-       u64      ip;
-       char             level;
-
-       uint32_t         count;
-};
-
-/*
- * configurable sorting bits
- */
-
-struct sort_entry {
-       struct list_head list;
-
-       const char *header;
-
-       int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
-       int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
-       size_t  (*print)(FILE *fp, struct hist_entry *);
-};
-
-/* --sort pid */
-
-static int64_t
-sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       return right->thread->pid - left->thread->pid;
-}
-
-static size_t
-sort__thread_print(FILE *fp, struct hist_entry *self)
-{
-       return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid);
-}
-
-static struct sort_entry sort_thread = {
-       .header = "         Command:  Pid",
-       .cmp    = sort__thread_cmp,
-       .print  = sort__thread_print,
-};
-
-/* --sort comm */
-
-static int64_t
-sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       return right->thread->pid - left->thread->pid;
-}
-
-static int64_t
-sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
-{
-       char *comm_l = left->thread->comm;
-       char *comm_r = right->thread->comm;
-
-       if (!comm_l || !comm_r) {
-               if (!comm_l && !comm_r)
-                       return 0;
-               else if (!comm_l)
-                       return -1;
-               else
-                       return 1;
-       }
-
-       return strcmp(comm_l, comm_r);
-}
-
-static size_t
-sort__comm_print(FILE *fp, struct hist_entry *self)
-{
-       return fprintf(fp, "%16s", self->thread->comm);
-}
-
-static struct sort_entry sort_comm = {
-       .header         = "         Command",
-       .cmp            = sort__comm_cmp,
-       .collapse       = sort__comm_collapse,
-       .print          = sort__comm_print,
-};
-
-/* --sort dso */
-
-static int64_t
-sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       struct dso *dso_l = left->dso;
-       struct dso *dso_r = right->dso;
-
-       if (!dso_l || !dso_r) {
-               if (!dso_l && !dso_r)
-                       return 0;
-               else if (!dso_l)
-                       return -1;
-               else
-                       return 1;
-       }
-
-       return strcmp(dso_l->name, dso_r->name);
-}
-
-static size_t
-sort__dso_print(FILE *fp, struct hist_entry *self)
-{
-       if (self->dso)
-               return fprintf(fp, "%-25s", self->dso->name);
-
-       return fprintf(fp, "%016llx         ", (u64)self->ip);
-}
-
-static struct sort_entry sort_dso = {
-       .header = "Shared Object            ",
-       .cmp    = sort__dso_cmp,
-       .print  = sort__dso_print,
-};
-
-/* --sort symbol */
-
-static int64_t
-sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       u64 ip_l, ip_r;
-
-       if (left->sym == right->sym)
-               return 0;
-
-       ip_l = left->sym ? left->sym->start : left->ip;
-       ip_r = right->sym ? right->sym->start : right->ip;
-
-       return (int64_t)(ip_r - ip_l);
-}
-
-static size_t
-sort__sym_print(FILE *fp, struct hist_entry *self)
-{
-       size_t ret = 0;
-
-       if (verbose)
-               ret += fprintf(fp, "%#018llx  ", (u64)self->ip);
-
-       if (self->sym) {
-               ret += fprintf(fp, "[%c] %s",
-                       self->dso == kernel_dso ? 'k' : '.', self->sym->name);
-       } else {
-               ret += fprintf(fp, "%#016llx", (u64)self->ip);
-       }
-
-       return ret;
-}
-
-static struct sort_entry sort_sym = {
-       .header = "Symbol",
-       .cmp    = sort__sym_cmp,
-       .print  = sort__sym_print,
-};
-
-static int sort__need_collapse = 0;
-
-struct sort_dimension {
-       const char              *name;
-       struct sort_entry       *entry;
-       int                     taken;
-};
-
-static struct sort_dimension sort_dimensions[] = {
-       { .name = "pid",        .entry = &sort_thread,  },
-       { .name = "comm",       .entry = &sort_comm,    },
-       { .name = "dso",        .entry = &sort_dso,     },
-       { .name = "symbol",     .entry = &sort_sym,     },
+/* Private data of a symbol, as returned by symbol__priv(). */
+struct sym_priv {
+       struct sym_hist *hist;
+       struct sym_ext  *ext;
 };
 
-static LIST_HEAD(hist_entry__sort_list);
+static const char *sym_hist_filter;
 
-static int sort_dimension__add(char *tok)
+static int symbol_filter(struct map *map __used, struct symbol *sym)
 {
-       unsigned int i;
-
-       for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
-               struct sort_dimension *sd = &sort_dimensions[i];
-
-               if (sd->taken)
-                       continue;
-
-               if (strncasecmp(tok, sd->name, strlen(tok)))
-                       continue;
-
-               if (sd->entry->collapse)
-                       sort__need_collapse = 1;
-
-               list_add_tail(&sd->entry->list, &hist_entry__sort_list);
-               sd->taken = 1;
+       if (sym_hist_filter == NULL ||
+           strcmp(sym->name, sym_hist_filter) == 0) {
+               struct sym_priv *priv = symbol__priv(sym);
+               const int size = (sizeof(*priv->hist) +
+                                 (sym->end - sym->start) * sizeof(u64));
 
+               priv->hist = malloc(size);
+               if (priv->hist)
+                       memset(priv->hist, 0, size);
                return 0;
        }
-
-       return -ESRCH;
-}
-
-static int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       struct sort_entry *se;
-       int64_t cmp = 0;
-
-       list_for_each_entry(se, &hist_entry__sort_list, list) {
-               cmp = se->cmp(left, right);
-               if (cmp)
-                       break;
-       }
-
-       return cmp;
-}
-
-static int64_t
-hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
-{
-       struct sort_entry *se;
-       int64_t cmp = 0;
-
-       list_for_each_entry(se, &hist_entry__sort_list, list) {
-               int64_t (*f)(struct hist_entry *, struct hist_entry *);
-
-               f = se->collapse ?: se->cmp;
-
-               cmp = f(left, right);
-               if (cmp)
-                       break;
-       }
-
-       return cmp;
+       /*
+        * FIXME: We should really filter it out, as we don't want to go thru symbols
+        * we're not interested, and if a DSO ends up with no symbols, delete it too,
+        * but right now the kernel loading routines in symbol.c bail out if no symbols
+        * are found, fix it later.
+        */
+       return 0;
 }
 
 /*
@@ -299,196 +84,60 @@ static void hist_hit(struct hist_entry *he, u64 ip)
 {
        unsigned int sym_size, offset;
        struct symbol *sym = he->sym;
+       struct sym_priv *priv;
+       struct sym_hist *h;
 
        he->count++;
 
-       if (!sym || !sym->hist)
+       if (!sym || !he->map)
+               return;
+
+       priv = symbol__priv(sym);
+       if (!priv->hist)
                return;
 
        sym_size = sym->end - sym->start;
        offset = ip - sym->start;
 
+       if (verbose)
+               fprintf(stderr, "%s: ip=%Lx\n", __func__,
+                       he->map->unmap_ip(he->map, ip));
+
        if (offset >= sym_size)
                return;
 
-       sym->hist_sum++;
-       sym->hist[offset]++;
+       h = priv->hist;
+       h->sum++;
+       h->ip[offset]++;
 
        if (verbose >= 3)
                printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n",
                        (void *)(unsigned long)he->sym->start,
                        he->sym->name,
                        (void *)(unsigned long)ip, ip - he->sym->start,
-                       sym->hist[offset]);
+                       h->ip[offset]);
 }
 
-static int
-hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
-               struct symbol *sym, u64 ip, char level)
+static int hist_entry__add(struct thread *thread, struct map *map,
+                          struct symbol *sym, u64 ip, u64 count, char level)
 {
-       struct rb_node **p = &hist.rb_node;
-       struct rb_node *parent = NULL;
-       struct hist_entry *he;
-       struct hist_entry entry = {
-               .thread = thread,
-               .map    = map,
-               .dso    = dso,
-               .sym    = sym,
-               .ip     = ip,
-               .level  = level,
-               .count  = 1,
-       };
-       int cmp;
-
-       while (*p != NULL) {
-               parent = *p;
-               he = rb_entry(parent, struct hist_entry, rb_node);
-
-               cmp = hist_entry__cmp(&entry, he);
-
-               if (!cmp) {
-                       hist_hit(he, ip);
-
-                       return 0;
-               }
-
-               if (cmp < 0)
-                       p = &(*p)->rb_left;
-               else
-                       p = &(*p)->rb_right;
-       }
-
-       he = malloc(sizeof(*he));
-       if (!he)
+       bool hit;
+       struct hist_entry *he = __hist_entry__add(thread, map, sym, NULL, ip,
+                                                 count, level, &hit);
+       if (he == NULL)
                return -ENOMEM;
-       *he = entry;
-       rb_link_node(&he->rb_node, parent, p);
-       rb_insert_color(&he->rb_node, &hist);
-
+       hist_hit(he, ip);
        return 0;
 }
 
-static void hist_entry__free(struct hist_entry *he)
-{
-       free(he);
-}
-
-/*
- * collapse the histogram
- */
-
-static struct rb_root collapse_hists;
-
-static void collapse__insert_entry(struct hist_entry *he)
-{
-       struct rb_node **p = &collapse_hists.rb_node;
-       struct rb_node *parent = NULL;
-       struct hist_entry *iter;
-       int64_t cmp;
-
-       while (*p != NULL) {
-               parent = *p;
-               iter = rb_entry(parent, struct hist_entry, rb_node);
-
-               cmp = hist_entry__collapse(iter, he);
-
-               if (!cmp) {
-                       iter->count += he->count;
-                       hist_entry__free(he);
-                       return;
-               }
-
-               if (cmp < 0)
-                       p = &(*p)->rb_left;
-               else
-                       p = &(*p)->rb_right;
-       }
-
-       rb_link_node(&he->rb_node, parent, p);
-       rb_insert_color(&he->rb_node, &collapse_hists);
-}
-
-static void collapse__resort(void)
-{
-       struct rb_node *next;
-       struct hist_entry *n;
-
-       if (!sort__need_collapse)
-               return;
-
-       next = rb_first(&hist);
-       while (next) {
-               n = rb_entry(next, struct hist_entry, rb_node);
-               next = rb_next(&n->rb_node);
-
-               rb_erase(&n->rb_node, &hist);
-               collapse__insert_entry(n);
-       }
-}
-
-/*
- * reverse the map, sort on count.
- */
-
-static struct rb_root output_hists;
-
-static void output__insert_entry(struct hist_entry *he)
-{
-       struct rb_node **p = &output_hists.rb_node;
-       struct rb_node *parent = NULL;
-       struct hist_entry *iter;
-
-       while (*p != NULL) {
-               parent = *p;
-               iter = rb_entry(parent, struct hist_entry, rb_node);
-
-               if (he->count > iter->count)
-                       p = &(*p)->rb_left;
-               else
-                       p = &(*p)->rb_right;
-       }
-
-       rb_link_node(&he->rb_node, parent, p);
-       rb_insert_color(&he->rb_node, &output_hists);
-}
-
-static void output__resort(void)
-{
-       struct rb_node *next;
-       struct hist_entry *n;
-       struct rb_root *tree = &hist;
-
-       if (sort__need_collapse)
-               tree = &collapse_hists;
-
-       next = rb_first(tree);
-
-       while (next) {
-               n = rb_entry(next, struct hist_entry, rb_node);
-               next = rb_next(&n->rb_node);
-
-               rb_erase(&n->rb_node, tree);
-               output__insert_entry(n);
-       }
-}
-
-static unsigned long total = 0,
-                    total_mmap = 0,
-                    total_comm = 0,
-                    total_fork = 0,
-                    total_unknown = 0;
-
 static int
 process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 {
        char level;
-       int show = 0;
-       struct dso *dso = NULL;
-       struct thread *thread;
        u64 ip = event->ip.ip;
        struct map *map = NULL;
-
-       thread = threads__findnew(event->ip.pid, &threads, &last_match);
+       struct symbol *sym = NULL;
+       struct thread *thread = threads__findnew(event->ip.pid);
 
        dump_printf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n",
                (void *)(offset + head),
@@ -497,60 +146,53 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
                event->ip.pid,
                (void *)(long)ip);
 
-       dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
        if (thread == NULL) {
                fprintf(stderr, "problem processing %d event, skipping it.\n",
                        event->header.type);
                return -1;
        }
 
+       dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
        if (event->header.misc & PERF_RECORD_MISC_KERNEL) {
-               show = SHOW_KERNEL;
                level = 'k';
-
-               dso = kernel_dso;
-
-               dump_printf(" ...... dso: %s\n", dso->name);
-
+               sym = kernel_maps__find_symbol(ip, &map);
+               dump_printf(" ...... dso: %s\n",
+                           map ? map->dso->long_name : "<not found>");
        } else if (event->header.misc & PERF_RECORD_MISC_USER) {
-
-               show = SHOW_USER;
                level = '.';
-
                map = thread__find_map(thread, ip);
                if (map != NULL) {
+got_map:
                        ip = map->map_ip(map, ip);
-                       dso = map->dso;
+                       sym = map__find_symbol(map, ip, symbol_filter);
                } else {
                        /*
                         * If this is outside of all known maps,
                         * and is a negative address, try to look it
                         * up in the kernel dso, as it might be a
-                        * vsyscall (which executes in user-mode):
+                        * vsyscall or vdso (which executes in user-mode).
+                        *
+                        * XXX This is nasty, we should have a symbol list in
+                        * the "[vdso]" dso, but for now lets use the old
+                        * trick of looking in the whole kernel symbol list.
                         */
-                       if ((long long)ip < 0)
-                               dso = kernel_dso;
+                       if ((long long)ip < 0) {
+                               map = kernel_map;
+                               goto got_map;
+                       }
                }
-               dump_printf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
-
+               dump_printf(" ...... dso: %s\n",
+                           map ? map->dso->long_name : "<not found>");
        } else {
-               show = SHOW_HV;
                level = 'H';
                dump_printf(" ...... dso: [hypervisor]\n");
        }
 
-       if (show & show_mask) {
-               struct symbol *sym = NULL;
-
-               if (dso)
-                       sym = dso->find_symbol(dso, ip);
-
-               if (hist_entry__add(thread, map, dso, sym, ip, level)) {
-                       fprintf(stderr,
-               "problem incrementing symbol count, skipping event\n");
-                       return -1;
-               }
+       if (hist_entry__add(thread, map, sym, ip, 1, level)) {
+               fprintf(stderr, "problem incrementing symbol count, "
+                               "skipping event\n");
+               return -1;
        }
        total++;
 
@@ -560,10 +202,8 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
        struct map *map = map__new(&event->mmap, NULL, 0);
-
-       thread = threads__findnew(event->mmap.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->mmap.pid);
 
        dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n",
                (void *)(offset + head),
@@ -588,9 +228,8 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
+       struct thread *thread = threads__findnew(event->comm.pid);
 
-       thread = threads__findnew(event->comm.pid, &threads, &last_match);
        dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
@@ -609,11 +248,9 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_fork_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
-       struct thread *parent;
+       struct thread *thread = threads__findnew(event->fork.pid);
+       struct thread *parent = threads__findnew(event->fork.ppid);
 
-       thread = threads__findnew(event->fork.pid, &threads, &last_match);
-       parent = threads__findnew(event->fork.ppid, &threads, &last_match);
        dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n",
                (void *)(offset + head),
                (void *)(long)(event->header.size),
@@ -665,14 +302,15 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
        return 0;
 }
 
-static int
-parse_line(FILE *file, struct symbol *sym, u64 start, u64 len)
+static int parse_line(FILE *file, struct hist_entry *he, u64 len)
 {
+       struct symbol *sym = he->sym;
        char *line = NULL, *tmp, *tmp2;
        static const char *prev_line;
        static const char *prev_color;
        unsigned int offset;
        size_t line_len;
+       u64 start;
        s64 line_ip;
        int ret;
        char *c;
@@ -709,22 +347,26 @@ parse_line(FILE *file, struct symbol *sym, u64 start, u64 len)
                        line_ip = -1;
        }
 
+       start = he->map->unmap_ip(he->map, sym->start);
+
        if (line_ip != -1) {
                const char *path = NULL;
                unsigned int hits = 0;
                double percent = 0.0;
                const char *color;
-               struct sym_ext *sym_ext = sym->priv;
+               struct sym_priv *priv = symbol__priv(sym);
+               struct sym_ext *sym_ext = priv->ext;
+               struct sym_hist *h = priv->hist;
 
                offset = line_ip - start;
                if (offset < len)
-                       hits = sym->hist[offset];
+                       hits = h->ip[offset];
 
                if (offset < len && sym_ext) {
                        path = sym_ext[offset].path;
                        percent = sym_ext[offset].percent;
-               } else if (sym->hist_sum)
-                       percent = 100.0 * hits / sym->hist_sum;
+               } else if (h->sum)
+                       percent = 100.0 * hits / h->sum;
 
                color = get_percent_color(percent);
 
@@ -777,9 +419,10 @@ static void insert_source_line(struct sym_ext *sym_ext)
        rb_insert_color(&sym_ext->node, &root_sym_ext);
 }
 
-static void free_source_line(struct symbol *sym, int len)
+static void free_source_line(struct hist_entry *he, int len)
 {
-       struct sym_ext *sym_ext = sym->priv;
+       struct sym_priv *priv = symbol__priv(he->sym);
+       struct sym_ext *sym_ext = priv->ext;
        int i;
 
        if (!sym_ext)
@@ -789,26 +432,30 @@ static void free_source_line(struct symbol *sym, int len)
                free(sym_ext[i].path);
        free(sym_ext);
 
-       sym->priv = NULL;
+       priv->ext = NULL;
        root_sym_ext = RB_ROOT;
 }
 
 /* Get the filename:line for the colored entries */
 static void
-get_source_line(struct symbol *sym, u64 start, int len, const char *filename)
+get_source_line(struct hist_entry *he, int len, const char *filename)
 {
+       struct symbol *sym = he->sym;
+       u64 start;
        int i;
        char cmd[PATH_MAX * 2];
        struct sym_ext *sym_ext;
+       struct sym_priv *priv = symbol__priv(sym);
+       struct sym_hist *h = priv->hist;
 
-       if (!sym->hist_sum)
+       if (!h->sum)
                return;
 
-       sym->priv = calloc(len, sizeof(struct sym_ext));
-       if (!sym->priv)
+       sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext));
+       if (!priv->ext)
                return;
 
-       sym_ext = sym->priv;
+       start = he->map->unmap_ip(he->map, sym->start);
 
        for (i = 0; i < len; i++) {
                char *path = NULL;
@@ -816,7 +463,7 @@ get_source_line(struct symbol *sym, u64 start, int len, const char *filename)
                u64 offset;
                FILE *fp;
 
-               sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum;
+               sym_ext[i].percent = 100.0 * h->ip[i] / h->sum;
                if (sym_ext[i].percent <= 0.5)
                        continue;
 
@@ -870,33 +517,34 @@ static void print_summary(const char *filename)
        }
 }
 
-static void annotate_sym(struct dso *dso, struct symbol *sym)
+static void annotate_sym(struct hist_entry *he)
 {
-       const char *filename = dso->name, *d_filename;
-       u64 start, end, len;
+       struct map *map = he->map;
+       struct dso *dso = map->dso;
+       struct symbol *sym = he->sym;
+       const char *filename = dso->long_name, *d_filename;
+       u64 len;
        char command[PATH_MAX*2];
        FILE *file;
 
        if (!filename)
                return;
-       if (sym->module)
-               filename = sym->module->path;
-       else if (dso == kernel_dso)
-               filename = vmlinux_name;
-
-       start = sym->obj_start;
-       if (!start)
-               start = sym->start;
+
+       if (verbose)
+               fprintf(stderr, "%s: filename=%s, sym=%s, start=%Lx, end=%Lx\n",
+                       __func__, filename, sym->name,
+                       map->unmap_ip(map, sym->start),
+                       map->unmap_ip(map, sym->end));
+
        if (full_paths)
                d_filename = filename;
        else
                d_filename = basename(filename);
 
-       end = start + sym->end - sym->start + 1;
        len = sym->end - sym->start;
 
        if (print_line) {
-               get_source_line(sym, start, len, filename);
+               get_source_line(he, len, filename);
                print_summary(filename);
        }
 
@@ -905,10 +553,12 @@ static void annotate_sym(struct dso *dso, struct symbol *sym)
        printf("------------------------------------------------\n");
 
        if (verbose >= 2)
-               printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name);
+               printf("annotating [%p] %30s : [%p] %30s\n",
+                      dso, dso->long_name, sym, sym->name);
 
        sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s",
-                       (u64)start, (u64)end, filename, filename);
+               map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end),
+               filename, filename);
 
        if (verbose >= 3)
                printf("doing: %s\n", command);
@@ -918,35 +568,38 @@ static void annotate_sym(struct dso *dso, struct symbol *sym)
                return;
 
        while (!feof(file)) {
-               if (parse_line(file, sym, start, len) < 0)
+               if (parse_line(file, he, len) < 0)
                        break;
        }
 
        pclose(file);
        if (print_line)
-               free_source_line(sym, len);
+               free_source_line(he, len);
 }
 
 static void find_annotations(void)
 {
        struct rb_node *nd;
-       struct dso *dso;
-       int count = 0;
 
-       list_for_each_entry(dso, &dsos, node) {
+       for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) {
+               struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+               struct sym_priv *priv;
 
-               for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) {
-                       struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+               if (he->sym == NULL)
+                       continue;
 
-                       if (sym->hist) {
-                               annotate_sym(dso, sym);
-                               count++;
-                       }
-               }
-       }
+               priv = symbol__priv(he->sym);
+               if (priv->hist == NULL)
+                       continue;
 
-       if (!count)
-               printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter);
+               annotate_sym(he);
+               /*
+                * Since we have a hist_entry per IP for the same symbol, free
+                * he->sym->hist to signal we already processed this symbol.
+                */
+               free(priv->hist);
+               priv->hist = NULL;
+       }
 }
 
 static int __cmd_annotate(void)
@@ -959,7 +612,7 @@ static int __cmd_annotate(void)
        uint32_t size;
        char *buf;
 
-       register_idle_thread(&threads, &last_match);
+       register_idle_thread();
 
        input = open(input_name, O_RDONLY);
        if (input < 0) {
@@ -983,7 +636,7 @@ static int __cmd_annotate(void)
                exit(0);
        }
 
-       if (load_kernel() < 0) {
+       if (load_kernel(symbol_filter) < 0) {
                perror("failed to load kernel symbols");
                return EXIT_FAILURE;
        }
@@ -1059,14 +712,14 @@ more:
        if (dump_trace)
                return 0;
 
-       if (verbose >= 3)
-               threads__fprintf(stdout, &threads);
+       if (verbose > 3)
+               threads__fprintf(stdout);
 
-       if (verbose >= 2)
+       if (verbose > 2)
                dsos__fprintf(stdout);
 
        collapse__resort();
-       output__resort();
+       output__resort(total);
 
        find_annotations();
 
@@ -1115,7 +768,7 @@ static void setup_sorting(void)
 
 int cmd_annotate(int argc, const char **argv, const char *prefix __used)
 {
-       symbol__init();
+       symbol__init(sizeof(struct sym_priv));
 
        page_size = getpagesize();
 
@@ -1134,10 +787,13 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
                sym_hist_filter = argv[0];
        }
 
-       if (!sym_hist_filter)
-               usage_with_options(annotate_usage, options);
-
        setup_pager();
 
+       if (field_sep && *field_sep == '.') {
+               fputs("'.' is the only non valid --field-separator argument\n",
+                               stderr);
+               exit(129);
+       }
+
        return __cmd_annotate();
 }
index 4fb8734..768f9c8 100644 (file)
@@ -61,8 +61,7 @@ static const char *get_man_viewer_info(const char *name)
 {
        struct man_viewer_info_list *viewer;
 
-       for (viewer = man_viewer_info_list; viewer; viewer = viewer->next)
-       {
+       for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) {
                if (!strcasecmp(name, viewer->name))
                        return viewer->info;
        }
@@ -115,7 +114,7 @@ static int check_emacsclient_version(void)
        return 0;
 }
 
-static void exec_woman_emacs(const char* path, const char *page)
+static void exec_woman_emacs(const char *path, const char *page)
 {
        if (!check_emacsclient_version()) {
                /* This works only with emacsclient version >= 22. */
@@ -129,7 +128,7 @@ static void exec_woman_emacs(const char* path, const char *page)
        }
 }
 
-static void exec_man_konqueror(const char* path, const char *page)
+static void exec_man_konqueror(const char *path, const char *page)
 {
        const char *display = getenv("DISPLAY");
        if (display && *display) {
@@ -157,7 +156,7 @@ static void exec_man_konqueror(const char* path, const char *page)
        }
 }
 
-static void exec_man_man(const char* path, const char *page)
+static void exec_man_man(const char *path, const char *page)
 {
        if (!path)
                path = "man";
@@ -364,9 +363,8 @@ static void show_man_page(const char *perf_cmd)
 
        setup_man_path();
        for (viewer = man_viewer_list; viewer; viewer = viewer->next)
-       {
                exec_viewer(viewer->name, page); /* will return when unable */
-       }
+
        if (fallback)
                exec_viewer(fallback, page);
        exec_viewer("man", page);
index 3eeef33..4a73d89 100644 (file)
 #include "util/header.h"
 #include "util/event.h"
 #include "util/debug.h"
-#include "util/trace-event.h"
 
 #include <unistd.h>
 #include <sched.h>
 
-#define ALIGN(x, a)            __ALIGN_MASK(x, (typeof(x))(a)-1)
-#define __ALIGN_MASK(x, mask)  (((x)+(mask))&~(mask))
-
 static int                     fd[MAX_NR_CPUS][MAX_COUNTERS];
 
-static long                    default_interval                = 100000;
+static long                    default_interval                =      0;
 
-static int                     nr_cpus                         = 0;
+static int                     nr_cpus                         =      0;
 static unsigned int            page_size;
-static unsigned int            mmap_pages                      = 128;
-static int                     freq                            = 0;
+static unsigned int            mmap_pages                      =    128;
+static int                     freq                            =   1000;
 static int                     output;
 static const char              *output_name                    = "perf.data";
-static int                     group                           = 0;
-static unsigned int            realtime_prio                   = 0;
-static int                     raw_samples                     = 0;
-static int                     system_wide                     = 0;
-static int                     profile_cpu                     = -1;
-static pid_t                   target_pid                      = -1;
-static pid_t                   child_pid                       = -1;
-static int                     inherit                         = 1;
-static int                     force                           = 0;
-static int                     append_file                     = 0;
-static int                     call_graph                      = 0;
-static int                     inherit_stat                    = 0;
-static int                     no_samples                      = 0;
-static int                     sample_address                  = 0;
-static int                     multiplex                       = 0;
-static int                     multiplex_fd                    = -1;
-
-static long                    samples;
+static int                     group                           =      0;
+static unsigned int            realtime_prio                   =      0;
+static int                     raw_samples                     =      0;
+static int                     system_wide                     =      0;
+static int                     profile_cpu                     =     -1;
+static pid_t                   target_pid                      =     -1;
+static pid_t                   child_pid                       =     -1;
+static int                     inherit                         =      1;
+static int                     force                           =      0;
+static int                     append_file                     =      0;
+static int                     call_graph                      =      0;
+static int                     inherit_stat                    =      0;
+static int                     no_samples                      =      0;
+static int                     sample_address                  =      0;
+static int                     multiplex                       =      0;
+static int                     multiplex_fd                    =     -1;
+
+static long                    samples                         =      0;
 static struct timeval          last_read;
 static struct timeval          this_read;
 
-static u64                     bytes_written;
+static u64                     bytes_written                   =      0;
 
 static struct pollfd           event_array[MAX_NR_CPUS * MAX_COUNTERS];
 
-static int                     nr_poll;
-static int                     nr_cpu;
+static int                     nr_poll                         =      0;
+static int                     nr_cpu                          =      0;
 
-static int                     file_new = 1;
+static int                     file_new                        =      1;
 
-struct perf_header             *header;
+struct perf_header             *header                         =   NULL;
 
 struct mmap_data {
        int                     counter;
@@ -113,6 +109,12 @@ static void write_output(void *buf, size_t size)
        }
 }
 
+static int process_synthesized_event(event_t *event)
+{
+       write_output(event, event->header.size);
+       return 0;
+}
+
 static void mmap_read(struct mmap_data *md)
 {
        unsigned int head = mmap_read_head(md);
@@ -195,168 +197,6 @@ static void sig_atexit(void)
        kill(getpid(), signr);
 }
 
-static pid_t pid_synthesize_comm_event(pid_t pid, int full)
-{
-       struct comm_event comm_ev;
-       char filename[PATH_MAX];
-       char bf[BUFSIZ];
-       FILE *fp;
-       size_t size = 0;
-       DIR *tasks;
-       struct dirent dirent, *next;
-       pid_t tgid = 0;
-
-       snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
-
-       fp = fopen(filename, "r");
-       if (fp == NULL) {
-               /*
-                * We raced with a task exiting - just return:
-                */
-               if (verbose)
-                       fprintf(stderr, "couldn't open %s\n", filename);
-               return 0;
-       }
-
-       memset(&comm_ev, 0, sizeof(comm_ev));
-       while (!comm_ev.comm[0] || !comm_ev.pid) {
-               if (fgets(bf, sizeof(bf), fp) == NULL)
-                       goto out_failure;
-
-               if (memcmp(bf, "Name:", 5) == 0) {
-                       char *name = bf + 5;
-                       while (*name && isspace(*name))
-                               ++name;
-                       size = strlen(name) - 1;
-                       memcpy(comm_ev.comm, name, size++);
-               } else if (memcmp(bf, "Tgid:", 5) == 0) {
-                       char *tgids = bf + 5;
-                       while (*tgids && isspace(*tgids))
-                               ++tgids;
-                       tgid = comm_ev.pid = atoi(tgids);
-               }
-       }
-
-       comm_ev.header.type = PERF_RECORD_COMM;
-       size = ALIGN(size, sizeof(u64));
-       comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
-
-       if (!full) {
-               comm_ev.tid = pid;
-
-               write_output(&comm_ev, comm_ev.header.size);
-               goto out_fclose;
-       }
-
-       snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
-
-       tasks = opendir(filename);
-       while (!readdir_r(tasks, &dirent, &next) && next) {
-               char *end;
-               pid = strtol(dirent.d_name, &end, 10);
-               if (*end)
-                       continue;
-
-               comm_ev.tid = pid;
-
-               write_output(&comm_ev, comm_ev.header.size);
-       }
-       closedir(tasks);
-
-out_fclose:
-       fclose(fp);
-       return tgid;
-
-out_failure:
-       fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
-               filename);
-       exit(EXIT_FAILURE);
-}
-
-static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
-{
-       char filename[PATH_MAX];
-       FILE *fp;
-
-       snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
-
-       fp = fopen(filename, "r");
-       if (fp == NULL) {
-               /*
-                * We raced with a task exiting - just return:
-                */
-               if (verbose)
-                       fprintf(stderr, "couldn't open %s\n", filename);
-               return;
-       }
-       while (1) {
-               char bf[BUFSIZ], *pbf = bf;
-               struct mmap_event mmap_ev = {
-                       .header = { .type = PERF_RECORD_MMAP },
-               };
-               int n;
-               size_t size;
-               if (fgets(bf, sizeof(bf), fp) == NULL)
-                       break;
-
-               /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-               n = hex2u64(pbf, &mmap_ev.start);
-               if (n < 0)
-                       continue;
-               pbf += n + 1;
-               n = hex2u64(pbf, &mmap_ev.len);
-               if (n < 0)
-                       continue;
-               pbf += n + 3;
-               if (*pbf == 'x') { /* vm_exec */
-                       char *execname = strchr(bf, '/');
-
-                       /* Catch VDSO */
-                       if (execname == NULL)
-                               execname = strstr(bf, "[vdso]");
-
-                       if (execname == NULL)
-                               continue;
-
-                       size = strlen(execname);
-                       execname[size - 1] = '\0'; /* Remove \n */
-                       memcpy(mmap_ev.filename, execname, size);
-                       size = ALIGN(size, sizeof(u64));
-                       mmap_ev.len -= mmap_ev.start;
-                       mmap_ev.header.size = (sizeof(mmap_ev) -
-                                              (sizeof(mmap_ev.filename) - size));
-                       mmap_ev.pid = tgid;
-                       mmap_ev.tid = pid;
-
-                       write_output(&mmap_ev, mmap_ev.header.size);
-               }
-       }
-
-       fclose(fp);
-}
-
-static void synthesize_all(void)
-{
-       DIR *proc;
-       struct dirent dirent, *next;
-
-       proc = opendir("/proc");
-
-       while (!readdir_r(proc, &dirent, &next) && next) {
-               char *end;
-               pid_t pid, tgid;
-
-               pid = strtol(dirent.d_name, &end, 10);
-               if (*end) /* only interested in proper numerical dirents */
-                       continue;
-
-               tgid = pid_synthesize_comm_event(pid, 1);
-               pid_synthesize_mmap_samples(pid, tgid);
-       }
-
-       closedir(proc);
-}
-
 static int group_fd;
 
 static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
@@ -375,9 +215,11 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n
 
 static void create_counter(int counter, int cpu, pid_t pid)
 {
+       char *filter = filters[counter];
        struct perf_event_attr *attr = attrs + counter;
        struct perf_header_attr *h_attr;
        int track = !counter; /* only the first counter needs these */
+       int ret;
        struct {
                u64 count;
                u64 time_enabled;
@@ -480,7 +322,6 @@ try_again:
                multiplex_fd = fd[nr_cpu][counter];
 
        if (multiplex && fd[nr_cpu][counter] != multiplex_fd) {
-               int ret;
 
                ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd);
                assert(ret != -1);
@@ -500,6 +341,16 @@ try_again:
                }
        }
 
+       if (filter != NULL) {
+               ret = ioctl(fd[nr_cpu][counter],
+                           PERF_EVENT_IOC_SET_FILTER, filter);
+               if (ret) {
+                       error("failed to set filter with %d (%s)\n", errno,
+                             strerror(errno));
+                       exit(-1);
+               }
+       }
+
        ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE);
 }
 
@@ -566,17 +417,17 @@ static int __cmd_record(int argc, const char **argv)
        else
                header = perf_header__new();
 
-
        if (raw_samples) {
-               read_tracing_data(attrs, nr_counters);
+               perf_header__feat_trace_info(header);
        } else {
                for (i = 0; i < nr_counters; i++) {
                        if (attrs[i].sample_type & PERF_SAMPLE_RAW) {
-                               read_tracing_data(attrs, nr_counters);
+                               perf_header__feat_trace_info(header);
                                break;
                        }
                }
        }
+
        atexit(atexit_header);
 
        if (!system_wide) {
@@ -597,11 +448,10 @@ static int __cmd_record(int argc, const char **argv)
        if (file_new)
                perf_header__write(header, output);
 
-       if (!system_wide) {
-               pid_t tgid = pid_synthesize_comm_event(pid, 0);
-               pid_synthesize_mmap_samples(pid, tgid);
-       } else
-               synthesize_all();
+       if (!system_wide)
+               event__synthesize_thread(pid, process_synthesized_event);
+       else
+               event__synthesize_threads(process_synthesized_event);
 
        if (target_pid == -1 && argc) {
                pid = fork();
@@ -623,7 +473,7 @@ static int __cmd_record(int argc, const char **argv)
 
                param.sched_priority = realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
-                       printf("Could not set realtime priority.\n");
+                       pr_err("Could not set realtime priority.\n");
                        exit(-1);
                }
        }
@@ -677,6 +527,8 @@ static const struct option options[] = {
        OPT_CALLBACK('e', "event", NULL, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events),
+       OPT_CALLBACK(0, "filter", NULL, "filter",
+                    "event filter", parse_filter),
        OPT_INTEGER('p', "pid", &target_pid,
                    "record events on existing pid"),
        OPT_INTEGER('r', "realtime", &realtime_prio,
@@ -731,6 +583,18 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
                attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
        }
 
+       /*
+        * User specified count overrides default frequency.
+        */
+       if (default_interval)
+               freq = 0;
+       else if (freq) {
+               default_interval = freq;
+       } else {
+               fprintf(stderr, "frequency and count are zero, aborting\n");
+               exit(EXIT_FAILURE);
+       }
+
        for (counter = 0; counter < nr_counters; counter++) {
                if (attrs[counter].sample_period)
                        continue;
index 19669c2..1a806d5 100644 (file)
 #include "util/parse-options.h"
 #include "util/parse-events.h"
 
+#include "util/data_map.h"
 #include "util/thread.h"
+#include "util/sort.h"
+#include "util/hist.h"
 
 static char            const *input_name = "perf.data";
 
-static char            default_sort_order[] = "comm,dso,symbol";
-static char            *sort_order = default_sort_order;
 static char            *dso_list_str, *comm_list_str, *sym_list_str,
                        *col_width_list_str;
 static struct strlist  *dso_list, *comm_list, *sym_list;
-static char            *field_sep;
 
 static int             force;
-static int             input;
-static int             show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
 
 static int             full_paths;
 static int             show_nr_samples;
@@ -50,374 +48,39 @@ static struct perf_read_values     show_threads_values;
 static char            default_pretty_printing_style[] = "normal";
 static char            *pretty_printing_style = default_pretty_printing_style;
 
-static unsigned long   page_size;
-static unsigned long   mmap_window = 32;
-
-static char            default_parent_pattern[] = "^sys_|^do_page_fault";
-static char            *parent_pattern = default_parent_pattern;
-static regex_t         parent_regex;
-
 static int             exclude_other = 1;
 
 static char            callchain_default_opt[] = "fractal,0.5";
 
-static int             callchain;
-
-static char            __cwd[PATH_MAX];
-static char            *cwd = __cwd;
+static char            *cwd;
 static int             cwdlen;
 
-static struct rb_root  threads;
-static struct thread   *last_match;
-
 static struct perf_header *header;
 
-static
-struct callchain_param callchain_param = {
-       .mode   = CHAIN_GRAPH_REL,
-       .min_percent = 0.5
-};
-
 static u64             sample_type;
 
-static int repsep_fprintf(FILE *fp, const char *fmt, ...)
-{
-       int n;
-       va_list ap;
-
-       va_start(ap, fmt);
-       if (!field_sep)
-               n = vfprintf(fp, fmt, ap);
-       else {
-               char *bf = NULL;
-               n = vasprintf(&bf, fmt, ap);
-               if (n > 0) {
-                       char *sep = bf;
-
-                       while (1) {
-                               sep = strchr(sep, *field_sep);
-                               if (sep == NULL)
-                                       break;
-                               *sep = '.';
-                       }
-               }
-               fputs(bf, fp);
-               free(bf);
-       }
-       va_end(ap);
-       return n;
-}
-
-static unsigned int dsos__col_width,
-                   comms__col_width,
-                   threads__col_width;
-
-/*
- * histogram, sorted on item, collects counts
- */
-
-static struct rb_root hist;
-
-struct hist_entry {
-       struct rb_node          rb_node;
-
-       struct thread           *thread;
-       struct map              *map;
-       struct dso              *dso;
-       struct symbol           *sym;
-       struct symbol           *parent;
-       u64                     ip;
-       char                    level;
-       struct callchain_node   callchain;
-       struct rb_root          sorted_chain;
-
-       u64                     count;
-};
-
-/*
- * configurable sorting bits
- */
-
-struct sort_entry {
-       struct list_head list;
-
-       const char *header;
-
-       int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
-       int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
-       size_t  (*print)(FILE *fp, struct hist_entry *, unsigned int width);
-       unsigned int *width;
-       bool    elide;
-};
-
-static int64_t cmp_null(void *l, void *r)
-{
-       if (!l && !r)
-               return 0;
-       else if (!l)
-               return -1;
-       else
-               return 1;
-}
-
-/* --sort pid */
-
-static int64_t
-sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       return right->thread->pid - left->thread->pid;
-}
-
-static size_t
-sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width)
-{
-       return repsep_fprintf(fp, "%*s:%5d", width - 6,
-                             self->thread->comm ?: "", self->thread->pid);
-}
-
-static struct sort_entry sort_thread = {
-       .header = "Command:  Pid",
-       .cmp    = sort__thread_cmp,
-       .print  = sort__thread_print,
-       .width  = &threads__col_width,
-};
-
-/* --sort comm */
-
-static int64_t
-sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       return right->thread->pid - left->thread->pid;
-}
-
-static int64_t
-sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
-{
-       char *comm_l = left->thread->comm;
-       char *comm_r = right->thread->comm;
-
-       if (!comm_l || !comm_r)
-               return cmp_null(comm_l, comm_r);
-
-       return strcmp(comm_l, comm_r);
-}
-
-static size_t
-sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width)
-{
-       return repsep_fprintf(fp, "%*s", width, self->thread->comm);
-}
-
-static struct sort_entry sort_comm = {
-       .header         = "Command",
-       .cmp            = sort__comm_cmp,
-       .collapse       = sort__comm_collapse,
-       .print          = sort__comm_print,
-       .width          = &comms__col_width,
-};
-
-/* --sort dso */
-
-static int64_t
-sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       struct dso *dso_l = left->dso;
-       struct dso *dso_r = right->dso;
-
-       if (!dso_l || !dso_r)
-               return cmp_null(dso_l, dso_r);
-
-       return strcmp(dso_l->name, dso_r->name);
-}
-
-static size_t
-sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width)
-{
-       if (self->dso)
-               return repsep_fprintf(fp, "%-*s", width, self->dso->name);
-
-       return repsep_fprintf(fp, "%*llx", width, (u64)self->ip);
-}
-
-static struct sort_entry sort_dso = {
-       .header = "Shared Object",
-       .cmp    = sort__dso_cmp,
-       .print  = sort__dso_print,
-       .width  = &dsos__col_width,
-};
-
-/* --sort symbol */
-
-static int64_t
-sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       u64 ip_l, ip_r;
-
-       if (left->sym == right->sym)
-               return 0;
-
-       ip_l = left->sym ? left->sym->start : left->ip;
-       ip_r = right->sym ? right->sym->start : right->ip;
-
-       return (int64_t)(ip_r - ip_l);
-}
 
 static size_t
-sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
+callchain__fprintf_left_margin(FILE *fp, int left_margin)
 {
-       size_t ret = 0;
+       int i;
+       int ret;
 
-       if (verbose)
-               ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip,
-                                     dso__symtab_origin(self->dso));
+       ret = fprintf(fp, "            ");
 
-       ret += repsep_fprintf(fp, "[%c] ", self->level);
-       if (self->sym) {
-               ret += repsep_fprintf(fp, "%s", self->sym->name);
-
-               if (self->sym->module)
-                       ret += repsep_fprintf(fp, "\t[%s]",
-                                            self->sym->module->name);
-       } else {
-               ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip);
-       }
+       for (i = 0; i < left_margin; i++)
+               ret += fprintf(fp, " ");
 
        return ret;
 }
 
-static struct sort_entry sort_sym = {
-       .header = "Symbol",
-       .cmp    = sort__sym_cmp,
-       .print  = sort__sym_print,
-};
-
-/* --sort parent */
-
-static int64_t
-sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       struct symbol *sym_l = left->parent;
-       struct symbol *sym_r = right->parent;
-
-       if (!sym_l || !sym_r)
-               return cmp_null(sym_l, sym_r);
-
-       return strcmp(sym_l->name, sym_r->name);
-}
-
-static size_t
-sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width)
-{
-       return repsep_fprintf(fp, "%-*s", width,
-                             self->parent ? self->parent->name : "[other]");
-}
-
-static unsigned int parent_symbol__col_width;
-
-static struct sort_entry sort_parent = {
-       .header = "Parent symbol",
-       .cmp    = sort__parent_cmp,
-       .print  = sort__parent_print,
-       .width  = &parent_symbol__col_width,
-};
-
-static int sort__need_collapse = 0;
-static int sort__has_parent = 0;
-
-struct sort_dimension {
-       const char              *name;
-       struct sort_entry       *entry;
-       int                     taken;
-};
-
-static struct sort_dimension sort_dimensions[] = {
-       { .name = "pid",        .entry = &sort_thread,  },
-       { .name = "comm",       .entry = &sort_comm,    },
-       { .name = "dso",        .entry = &sort_dso,     },
-       { .name = "symbol",     .entry = &sort_sym,     },
-       { .name = "parent",     .entry = &sort_parent,  },
-};
-
-static LIST_HEAD(hist_entry__sort_list);
-
-static int sort_dimension__add(const char *tok)
-{
-       unsigned int i;
-
-       for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
-               struct sort_dimension *sd = &sort_dimensions[i];
-
-               if (sd->taken)
-                       continue;
-
-               if (strncasecmp(tok, sd->name, strlen(tok)))
-                       continue;
-
-               if (sd->entry->collapse)
-                       sort__need_collapse = 1;
-
-               if (sd->entry == &sort_parent) {
-                       int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
-                       if (ret) {
-                               char err[BUFSIZ];
-
-                               regerror(ret, &parent_regex, err, sizeof(err));
-                               fprintf(stderr, "Invalid regex: %s\n%s",
-                                       parent_pattern, err);
-                               exit(-1);
-                       }
-                       sort__has_parent = 1;
-               }
-
-               list_add_tail(&sd->entry->list, &hist_entry__sort_list);
-               sd->taken = 1;
-
-               return 0;
-       }
-
-       return -ESRCH;
-}
-
-static int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       struct sort_entry *se;
-       int64_t cmp = 0;
-
-       list_for_each_entry(se, &hist_entry__sort_list, list) {
-               cmp = se->cmp(left, right);
-               if (cmp)
-                       break;
-       }
-
-       return cmp;
-}
-
-static int64_t
-hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
-{
-       struct sort_entry *se;
-       int64_t cmp = 0;
-
-       list_for_each_entry(se, &hist_entry__sort_list, list) {
-               int64_t (*f)(struct hist_entry *, struct hist_entry *);
-
-               f = se->collapse ?: se->cmp;
-
-               cmp = f(left, right);
-               if (cmp)
-                       break;
-       }
-
-       return cmp;
-}
-
-static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask)
+static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
+                                         int left_margin)
 {
        int i;
        size_t ret = 0;
 
-       ret += fprintf(fp, "%s", "                ");
+       ret += callchain__fprintf_left_margin(fp, left_margin);
 
        for (i = 0; i < depth; i++)
                if (depth_mask & (1 << i))
@@ -432,12 +95,12 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask)
 static size_t
 ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
                       int depth_mask, int count, u64 total_samples,
-                      int hits)
+                      int hits, int left_margin)
 {
        int i;
        size_t ret = 0;
 
-       ret += fprintf(fp, "%s", "                ");
+       ret += callchain__fprintf_left_margin(fp, left_margin);
        for (i = 0; i < depth; i++) {
                if (depth_mask & (1 << i))
                        ret += fprintf(fp, "|");
@@ -475,8 +138,9 @@ static void init_rem_hits(void)
 }
 
 static size_t
-callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
-                       u64 total_samples, int depth, int depth_mask)
+__callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
+                          u64 total_samples, int depth, int depth_mask,
+                          int left_margin)
 {
        struct rb_node *node, *next;
        struct callchain_node *child;
@@ -517,7 +181,8 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
                 * But we keep the older depth mask for the line seperator
                 * to keep the level link until we reach the last child
                 */
-               ret += ipchain__fprintf_graph_line(fp, depth, depth_mask);
+               ret += ipchain__fprintf_graph_line(fp, depth, depth_mask,
+                                                  left_margin);
                i = 0;
                list_for_each_entry(chain, &child->val, list) {
                        if (chain->ip >= PERF_CONTEXT_MAX)
@@ -525,11 +190,13 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
                        ret += ipchain__fprintf_graph(fp, chain, depth,
                                                      new_depth_mask, i++,
                                                      new_total,
-                                                     cumul);
+                                                     cumul,
+                                                     left_margin);
                }
-               ret += callchain__fprintf_graph(fp, child, new_total,
-                                               depth + 1,
-                                               new_depth_mask | (1 << depth));
+               ret += __callchain__fprintf_graph(fp, child, new_total,
+                                                 depth + 1,
+                                                 new_depth_mask | (1 << depth),
+                                                 left_margin);
                node = next;
        }
 
@@ -543,12 +210,51 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
 
                ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
                                              new_depth_mask, 0, new_total,
-                                             remaining);
+                                             remaining, left_margin);
        }
 
        return ret;
 }
 
+
+static size_t
+callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
+                        u64 total_samples, int left_margin)
+{
+       struct callchain_list *chain;
+       bool printed = false;
+       int i = 0;
+       int ret = 0;
+
+       list_for_each_entry(chain, &self->val, list) {
+               if (chain->ip >= PERF_CONTEXT_MAX)
+                       continue;
+
+               if (!i++ && sort__first_dimension == SORT_SYM)
+                       continue;
+
+               if (!printed) {
+                       ret += callchain__fprintf_left_margin(fp, left_margin);
+                       ret += fprintf(fp, "|\n");
+                       ret += callchain__fprintf_left_margin(fp, left_margin);
+                       ret += fprintf(fp, "---");
+
+                       left_margin += 3;
+                       printed = true;
+               } else
+                       ret += callchain__fprintf_left_margin(fp, left_margin);
+
+               if (chain->sym)
+                       ret += fprintf(fp, " %s\n", chain->sym->name);
+               else
+                       ret += fprintf(fp, " %p\n", (void *)(long)chain->ip);
+       }
+
+       ret += __callchain__fprintf_graph(fp, self, total_samples, 1, 1, left_margin);
+
+       return ret;
+}
+
 static size_t
 callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
                        u64 total_samples)
@@ -577,7 +283,7 @@ callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
 
 static size_t
 hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
-                             u64 total_samples)
+                             u64 total_samples, int left_margin)
 {
        struct rb_node *rb_node;
        struct callchain_node *chain;
@@ -597,8 +303,8 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
                        break;
                case CHAIN_GRAPH_ABS: /* Falldown */
                case CHAIN_GRAPH_REL:
-                       ret += callchain__fprintf_graph(fp, chain,
-                                                       total_samples, 1, 1);
+                       ret += callchain__fprintf_graph(fp, chain, total_samples,
+                                                       left_margin);
                case CHAIN_NONE:
                default:
                        break;
@@ -610,7 +316,6 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
        return ret;
 }
 
-
 static size_t
 hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
 {
@@ -644,8 +349,19 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
 
        ret += fprintf(fp, "\n");
 
-       if (callchain)
-               hist_entry_callchain__fprintf(fp, self, total_samples);
+       if (callchain) {
+               int left_margin = 0;
+
+               if (sort__first_dimension == SORT_COMM) {
+                       se = list_first_entry(&hist_entry__sort_list, typeof(*se),
+                                               list);
+                       left_margin = se->width ? *se->width : 0;
+                       left_margin -= thread__comm_len(self->thread);
+               }
+
+               hist_entry_callchain__fprintf(fp, self, total_samples,
+                                             left_margin);
+       }
 
        return ret;
 }
@@ -695,22 +411,17 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm)
 
 
 static struct symbol *
-resolve_symbol(struct thread *thread, struct map **mapp,
-              struct dso **dsop, u64 *ipp)
+resolve_symbol(struct thread *thread, struct map **mapp, u64 *ipp)
 {
-       struct dso *dso = dsop ? *dsop : NULL;
        struct map *map = mapp ? *mapp : NULL;
        u64 ip = *ipp;
 
-       if (!thread)
-               return NULL;
-
-       if (dso)
-               goto got_dso;
-
        if (map)
                goto got_map;
 
+       if (!thread)
+               return NULL;
+
        map = thread__find_map(thread, ip);
        if (map != NULL) {
                /*
@@ -725,29 +436,26 @@ resolve_symbol(struct thread *thread, struct map **mapp,
                        *mapp = map;
 got_map:
                ip = map->map_ip(map, ip);
-
-               dso = map->dso;
        } else {
                /*
                 * If this is outside of all known maps,
                 * and is a negative address, try to look it
                 * up in the kernel dso, as it might be a
-                * vsyscall (which executes in user-mode):
+                * vsyscall or vdso (which executes in user-mode).
+                *
+                * XXX This is nasty, we should have a symbol list in
+                * the "[vdso]" dso, but for now let's use the old
+                * trick of looking in the whole kernel symbol list.
                 */
                if ((long long)ip < 0)
-               dso = kernel_dso;
+                       return kernel_maps__find_symbol(ip, mapp);
        }
-       dump_printf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
+       dump_printf(" ...... dso: %s\n",
+                   map ? map->dso->long_name : "<not found>");
        dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip);
        *ipp  = ip;
 
-       if (dsop)
-               *dsop = dso;
-
-       if (!dso)
-               return NULL;
-got_dso:
-       return dso->find_symbol(dso, ip);
+       return map ? map__find_symbol(map, ip, NULL) : NULL;
 }
 
 static int call__match(struct symbol *sym)
@@ -758,9 +466,9 @@ static int call__match(struct symbol *sym)
        return 0;
 }
 
-static struct symbol **
-resolve_callchain(struct thread *thread, struct map *map __used,
-                   struct ip_callchain *chain, struct hist_entry *entry)
+static struct symbol **resolve_callchain(struct thread *thread, struct map *map,
+                                        struct ip_callchain *chain,
+                                        struct symbol **parent)
 {
        u64 context = PERF_CONTEXT_MAX;
        struct symbol **syms = NULL;
@@ -776,8 +484,7 @@ resolve_callchain(struct thread *thread, struct map *map __used,
 
        for (i = 0; i < chain->nr; i++) {
                u64 ip = chain->ips[i];
-               struct dso *dso = NULL;
-               struct symbol *sym;
+               struct symbol *sym = NULL;
 
                if (ip >= PERF_CONTEXT_MAX) {
                        context = ip;
@@ -786,21 +493,18 @@ resolve_callchain(struct thread *thread, struct map *map __used,
 
                switch (context) {
                case PERF_CONTEXT_HV:
-                       dso = hypervisor_dso;
                        break;
                case PERF_CONTEXT_KERNEL:
-                       dso = kernel_dso;
+                       sym = kernel_maps__find_symbol(ip, &map);
                        break;
                default:
+                       sym = resolve_symbol(thread, &map, &ip);
                        break;
                }
 
-               sym = resolve_symbol(thread, NULL, &dso, &ip);
-
                if (sym) {
-                       if (sort__has_parent && call__match(sym) &&
-                           !entry->parent)
-                               entry->parent = sym;
+                       if (sort__has_parent && !*parent && call__match(sym))
+                               *parent = sym;
                        if (!callchain)
                                break;
                        syms[i] = sym;
@@ -815,177 +519,35 @@ resolve_callchain(struct thread *thread, struct map *map __used,
  */
 
 static int
-hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
+hist_entry__add(struct thread *thread, struct map *map,
                struct symbol *sym, u64 ip, struct ip_callchain *chain,
                char level, u64 count)
 {
-       struct rb_node **p = &hist.rb_node;
-       struct rb_node *parent = NULL;
+       struct symbol **syms = NULL, *parent = NULL;
+       bool hit;
        struct hist_entry *he;
-       struct symbol **syms = NULL;
-       struct hist_entry entry = {
-               .thread = thread,
-               .map    = map,
-               .dso    = dso,
-               .sym    = sym,
-               .ip     = ip,
-               .level  = level,
-               .count  = count,
-               .parent = NULL,
-               .sorted_chain = RB_ROOT
-       };
-       int cmp;
 
        if ((sort__has_parent || callchain) && chain)
-               syms = resolve_callchain(thread, map, chain, &entry);
-
-       while (*p != NULL) {
-               parent = *p;
-               he = rb_entry(parent, struct hist_entry, rb_node);
-
-               cmp = hist_entry__cmp(&entry, he);
+               syms = resolve_callchain(thread, map, chain, &parent);
 
-               if (!cmp) {
-                       he->count += count;
-                       if (callchain) {
-                               append_chain(&he->callchain, chain, syms);
-                               free(syms);
-                       }
-                       return 0;
-               }
+       he = __hist_entry__add(thread, map, sym, parent,
+                              ip, count, level, &hit);
+       if (he == NULL)
+               return -ENOMEM;
 
-               if (cmp < 0)
-                       p = &(*p)->rb_left;
-               else
-                       p = &(*p)->rb_right;
-       }
+       if (hit)
+               he->count += count;
 
-       he = malloc(sizeof(*he));
-       if (!he)
-               return -ENOMEM;
-       *he = entry;
        if (callchain) {
-               callchain_init(&he->callchain);
+               if (!hit)
+                       callchain_init(&he->callchain);
                append_chain(&he->callchain, chain, syms);
                free(syms);
        }
-       rb_link_node(&he->rb_node, parent, p);
-       rb_insert_color(&he->rb_node, &hist);
 
        return 0;
 }
 
-static void hist_entry__free(struct hist_entry *he)
-{
-       free(he);
-}
-
-/*
- * collapse the histogram
- */
-
-static struct rb_root collapse_hists;
-
-static void collapse__insert_entry(struct hist_entry *he)
-{
-       struct rb_node **p = &collapse_hists.rb_node;
-       struct rb_node *parent = NULL;
-       struct hist_entry *iter;
-       int64_t cmp;
-
-       while (*p != NULL) {
-               parent = *p;
-               iter = rb_entry(parent, struct hist_entry, rb_node);
-
-               cmp = hist_entry__collapse(iter, he);
-
-               if (!cmp) {
-                       iter->count += he->count;
-                       hist_entry__free(he);
-                       return;
-               }
-
-               if (cmp < 0)
-                       p = &(*p)->rb_left;
-               else
-                       p = &(*p)->rb_right;
-       }
-
-       rb_link_node(&he->rb_node, parent, p);
-       rb_insert_color(&he->rb_node, &collapse_hists);
-}
-
-static void collapse__resort(void)
-{
-       struct rb_node *next;
-       struct hist_entry *n;
-
-       if (!sort__need_collapse)
-               return;
-
-       next = rb_first(&hist);
-       while (next) {
-               n = rb_entry(next, struct hist_entry, rb_node);
-               next = rb_next(&n->rb_node);
-
-               rb_erase(&n->rb_node, &hist);
-               collapse__insert_entry(n);
-       }
-}
-
-/*
- * reverse the map, sort on count.
- */
-
-static struct rb_root output_hists;
-
-static void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits)
-{
-       struct rb_node **p = &output_hists.rb_node;
-       struct rb_node *parent = NULL;
-       struct hist_entry *iter;
-
-       if (callchain)
-               callchain_param.sort(&he->sorted_chain, &he->callchain,
-                                     min_callchain_hits, &callchain_param);
-
-       while (*p != NULL) {
-               parent = *p;
-               iter = rb_entry(parent, struct hist_entry, rb_node);
-
-               if (he->count > iter->count)
-                       p = &(*p)->rb_left;
-               else
-                       p = &(*p)->rb_right;
-       }
-
-       rb_link_node(&he->rb_node, parent, p);
-       rb_insert_color(&he->rb_node, &output_hists);
-}
-
-static void output__resort(u64 total_samples)
-{
-       struct rb_node *next;
-       struct hist_entry *n;
-       struct rb_root *tree = &hist;
-       u64 min_callchain_hits;
-
-       min_callchain_hits = total_samples * (callchain_param.min_percent / 100);
-
-       if (sort__need_collapse)
-               tree = &collapse_hists;
-
-       next = rb_first(tree);
-
-       while (next) {
-               n = rb_entry(next, struct hist_entry, rb_node);
-               next = rb_next(&n->rb_node);
-
-               rb_erase(&n->rb_node, tree);
-               output__insert_entry(n, min_callchain_hits);
-       }
-}
-
 static size_t output__fprintf(FILE *fp, u64 total_samples)
 {
        struct hist_entry *pos;
@@ -1080,13 +642,6 @@ print_entries:
        return ret;
 }
 
-static unsigned long total = 0,
-                    total_mmap = 0,
-                    total_comm = 0,
-                    total_fork = 0,
-                    total_unknown = 0,
-                    total_lost = 0;
-
 static int validate_chain(struct ip_callchain *chain, event_t *event)
 {
        unsigned int chain_size;
@@ -1104,17 +659,14 @@ static int
 process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 {
        char level;
-       int show = 0;
-       struct dso *dso = NULL;
-       struct thread *thread;
+       struct symbol *sym = NULL;
        u64 ip = event->ip.ip;
        u64 period = 1;
        struct map *map = NULL;
        void *more_data = event->ip.__more_data;
        struct ip_callchain *chain = NULL;
        int cpumode;
-
-       thread = threads__findnew(event->ip.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->ip.pid);
 
        if (sample_type & PERF_SAMPLE_PERIOD) {
                period = *(u64 *)more_data;
@@ -1137,7 +689,8 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
                dump_printf("... chain: nr:%Lu\n", chain->nr);
 
                if (validate_chain(chain, event) < 0) {
-                       eprintf("call-chain problem with event, skipping it.\n");
+                       pr_debug("call-chain problem with event, "
+                                "skipping it.\n");
                        return 0;
                }
 
@@ -1147,56 +700,49 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
                }
        }
 
-       dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
        if (thread == NULL) {
-               eprintf("problem processing %d event, skipping it.\n",
+               pr_debug("problem processing %d event, skipping it.\n",
                        event->header.type);
                return -1;
        }
 
+       dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
        if (comm_list && !strlist__has_entry(comm_list, thread->comm))
                return 0;
 
        cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
        if (cpumode == PERF_RECORD_MISC_KERNEL) {
-               show = SHOW_KERNEL;
                level = 'k';
-
-               dso = kernel_dso;
-
-               dump_printf(" ...... dso: %s\n", dso->name);
-
+               sym = kernel_maps__find_symbol(ip, &map);
+               dump_printf(" ...... dso: %s\n",
+                           map ? map->dso->long_name : "<not found>");
        } else if (cpumode == PERF_RECORD_MISC_USER) {
-
-               show = SHOW_USER;
                level = '.';
+               sym = resolve_symbol(thread, &map, &ip);
 
        } else {
-               show = SHOW_HV;
                level = 'H';
-
-               dso = hypervisor_dso;
-
                dump_printf(" ...... dso: [hypervisor]\n");
        }
 
-       if (show & show_mask) {
-               struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
-
-               if (dso_list && (!dso || !dso->name ||
-                                !strlist__has_entry(dso_list, dso->name)))
-                       return 0;
+       if (dso_list &&
+           (!map || !map->dso ||
+            !(strlist__has_entry(dso_list, map->dso->short_name) ||
+              (map->dso->short_name != map->dso->long_name &&
+               strlist__has_entry(dso_list, map->dso->long_name)))))
+               return 0;
 
-               if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
-                       return 0;
+       if (sym_list && sym && !strlist__has_entry(sym_list, sym->name))
+               return 0;
 
-               if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
-                       eprintf("problem incrementing symbol count, skipping event\n");
-                       return -1;
-               }
+       if (hist_entry__add(thread, map, sym, ip,
+                           chain, level, period)) {
+               pr_debug("problem incrementing symbol count, skipping event\n");
+               return -1;
        }
+
        total += period;
 
        return 0;
@@ -1205,10 +751,8 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
        struct map *map = map__new(&event->mmap, cwd, cwdlen);
-
-       thread = threads__findnew(event->mmap.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->mmap.pid);
 
        dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
                (void *)(offset + head),
@@ -1234,9 +778,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
-
-       thread = threads__findnew(event->comm.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->comm.pid);
 
        dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
                (void *)(offset + head),
@@ -1256,11 +798,8 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_task_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
-       struct thread *parent;
-
-       thread = threads__findnew(event->fork.pid, &threads, &last_match);
-       parent = threads__findnew(event->fork.ppid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->fork.pid);
+       struct thread *parent = threads__findnew(event->fork.ppid);
 
        dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n",
                (void *)(offset + head),
@@ -1331,216 +870,79 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head)
        return 0;
 }
 
-static int
-process_event(event_t *event, unsigned long offset, unsigned long head)
-{
-       trace_event(event);
-
-       switch (event->header.type) {
-       case PERF_RECORD_SAMPLE:
-               return process_sample_event(event, offset, head);
-
-       case PERF_RECORD_MMAP:
-               return process_mmap_event(event, offset, head);
-
-       case PERF_RECORD_COMM:
-               return process_comm_event(event, offset, head);
-
-       case PERF_RECORD_FORK:
-       case PERF_RECORD_EXIT:
-               return process_task_event(event, offset, head);
-
-       case PERF_RECORD_LOST:
-               return process_lost_event(event, offset, head);
-
-       case PERF_RECORD_READ:
-               return process_read_event(event, offset, head);
-
-       /*
-        * We dont process them right now but they are fine:
-        */
-
-       case PERF_RECORD_THROTTLE:
-       case PERF_RECORD_UNTHROTTLE:
-               return 0;
-
-       default:
-               return -1;
-       }
-
-       return 0;
-}
-
-static int __cmd_report(void)
+static int sample_type_check(u64 type)
 {
-       int ret, rc = EXIT_FAILURE;
-       unsigned long offset = 0;
-       unsigned long head, shift;
-       struct stat input_stat;
-       struct thread *idle;
-       event_t *event;
-       uint32_t size;
-       char *buf;
-
-       idle = register_idle_thread(&threads, &last_match);
-       thread__comm_adjust(idle);
-
-       if (show_threads)
-               perf_read_values_init(&show_threads_values);
-
-       input = open(input_name, O_RDONLY);
-       if (input < 0) {
-               fprintf(stderr, " failed to open file: %s", input_name);
-               if (!strcmp(input_name, "perf.data"))
-                       fprintf(stderr, "  (try 'perf record' first)");
-               fprintf(stderr, "\n");
-               exit(-1);
-       }
-
-       ret = fstat(input, &input_stat);
-       if (ret < 0) {
-               perror("failed to stat file");
-               exit(-1);
-       }
-
-       if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
-               fprintf(stderr, "file: %s not owned by current user or root\n", input_name);
-               exit(-1);
-       }
-
-       if (!input_stat.st_size) {
-               fprintf(stderr, "zero-sized file, nothing to do!\n");
-               exit(0);
-       }
-
-       header = perf_header__read(input);
-       head = header->data_offset;
-
-       sample_type = perf_header__sample_type(header);
+       sample_type = type;
 
        if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
                if (sort__has_parent) {
                        fprintf(stderr, "selected --sort parent, but no"
                                        " callchain data. Did you call"
                                        " perf record without -g?\n");
-                       exit(-1);
+                       return -1;
                }
                if (callchain) {
                        fprintf(stderr, "selected -g but no callchain data."
                                        " Did you call perf record without"
                                        " -g?\n");
-                       exit(-1);
+                       return -1;
                }
        } else if (callchain_param.mode != CHAIN_NONE && !callchain) {
                        callchain = 1;
                        if (register_callchain_param(&callchain_param) < 0) {
                                fprintf(stderr, "Can't register callchain"
                                                " params\n");
-                               exit(-1);
+                               return -1;
                        }
        }
 
-       if (load_kernel() < 0) {
-               perror("failed to load kernel symbols");
-               return EXIT_FAILURE;
-       }
-
-       if (!full_paths) {
-               if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
-                       perror("failed to get the current directory");
-                       return EXIT_FAILURE;
-               }
-               cwdlen = strlen(cwd);
-       } else {
-               cwd = NULL;
-               cwdlen = 0;
-       }
-
-       shift = page_size * (head / page_size);
-       offset += shift;
-       head -= shift;
-
-remap:
-       buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-                          MAP_SHARED, input, offset);
-       if (buf == MAP_FAILED) {
-               perror("failed to mmap file");
-               exit(-1);
-       }
-
-more:
-       event = (event_t *)(buf + head);
-
-       size = event->header.size;
-       if (!size)
-               size = 8;
-
-       if (head + event->header.size >= page_size * mmap_window) {
-               int munmap_ret;
-
-               shift = page_size * (head / page_size);
-
-               munmap_ret = munmap(buf, page_size * mmap_window);
-               assert(munmap_ret == 0);
-
-               offset += shift;
-               head -= shift;
-               goto remap;
-       }
-
-       size = event->header.size;
-
-       dump_printf("\n%p [%p]: event: %d\n",
-                       (void *)(offset + head),
-                       (void *)(long)event->header.size,
-                       event->header.type);
-
-       if (!size || process_event(event, offset, head) < 0) {
-
-               dump_printf("%p [%p]: skipping unknown header type: %d\n",
-                       (void *)(offset + head),
-                       (void *)(long)(event->header.size),
-                       event->header.type);
-
-               total_unknown++;
+       return 0;
+}
 
-               /*
-                * assume we lost track of the stream, check alignment, and
-                * increment a single u64 in the hope to catch on again 'soon'.
-                */
+static struct perf_file_handler file_handler = {
+       .process_sample_event   = process_sample_event,
+       .process_mmap_event     = process_mmap_event,
+       .process_comm_event     = process_comm_event,
+       .process_exit_event     = process_task_event,
+       .process_fork_event     = process_task_event,
+       .process_lost_event     = process_lost_event,
+       .process_read_event     = process_read_event,
+       .sample_type_check      = sample_type_check,
+};
 
-               if (unlikely(head & 7))
-                       head &= ~7ULL;
 
-               size = 8;
-       }
+static int __cmd_report(void)
+{
+       struct thread *idle;
+       int ret;
 
-       head += size;
+       idle = register_idle_thread();
+       thread__comm_adjust(idle);
 
-       if (offset + head >= header->data_offset + header->data_size)
-               goto done;
+       if (show_threads)
+               perf_read_values_init(&show_threads_values);
 
-       if (offset + head < (unsigned long)input_stat.st_size)
-               goto more;
+       register_perf_file_handler(&file_handler);
 
-done:
-       rc = EXIT_SUCCESS;
-       close(input);
+       ret = mmap_dispatch_perf_file(&header, input_name, force, full_paths,
+                                     &cwdlen, &cwd);
+       if (ret)
+               return ret;
 
        dump_printf("      IP events: %10ld\n", total);
        dump_printf("    mmap events: %10ld\n", total_mmap);
        dump_printf("    comm events: %10ld\n", total_comm);
        dump_printf("    fork events: %10ld\n", total_fork);
        dump_printf("    lost events: %10ld\n", total_lost);
-       dump_printf(" unknown events: %10ld\n", total_unknown);
+       dump_printf(" unknown events: %10ld\n", file_handler.total_unknown);
 
        if (dump_trace)
                return 0;
 
-       if (verbose >= 3)
-               threads__fprintf(stdout, &threads);
+       if (verbose > 3)
+               threads__fprintf(stdout);
 
-       if (verbose >= 2)
+       if (verbose > 2)
                dsos__fprintf(stdout);
 
        collapse__resort();
@@ -1550,7 +952,7 @@ done:
        if (show_threads)
                perf_read_values_destroy(&show_threads_values);
 
-       return rc;
+       return ret;
 }
 
 static int
@@ -1606,7 +1008,8 @@ setup:
        return 0;
 }
 
-static const char * const report_usage[] = {
+//static const char * const report_usage[] = {
+const char * const report_usage[] = {
        "perf report [<options>] <command>",
        NULL
 };
@@ -1690,9 +1093,7 @@ static void setup_list(struct strlist **list, const char *list_str,
 
 int cmd_report(int argc, const char **argv, const char *prefix __used)
 {
-       symbol__init();
-
-       page_size = getpagesize();
+       symbol__init(0);
 
        argc = parse_options(argc, argv, options, report_usage, 0);
 
index ce2d5be..df44b75 100644 (file)
@@ -11,6 +11,7 @@
 #include "util/trace-event.h"
 
 #include "util/debug.h"
+#include "util/data_map.h"
 
 #include <sys/types.h>
 #include <sys/prctl.h>
 #include <math.h>
 
 static char                    const *input_name = "perf.data";
-static int                     input;
-static unsigned long           page_size;
-static unsigned long           mmap_window = 32;
 
 static unsigned long           total_comm = 0;
 
-static struct rb_root          threads;
-static struct thread           *last_match;
-
 static struct perf_header      *header;
 static u64                     sample_type;
 
 static char                    default_sort_order[] = "avg, max, switch, runtime";
 static char                    *sort_order = default_sort_order;
 
+static int                     profile_cpu = -1;
+
+static char                    *cwd;
+static int                     cwdlen;
+
 #define PR_SET_NAME            15               /* Set process name */
 #define MAX_CPUS               4096
 
-#define BUG_ON(x)              assert(!(x))
-
 static u64                     run_measurement_overhead;
 static u64                     sleep_measurement_overhead;
 
@@ -74,6 +72,7 @@ enum sched_event_type {
        SCHED_EVENT_RUN,
        SCHED_EVENT_SLEEP,
        SCHED_EVENT_WAKEUP,
+       SCHED_EVENT_MIGRATION,
 };
 
 struct sched_atom {
@@ -398,6 +397,8 @@ process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
                                ret = sem_post(atom->wait_sem);
                        BUG_ON(ret);
                        break;
+               case SCHED_EVENT_MIGRATION:
+                       break;
                default:
                        BUG_ON(1);
        }
@@ -635,9 +636,7 @@ static void test_calibrations(void)
 static int
 process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       struct thread *thread;
-
-       thread = threads__findnew(event->comm.pid, &threads, &last_match);
+       struct thread *thread = threads__findnew(event->comm.tid);
 
        dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
                (void *)(offset + head),
@@ -745,6 +744,22 @@ struct trace_fork_event {
        u32 child_pid;
 };
 
+struct trace_migrate_task_event {
+       u32 size;
+
+       u16 common_type;
+       u8 common_flags;
+       u8 common_preempt_count;
+       u32 common_pid;
+       u32 common_tgid;
+
+       char comm[16];
+       u32 pid;
+
+       u32 prio;
+       u32 cpu;
+};
+
 struct trace_sched_handler {
        void (*switch_event)(struct trace_switch_event *,
                             struct event *,
@@ -769,6 +784,12 @@ struct trace_sched_handler {
                           int cpu,
                           u64 timestamp,
                           struct thread *thread);
+
+       void (*migrate_task_event)(struct trace_migrate_task_event *,
+                          struct event *,
+                          int cpu,
+                          u64 timestamp,
+                          struct thread *thread);
 };
 
 
@@ -1058,8 +1079,8 @@ latency_switch_event(struct trace_switch_event *switch_event,
                die("hm, delta: %Ld < 0 ?\n", delta);
 
 
-       sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
-       sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+       sched_out = threads__findnew(switch_event->prev_pid);
+       sched_in = threads__findnew(switch_event->next_pid);
 
        out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
        if (!out_events) {
@@ -1092,13 +1113,10 @@ latency_runtime_event(struct trace_runtime_event *runtime_event,
                     u64 timestamp,
                     struct thread *this_thread __used)
 {
-       struct work_atoms *atoms;
-       struct thread *thread;
+       struct thread *thread = threads__findnew(runtime_event->pid);
+       struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
 
        BUG_ON(cpu >= MAX_CPUS || cpu < 0);
-
-       thread = threads__findnew(runtime_event->pid, &threads, &last_match);
-       atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
        if (!atoms) {
                thread_atoms_insert(thread);
                atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
@@ -1125,7 +1143,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
        if (!wakeup_event->success)
                return;
 
-       wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
+       wakee = threads__findnew(wakeup_event->pid);
        atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
        if (!atoms) {
                thread_atoms_insert(wakee);
@@ -1139,7 +1157,12 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
 
        atom = list_entry(atoms->work_list.prev, struct work_atom, list);
 
-       if (atom->state != THREAD_SLEEPING)
+       /*
+        * You WILL be missing events if you've recorded only
+        * one CPU, or are only looking at only one, so don't
+        * make useless noise.
+        */
+       if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
                nr_state_machine_bugs++;
 
        nr_timestamps++;
@@ -1152,11 +1175,51 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
        atom->wake_up_time = timestamp;
 }
 
+static void
+latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
+                    struct event *__event __used,
+                    int cpu __used,
+                    u64 timestamp,
+                    struct thread *thread __used)
+{
+       struct work_atoms *atoms;
+       struct work_atom *atom;
+       struct thread *migrant;
+
+       /*
+        * Only need to worry about migration when profiling one CPU.
+        */
+       if (profile_cpu == -1)
+               return;
+
+       migrant = threads__findnew(migrate_task_event->pid);
+       atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
+       if (!atoms) {
+               thread_atoms_insert(migrant);
+               register_pid(migrant->pid, migrant->comm);
+               atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
+               if (!atoms)
+                       die("migration-event: Internal tree error");
+               add_sched_out_event(atoms, 'R', timestamp);
+       }
+
+       BUG_ON(list_empty(&atoms->work_list));
+
+       atom = list_entry(atoms->work_list.prev, struct work_atom, list);
+       atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;
+
+       nr_timestamps++;
+
+       if (atom->sched_out_time > timestamp)
+               nr_unordered_timestamps++;
+}
+
 static struct trace_sched_handler lat_ops  = {
        .wakeup_event           = latency_wakeup_event,
        .switch_event           = latency_switch_event,
        .runtime_event          = latency_runtime_event,
        .fork_event             = latency_fork_event,
+       .migrate_task_event     = latency_migrate_task_event,
 };
 
 static void output_lat_thread(struct work_atoms *work_list)
@@ -1385,8 +1448,8 @@ map_switch_event(struct trace_switch_event *switch_event,
                die("hm, delta: %Ld < 0 ?\n", delta);
 
 
-       sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
-       sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+       sched_out = threads__findnew(switch_event->prev_pid);
+       sched_in = threads__findnew(switch_event->next_pid);
 
        curr_thread[this_cpu] = sched_in;
 
@@ -1517,6 +1580,26 @@ process_sched_exit_event(struct event *event,
 }
 
 static void
+process_sched_migrate_task_event(struct raw_event_sample *raw,
+                          struct event *event,
+                          int cpu __used,
+                          u64 timestamp __used,
+                          struct thread *thread __used)
+{
+       struct trace_migrate_task_event migrate_task_event;
+
+       FILL_COMMON_FIELDS(migrate_task_event, event, raw->data);
+
+       FILL_ARRAY(migrate_task_event, comm, event, raw->data);
+       FILL_FIELD(migrate_task_event, pid, event, raw->data);
+       FILL_FIELD(migrate_task_event, prio, event, raw->data);
+       FILL_FIELD(migrate_task_event, cpu, event, raw->data);
+
+       if (trace_handler->migrate_task_event)
+               trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread);
+}
+
+static void
 process_raw_event(event_t *raw_event __used, void *more_data,
                  int cpu, u64 timestamp, struct thread *thread)
 {
@@ -1539,23 +1622,24 @@ process_raw_event(event_t *raw_event __used, void *more_data,
                process_sched_fork_event(raw, event, cpu, timestamp, thread);
        if (!strcmp(event->name, "sched_process_exit"))
                process_sched_exit_event(event, cpu, timestamp, thread);
+       if (!strcmp(event->name, "sched_migrate_task"))
+               process_sched_migrate_task_event(raw, event, cpu, timestamp, thread);
 }
 
 static int
 process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 {
-       char level;
-       int show = 0;
-       struct dso *dso = NULL;
        struct thread *thread;
        u64 ip = event->ip.ip;
        u64 timestamp = -1;
        u32 cpu = -1;
        u64 period = 1;
        void *more_data = event->ip.__more_data;
-       int cpumode;
 
-       thread = threads__findnew(event->ip.pid, &threads, &last_match);
+       if (!(sample_type & PERF_SAMPLE_RAW))
+               return 0;
+
+       thread = threads__findnew(event->ip.pid);
 
        if (sample_type & PERF_SAMPLE_TIME) {
                timestamp = *(u64 *)more_data;
@@ -1581,169 +1665,60 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
                (void *)(long)ip,
                (long long)period);
 
-       dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
        if (thread == NULL) {
-               eprintf("problem processing %d event, skipping it.\n",
-                       event->header.type);
+               pr_debug("problem processing %d event, skipping it.\n",
+                        event->header.type);
                return -1;
        }
 
-       cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
-       if (cpumode == PERF_RECORD_MISC_KERNEL) {
-               show = SHOW_KERNEL;
-               level = 'k';
-
-               dso = kernel_dso;
-
-               dump_printf(" ...... dso: %s\n", dso->name);
-
-       } else if (cpumode == PERF_RECORD_MISC_USER) {
-
-               show = SHOW_USER;
-               level = '.';
-
-       } else {
-               show = SHOW_HV;
-               level = 'H';
-
-               dso = hypervisor_dso;
+       dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-               dump_printf(" ...... dso: [hypervisor]\n");
-       }
+       if (profile_cpu != -1 && profile_cpu != (int) cpu)
+               return 0;
 
-       if (sample_type & PERF_SAMPLE_RAW)
-               process_raw_event(event, more_data, cpu, timestamp, thread);
+       process_raw_event(event, more_data, cpu, timestamp, thread);
 
        return 0;
 }
 
 static int
-process_event(event_t *event, unsigned long offset, unsigned long head)
+process_lost_event(event_t *event __used,
+                  unsigned long offset __used,
+                  unsigned long head __used)
 {
-       trace_event(event);
+       nr_lost_chunks++;
+       nr_lost_events += event->lost.lost;
 
-       nr_events++;
-       switch (event->header.type) {
-       case PERF_RECORD_MMAP:
-               return 0;
-       case PERF_RECORD_LOST:
-               nr_lost_chunks++;
-               nr_lost_events += event->lost.lost;
-               return 0;
-
-       case PERF_RECORD_COMM:
-               return process_comm_event(event, offset, head);
-
-       case PERF_RECORD_EXIT ... PERF_RECORD_READ:
-               return 0;
+       return 0;
+}
 
-       case PERF_RECORD_SAMPLE:
-               return process_sample_event(event, offset, head);
+static int sample_type_check(u64 type)
+{
+       sample_type = type;
 
-       case PERF_RECORD_MAX:
-       default:
+       if (!(sample_type & PERF_SAMPLE_RAW)) {
+               fprintf(stderr,
+                       "No trace sample to read. Did you call perf record "
+                       "without -R?");
                return -1;
        }
 
        return 0;
 }
 
+static struct perf_file_handler file_handler = {
+       .process_sample_event   = process_sample_event,
+       .process_comm_event     = process_comm_event,
+       .process_lost_event     = process_lost_event,
+       .sample_type_check      = sample_type_check,
+};
+
 static int read_events(void)
 {
-       int ret, rc = EXIT_FAILURE;
-       unsigned long offset = 0;
-       unsigned long head = 0;
-       struct stat perf_stat;
-       event_t *event;
-       uint32_t size;
-       char *buf;
-
-       trace_report();
-       register_idle_thread(&threads, &last_match);
-
-       input = open(input_name, O_RDONLY);
-       if (input < 0) {
-               perror("failed to open file");
-               exit(-1);
-       }
-
-       ret = fstat(input, &perf_stat);
-       if (ret < 0) {
-               perror("failed to stat file");
-               exit(-1);
-       }
-
-       if (!perf_stat.st_size) {
-               fprintf(stderr, "zero-sized file, nothing to do!\n");
-               exit(0);
-       }
-       header = perf_header__read(input);
-       head = header->data_offset;
-       sample_type = perf_header__sample_type(header);
-
-       if (!(sample_type & PERF_SAMPLE_RAW))
-               die("No trace sample to read. Did you call perf record "
-                   "without -R?");
-
-       if (load_kernel() < 0) {
-               perror("failed to load kernel symbols");
-               return EXIT_FAILURE;
-       }
-
-remap:
-       buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
-                          MAP_SHARED, input, offset);
-       if (buf == MAP_FAILED) {
-               perror("failed to mmap file");
-               exit(-1);
-       }
-
-more:
-       event = (event_t *)(buf + head);
-
-       size = event->header.size;
-       if (!size)
-               size = 8;
-
-       if (head + event->header.size >= page_size * mmap_window) {
-               unsigned long shift = page_size * (head / page_size);
-               int res;
-
-               res = munmap(buf, page_size * mmap_window);
-               assert(res == 0);
-
-               offset += shift;
-               head -= shift;
-               goto remap;
-       }
-
-       size = event->header.size;
-
-
-       if (!size || process_event(event, offset, head) < 0) {
-
-               /*
-                * assume we lost track of the stream, check alignment, and
-                * increment a single u64 in the hope to catch on again 'soon'.
-                */
-
-               if (unlikely(head & 7))
-                       head &= ~7ULL;
-
-               size = 8;
-       }
-
-       head += size;
-
-       if (offset + head < (unsigned long)perf_stat.st_size)
-               goto more;
-
-       rc = EXIT_SUCCESS;
-       close(input);
+       register_idle_thread();
+       register_perf_file_handler(&file_handler);
 
-       return rc;
+       return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd);
 }
 
 static void print_bad_events(void)
@@ -1883,6 +1858,8 @@ static const struct option latency_options[] = {
                   "sort by key(s): runtime, switch, avg, max"),
        OPT_BOOLEAN('v', "verbose", &verbose,
                    "be more verbose (show symbol address, etc)"),
+       OPT_INTEGER('C', "CPU", &profile_cpu,
+                   "CPU to profile on"),
        OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                    "dump raw trace in ASCII"),
        OPT_END()
@@ -1960,8 +1937,7 @@ static int __cmd_record(int argc, const char **argv)
 
 int cmd_sched(int argc, const char **argv, const char *prefix __used)
 {
-       symbol__init();
-       page_size = getpagesize();
+       symbol__init(0);
 
        argc = parse_options(argc, argv, sched_options, sched_usage,
                             PARSE_OPT_STOP_AT_NON_OPTION);
index 3db31e7..c6df377 100644 (file)
 
 static struct perf_event_attr default_attrs[] = {
 
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK     },
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS    },
-
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES     },
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS   },
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES   },
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK             },
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES       },
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS         },
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS            },
+
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES             },
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS           },
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS    },
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES          },
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES       },
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES           },
 
 };
 
@@ -125,6 +127,7 @@ struct stats                        event_res_stats[MAX_COUNTERS][3];
 struct stats                   runtime_nsecs_stats;
 struct stats                   walltime_nsecs_stats;
 struct stats                   runtime_cycles_stats;
+struct stats                   runtime_branches_stats;
 
 #define MATCH_EVENT(t, c, counter)                     \
        (attrs[counter].type == PERF_TYPE_##t &&        \
@@ -235,6 +238,8 @@ static void read_counter(int counter)
                update_stats(&runtime_nsecs_stats, count[0]);
        if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
                update_stats(&runtime_cycles_stats, count[0]);
+       if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
+               update_stats(&runtime_branches_stats, count[0]);
 }
 
 static int run_perf_stat(int argc __used, const char **argv)
@@ -352,6 +357,14 @@ static void abs_printout(int counter, double avg)
                        ratio = avg / total;
 
                fprintf(stderr, " # %10.3f IPC  ", ratio);
+       } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter)) {
+               total = avg_stats(&runtime_branches_stats);
+
+               if (total)
+                       ratio = avg * 100 / total;
+
+               fprintf(stderr, " # %10.3f %%    ", ratio);
+
        } else {
                total = avg_stats(&runtime_nsecs_stats);
 
index e8a510d..665877e 100644 (file)
@@ -153,6 +153,17 @@ static struct wake_event     *wake_events;
 
 struct sample_wrapper *all_samples;
 
+
+struct process_filter;         /* forward declaration of the filter list node */
+struct process_filter {        /* one -p/--process selector, kept in a singly linked list */
+       char                    *name;          /* copy of the selector string, compared against a task's comm */
+       int                     pid;            /* selector parsed as a decimal pid; 0 means no pid match is attempted */
+       struct process_filter   *next;          /* next selector, or NULL at the end of the list */
+};
+
+static struct process_filter *process_filter;  /* list head; NULL means no filtering is active */
+
+
 static struct per_pid *find_create_pid(int pid)
 {
        struct per_pid *cursor = all_data;
@@ -763,11 +774,11 @@ static void draw_wakeups(void)
                                c = p->all;
                                while (c) {
                                        if (c->Y && c->start_time <= we->time && c->end_time >= we->time) {
-                                               if (p->pid == we->waker) {
+                                               if (p->pid == we->waker && !from) {
                                                        from = c->Y;
                                                        task_from = strdup(c->comm);
                                                }
-                                               if (p->pid == we->wakee) {
+                                               if (p->pid == we->wakee && !to) {
                                                        to = c->Y;
                                                        task_to = strdup(c->comm);
                                                }
@@ -882,12 +893,89 @@ static void draw_process_bars(void)
        }
 }
 
+/*
+ * Register one process selector given on the command line.
+ *
+ * The string is stored twice: as a name to compare against task comms, and
+ * as a decimal pid (0 when the string does not start with a number, in
+ * which case passes_filter() skips the pid comparison).  The new entry is
+ * pushed at the head of the process_filter list.
+ *
+ * On allocation failure the selector is dropped and the list is left
+ * untouched, so a half-initialised entry is never published.
+ */
+static void add_process_filter(const char *string)
+{
+       struct process_filter *filt;
+       int pid;
+
+       pid = strtoull(string, NULL, 10);
+       filt = malloc(sizeof(*filt));
+       if (!filt)
+               return;
+
+       filt->name = strdup(string);
+       if (!filt->name) {
+               /* passes_filter() hands name to strcmp(); never store NULL */
+               free(filt);
+               return;
+       }
+       filt->pid  = pid;
+       filt->next = process_filter;
+
+       process_filter = filt;
+}
+
+/*
+ * Decide whether the task (p, c) is selected by the process filter list.
+ *
+ * Returns 1 when no filter is installed, or when some filter entry matches
+ * either the pid (entries whose pid is 0 never match by pid) or the comm
+ * string exactly; returns 0 otherwise.
+ */
+static int passes_filter(struct per_pid *p, struct per_pidcomm *c)
+{
+       struct process_filter *entry;
+
+       /* an empty list means "show everything" */
+       if (process_filter == NULL)
+               return 1;
+
+       for (entry = process_filter; entry != NULL; entry = entry->next) {
+               int pid_hit = entry->pid != 0 && p->pid == entry->pid;
+
+               if (pid_hit || !strcmp(entry->name, c->comm))
+                       return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Filtered variant of determine_display_tasks(): mark for display exactly
+ * those pid/comm entries that pass the process filter.
+ *
+ * While walking all_data it also normalises the time stamps: a start_time
+ * of 1 is replaced by first_time and an end_time of 0 by last_time.
+ *
+ * Returns the number of per_pidcomm entries marked for display.
+ */
+static int determine_display_tasks_filtered(void)
+{
+       struct per_pid *pid_entry;
+       struct per_pidcomm *comm_entry;
+       int shown = 0;
+
+       for (pid_entry = all_data; pid_entry; pid_entry = pid_entry->next) {
+               pid_entry->display = 0;
+
+               if (pid_entry->start_time == 1)
+                       pid_entry->start_time = first_time;
+
+               /* no exit marker, task kept running to the end */
+               if (!pid_entry->end_time)
+                       pid_entry->end_time = last_time;
+
+               for (comm_entry = pid_entry->all; comm_entry;
+                    comm_entry = comm_entry->next) {
+                       comm_entry->display = 0;
+
+                       if (comm_entry->start_time == 1)
+                               comm_entry->start_time = first_time;
+
+                       if (!comm_entry->end_time)
+                               comm_entry->end_time = last_time;
+
+                       if (!passes_filter(pid_entry, comm_entry))
+                               continue;
+
+                       comm_entry->display = 1;
+                       pid_entry->display = 1;
+                       shown++;
+               }
+       }
+
+       return shown;
+}
+
 static int determine_display_tasks(u64 threshold)
 {
        struct per_pid *p;
        struct per_pidcomm *c;
        int count = 0;
 
+       if (process_filter)
+               return determine_display_tasks_filtered();
+
        p = all_data;
        while (p) {
                p->display = 0;
@@ -1074,12 +1162,10 @@ more:
        size = event->header.size;
 
        if (!size || process_event(event) < 0) {
-
-               printf("%p [%p]: skipping unknown header type: %d\n",
-                       (void *)(offset + head),
-                       (void *)(long)(event->header.size),
-                       event->header.type);
-
+               pr_warning("%p [%p]: skipping unknown header type: %d\n",
+                          (void *)(offset + head),
+                          (void *)(long)(event->header.size),
+                          event->header.type);
                /*
                 * assume we lost track of the stream, check alignment, and
                 * increment a single u64 in the hope to catch on again 'soon'.
@@ -1112,7 +1198,8 @@ done:
 
        write_svg_file(output_name);
 
-       printf("Written %2.1f seconds of trace to %s.\n", (last_time - first_time) / 1000000000.0, output_name);
+       pr_info("Written %2.1f seconds of trace to %s.\n",
+               (last_time - first_time) / 1000000000.0, output_name);
 
        return rc;
 }
@@ -1153,6 +1240,14 @@ static int __cmd_record(int argc, const char **argv)
        return cmd_record(i, rec_argv, NULL);
 }
 
+/*
+ * Option callback for -p/--process: adds the argument, when present, to
+ * the process filter list.  Always returns 0.
+ */
+static int
+parse_process(const struct option *opt __used, const char *arg, int __used unset)
+{
+       if (arg == NULL)
+               return 0;
+
+       add_process_filter(arg);
+       return 0;
+}
+
 static const struct option options[] = {
        OPT_STRING('i', "input", &input_name, "file",
                    "input file name"),
@@ -1160,15 +1255,18 @@ static const struct option options[] = {
                    "output file name"),
        OPT_INTEGER('w', "width", &svg_page_width,
                    "page width"),
-       OPT_BOOLEAN('p', "power-only", &power_only,
+       OPT_BOOLEAN('P', "power-only", &power_only,
                    "output power data only"),
+       OPT_CALLBACK('p', "process", NULL, "process",
+                     "process selector. Pass a pid or process name.",
+                      parse_process),
        OPT_END()
 };
 
 
 int cmd_timechart(int argc, const char **argv, const char *prefix __used)
 {
-       symbol__init();
+       symbol__init(0);
 
        page_size = getpagesize();
 
index a1b1d10..2aea913 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "util/symbol.h"
 #include "util/color.h"
+#include "util/thread.h"
 #include "util/util.h"
 #include <linux/rbtree.h>
 #include "util/parse-options.h"
 
 static int                     fd[MAX_NR_CPUS][MAX_COUNTERS];
 
-static int                     system_wide                     =  0;
+static int                     system_wide                     =      0;
 
-static int                     default_interval                = 100000;
+static int                     default_interval                =      0;
 
-static int                     count_filter                    =  5;
-static int                     print_entries                   = 15;
+static int                     count_filter                    =      5;
+static int                     print_entries                   =     15;
 
-static int                     target_pid                      = -1;
-static int                     inherit                         =  0;
-static int                     profile_cpu                     = -1;
-static int                     nr_cpus                         =  0;
-static unsigned int            realtime_prio                   =  0;
-static int                     group                           =  0;
+static int                     target_pid                      =     -1;
+static int                     inherit                         =      0;
+static int                     profile_cpu                     =     -1;
+static int                     nr_cpus                         =      0;
+static unsigned int            realtime_prio                   =      0;
+static int                     group                           =      0;
 static unsigned int            page_size;
-static unsigned int            mmap_pages                      = 16;
-static int                     freq                            =  0;
+static unsigned int            mmap_pages                      =     16;
+static int                     freq                            =   1000; /* 1 KHz */
 
-static int                     delay_secs                      =  2;
-static int                     zero;
-static int                     dump_symtab;
+static int                     delay_secs                      =      2;
+static int                     zero                            =      0;
+static int                     dump_symtab                     =      0;
 
 /*
  * Source
@@ -86,19 +87,16 @@ struct source_line {
        struct source_line      *next;
 };
 
-static char                    *sym_filter                     =  NULL;
-struct sym_entry               *sym_filter_entry               =  NULL;
-static int                     sym_pcnt_filter                 =  5;
-static int                     sym_counter                     =  0;
-static int                     display_weighted                = -1;
+static char                    *sym_filter                     =   NULL;
+struct sym_entry               *sym_filter_entry               =   NULL;
+static int                     sym_pcnt_filter                 =      5;
+static int                     sym_counter                     =      0;
+static int                     display_weighted                =     -1;
 
 /*
  * Symbols
  */
 
-static u64                     min_ip;
-static u64                     max_ip = -1ll;
-
 struct sym_entry {
        struct rb_node          rb_node;
        struct list_head        node;
@@ -106,6 +104,7 @@ struct sym_entry {
        unsigned long           snap_count;
        double                  weight;
        int                     skip;
+       struct map              *map;
        struct source_line      *source;
        struct source_line      *lines;
        struct source_line      **lines_tail;
@@ -119,12 +118,11 @@ struct sym_entry {
 static void parse_source(struct sym_entry *syme)
 {
        struct symbol *sym;
-       struct module *module;
-       struct section *section = NULL;
+       struct map *map;
        FILE *file;
        char command[PATH_MAX*2];
-       const char *path = vmlinux_name;
-       u64 start, end, len;
+       const char *path;
+       u64 len;
 
        if (!syme)
                return;
@@ -135,27 +133,16 @@ static void parse_source(struct sym_entry *syme)
        }
 
        sym = (struct symbol *)(syme + 1);
-       module = sym->module;
-
-       if (module)
-               path = module->path;
-       if (!path)
-               return;
+       map = syme->map;
+       path = map->dso->long_name;
 
-       start = sym->obj_start;
-       if (!start)
-               start = sym->start;
-
-       if (module) {
-               section = module->sections->find_section(module->sections, ".text");
-               if (section)
-                       start -= section->vma;
-       }
-
-       end = start + sym->end - sym->start + 1;
        len = sym->end - sym->start;
 
-       sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path);
+       sprintf(command,
+               "objdump --start-address=0x%016Lx "
+                        "--stop-address=0x%016Lx -dS %s",
+               map->unmap_ip(map, sym->start),
+               map->unmap_ip(map, sym->end), path);
 
        file = popen(command, "r");
        if (!file)
@@ -187,13 +174,11 @@ static void parse_source(struct sym_entry *syme)
 
                if (strlen(src->line)>8 && src->line[8] == ':') {
                        src->eip = strtoull(src->line, NULL, 16);
-                       if (section)
-                               src->eip += section->vma;
+                       src->eip = map->unmap_ip(map, src->eip);
                }
                if (strlen(src->line)>8 && src->line[16] == ':') {
                        src->eip = strtoull(src->line, NULL, 16);
-                       if (section)
-                               src->eip += section->vma;
+                       src->eip = map->unmap_ip(map, src->eip);
                }
        }
        pclose(file);
@@ -245,16 +230,9 @@ static void lookup_sym_source(struct sym_entry *syme)
        struct symbol *symbol = (struct symbol *)(syme + 1);
        struct source_line *line;
        char pattern[PATH_MAX];
-       char *idx;
 
        sprintf(pattern, "<%s>:", symbol->name);
 
-       if (symbol->module) {
-               idx = strstr(pattern, "\t");
-               if (idx)
-                       *idx = 0;
-       }
-
        pthread_mutex_lock(&syme->source_lock);
        for (line = syme->lines; line; line = line->next) {
                if (strstr(line->line, pattern)) {
@@ -340,7 +318,7 @@ static void show_details(struct sym_entry *syme)
 }
 
 /*
- * Symbols will be added here in record_ip and will get out
+ * Symbols will be added here in event__process_sample and will get out
  * after decayed.
  */
 static LIST_HEAD(active_symbols);
@@ -481,18 +459,18 @@ static void print_sym_table(void)
        }
 
        if (nr_counters == 1)
-               printf("             samples    pcnt");
+               printf("             samples  pcnt");
        else
-               printf("   weight    samples    pcnt");
+               printf("   weight    samples  pcnt");
 
        if (verbose)
                printf("         RIP       ");
-       printf("   kernel function\n");
-       printf("   %s    _______   _____",
+       printf(" function                                 DSO\n");
+       printf("   %s    _______ _____",
               nr_counters == 1 ? "     &nbs