Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
Linus Torvalds [Thu, 28 Oct 2010 01:48:00 +0000 (18:48 -0700)]
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
  perf python scripting: Add futex-contention script
  perf python scripting: Fixup cut'n'paste error in sctop script
  perf scripting: Shut up 'perf record' final status
  perf record: Remove newline character from perror() argument
  perf python scripting: Support fedora 11 (audit 1.7.17)
  perf python scripting: Improve the syscalls-by-pid script
  perf python scripting: print the syscall name on sctop
  perf python scripting: Improve the syscalls-counts script
  perf python scripting: Improve the failed-syscalls-by-pid script
  kprobes: Remove redundant text_mutex lock in optimize
  x86/oprofile: Fix uninitialized variable use in debug printk
  tracing: Fix 'faild' -> 'failed' typo
  perf probe: Fix format specified for Dwarf_Off parameter
  perf trace: Fix detection of script extension
  perf trace: Use $PERF_EXEC_PATH in canned report scripts
  perf tools: Document event modifiers
  perf tools: Remove direct slang.h include
  perf_events: Fix for transaction recovery in group_sched_in()
  perf_events: Revert: Fix transaction recovery in group_sched_in()
  perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations
  ...

48 files changed:
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/perf_event.h
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_intel_ds.c
arch/x86/oprofile/nmi_int.c
arch/x86/oprofile/op_model_amd.c
include/linux/interrupt.h
include/linux/ring_buffer.h
include/trace/events/irq.h
kernel/kprobes.c
kernel/perf_event.c
kernel/softirq.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
tools/perf/Documentation/perf-list.txt
tools/perf/Documentation/perf-probe.txt
tools/perf/Documentation/perf-record.txt
tools/perf/builtin-probe.c
tools/perf/builtin-record.c
tools/perf/builtin-trace.c
tools/perf/scripts/perl/bin/failed-syscalls-report
tools/perf/scripts/perl/bin/rw-by-file-report
tools/perf/scripts/perl/bin/rw-by-pid-report
tools/perf/scripts/perl/bin/rwtop-report
tools/perf/scripts/perl/bin/wakeup-latency-report
tools/perf/scripts/perl/bin/workqueue-stats-report
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
tools/perf/scripts/python/bin/futex-contention-record [new file with mode: 0644]
tools/perf/scripts/python/bin/futex-contention-report [new file with mode: 0644]
tools/perf/scripts/python/bin/netdev-times-report
tools/perf/scripts/python/bin/sched-migration-report
tools/perf/scripts/python/bin/sctop-report
tools/perf/scripts/python/bin/syscall-counts-by-pid-report
tools/perf/scripts/python/bin/syscall-counts-report
tools/perf/scripts/python/failed-syscalls-by-pid.py
tools/perf/scripts/python/futex-contention.py [new file with mode: 0644]
tools/perf/scripts/python/sctop.py
tools/perf/scripts/python/syscall-counts-by-pid.py
tools/perf/scripts/python/syscall-counts.py
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/map.h
tools/perf/util/probe-event.c
tools/perf/util/probe-event.h
tools/perf/util/probe-finder.c
tools/perf/util/probe-finder.h
tools/perf/util/ui/browser.c

index 83c4bb1..3ea3dc4 100644 (file)
 #define MSR_AMD64_IBSDCLINAD           0xc0011038
 #define MSR_AMD64_IBSDCPHYSAD          0xc0011039
 #define MSR_AMD64_IBSCTL               0xc001103a
+#define MSR_AMD64_IBSBRTARGET          0xc001103b
 
 /* Fam 10h MSRs */
 #define MSR_FAM10H_MMIO_CONF_BASE      0xc0010058
index 6e742cc..550e26b 100644 (file)
@@ -111,17 +111,18 @@ union cpuid10_edx {
 #define X86_PMC_IDX_FIXED_BTS                          (X86_PMC_IDX_FIXED + 16)
 
 /* IbsFetchCtl bits/masks */
-#define IBS_FETCH_RAND_EN              (1ULL<<57)
-#define IBS_FETCH_VAL                  (1ULL<<49)
-#define IBS_FETCH_ENABLE               (1ULL<<48)
-#define IBS_FETCH_CNT                  0xFFFF0000ULL
-#define IBS_FETCH_MAX_CNT              0x0000FFFFULL
+#define IBS_FETCH_RAND_EN      (1ULL<<57)
+#define IBS_FETCH_VAL          (1ULL<<49)
+#define IBS_FETCH_ENABLE       (1ULL<<48)
+#define IBS_FETCH_CNT          0xFFFF0000ULL
+#define IBS_FETCH_MAX_CNT      0x0000FFFFULL
 
 /* IbsOpCtl bits */
-#define IBS_OP_CNT_CTL                 (1ULL<<19)
-#define IBS_OP_VAL                     (1ULL<<18)
-#define IBS_OP_ENABLE                  (1ULL<<17)
-#define IBS_OP_MAX_CNT                 0x0000FFFFULL
+#define IBS_OP_CNT_CTL         (1ULL<<19)
+#define IBS_OP_VAL             (1ULL<<18)
+#define IBS_OP_ENABLE          (1ULL<<17)
+#define IBS_OP_MAX_CNT         0x0000FFFFULL
+#define IBS_OP_MAX_CNT_EXT     0x007FFFFFULL   /* not a register bit mask */
 
 #ifdef CONFIG_PERF_EVENTS
 extern void init_hw_perf_events(void);
index c1e8c7a..ed63101 100644 (file)
@@ -237,6 +237,7 @@ struct x86_pmu {
         * Intel DebugStore bits
         */
        int             bts, pebs;
+       int             bts_active, pebs_active;
        int             pebs_record_size;
        void            (*drain_pebs)(struct pt_regs *regs);
        struct event_constraint *pebs_constraints;
@@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {}
 
 #endif
 
-static int reserve_ds_buffers(void);
+static void reserve_ds_buffers(void);
 static void release_ds_buffers(void);
 
 static void hw_perf_event_destroy(struct perf_event *event)
@@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)
        if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
            (hwc->sample_period == 1)) {
                /* BTS is not supported by this architecture. */
-               if (!x86_pmu.bts)
+               if (!x86_pmu.bts_active)
                        return -EOPNOTSUPP;
 
                /* BTS is currently only allowed for user-mode. */
@@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
                int precise = 0;
 
                /* Support for constant skid */
-               if (x86_pmu.pebs)
+               if (x86_pmu.pebs_active) {
                        precise++;
 
-               /* Support for IP fixup */
-               if (x86_pmu.lbr_nr)
-                       precise++;
+                       /* Support for IP fixup */
+                       if (x86_pmu.lbr_nr)
+                               precise++;
+               }
 
                if (event->attr.precise_ip > precise)
                        return -EOPNOTSUPP;
@@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
                if (atomic_read(&active_events) == 0) {
                        if (!reserve_pmc_hardware())
                                err = -EBUSY;
-                       else {
-                               err = reserve_ds_buffers();
-                               if (err)
-                                       release_pmc_hardware();
-                       }
+                       else
+                               reserve_ds_buffers();
                }
                if (!err)
                        atomic_inc(&active_events);
index 4977f9c..b7dcd9f 100644 (file)
@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu)
        wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 }
 
+static int alloc_pebs_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       int node = cpu_to_node(cpu);
+       int max, thresh = 1; /* always use a single PEBS record */
+       void *buffer;
+
+       if (!x86_pmu.pebs)
+               return 0;
+
+       buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+       if (unlikely(!buffer))
+               return -ENOMEM;
+
+       max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+
+       ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+       ds->pebs_index = ds->pebs_buffer_base;
+       ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+               max * x86_pmu.pebs_record_size;
+
+       ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
+               thresh * x86_pmu.pebs_record_size;
+
+       return 0;
+}
+
+static void release_pebs_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+       if (!ds || !x86_pmu.pebs)
+               return;
+
+       kfree((void *)(unsigned long)ds->pebs_buffer_base);
+       ds->pebs_buffer_base = 0;
+}
+
+static int alloc_bts_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       int node = cpu_to_node(cpu);
+       int max, thresh;
+       void *buffer;
+
+       if (!x86_pmu.bts)
+               return 0;
+
+       buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+       if (unlikely(!buffer))
+               return -ENOMEM;
+
+       max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+       thresh = max / 16;
+
+       ds->bts_buffer_base = (u64)(unsigned long)buffer;
+       ds->bts_index = ds->bts_buffer_base;
+       ds->bts_absolute_maximum = ds->bts_buffer_base +
+               max * BTS_RECORD_SIZE;
+       ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+               thresh * BTS_RECORD_SIZE;
+
+       return 0;
+}
+
+static void release_bts_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+       if (!ds || !x86_pmu.bts)
+               return;
+
+       kfree((void *)(unsigned long)ds->bts_buffer_base);
+       ds->bts_buffer_base = 0;
+}
+
+static int alloc_ds_buffer(int cpu)
+{
+       int node = cpu_to_node(cpu);
+       struct debug_store *ds;
+
+       ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
+       if (unlikely(!ds))
+               return -ENOMEM;
+
+       per_cpu(cpu_hw_events, cpu).ds = ds;
+
+       return 0;
+}
+
+static void release_ds_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+       if (!ds)
+               return;
+
+       per_cpu(cpu_hw_events, cpu).ds = NULL;
+       kfree(ds);
+}
+
 static void release_ds_buffers(void)
 {
        int cpu;
@@ -82,93 +183,77 @@ static void release_ds_buffers(void)
                return;
 
        get_online_cpus();
-
        for_each_online_cpu(cpu)
                fini_debug_store_on_cpu(cpu);
 
        for_each_possible_cpu(cpu) {
-               struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-               if (!ds)
-                       continue;
-
-               per_cpu(cpu_hw_events, cpu).ds = NULL;
-
-               kfree((void *)(unsigned long)ds->pebs_buffer_base);
-               kfree((void *)(unsigned long)ds->bts_buffer_base);
-               kfree(ds);
+               release_pebs_buffer(cpu);
+               release_bts_buffer(cpu);
+               release_ds_buffer(cpu);
        }
-
        put_online_cpus();
 }
 
-static int reserve_ds_buffers(void)
+static void reserve_ds_buffers(void)
 {
-       int cpu, err = 0;
+       int bts_err = 0, pebs_err = 0;
+       int cpu;
+
+       x86_pmu.bts_active = 0;
+       x86_pmu.pebs_active = 0;
 
        if (!x86_pmu.bts && !x86_pmu.pebs)
-               return 0;
+               return;
+
+       if (!x86_pmu.bts)
+               bts_err = 1;
+
+       if (!x86_pmu.pebs)
+               pebs_err = 1;
 
        get_online_cpus();
 
        for_each_possible_cpu(cpu) {
-               struct debug_store *ds;
-               void *buffer;
-               int max, thresh;
+               if (alloc_ds_buffer(cpu)) {
+                       bts_err = 1;
+                       pebs_err = 1;
+               }
+
+               if (!bts_err && alloc_bts_buffer(cpu))
+                       bts_err = 1;
 
-               err = -ENOMEM;
-               ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-               if (unlikely(!ds))
+               if (!pebs_err && alloc_pebs_buffer(cpu))
+                       pebs_err = 1;
+
+               if (bts_err && pebs_err)
                        break;
-               per_cpu(cpu_hw_events, cpu).ds = ds;
-
-               if (x86_pmu.bts) {
-                       buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
-                       if (unlikely(!buffer))
-                               break;
-
-                       max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-                       thresh = max / 16;
-
-                       ds->bts_buffer_base = (u64)(unsigned long)buffer;
-                       ds->bts_index = ds->bts_buffer_base;
-                       ds->bts_absolute_maximum = ds->bts_buffer_base +
-                               max * BTS_RECORD_SIZE;
-                       ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-                               thresh * BTS_RECORD_SIZE;
-               }
+       }
 
-               if (x86_pmu.pebs) {
-                       buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
-                       if (unlikely(!buffer))
-                               break;
-
-                       max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
-
-                       ds->pebs_buffer_base = (u64)(unsigned long)buffer;
-                       ds->pebs_index = ds->pebs_buffer_base;
-                       ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-                               max * x86_pmu.pebs_record_size;
-                       /*
-                        * Always use single record PEBS
-                        */
-                       ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
-                               x86_pmu.pebs_record_size;
-               }
+       if (bts_err) {
+               for_each_possible_cpu(cpu)
+                       release_bts_buffer(cpu);
+       }
 
-               err = 0;
+       if (pebs_err) {
+               for_each_possible_cpu(cpu)
+                       release_pebs_buffer(cpu);
        }
 
-       if (err)
-               release_ds_buffers();
-       else {
+       if (bts_err && pebs_err) {
+               for_each_possible_cpu(cpu)
+                       release_ds_buffer(cpu);
+       } else {
+               if (x86_pmu.bts && !bts_err)
+                       x86_pmu.bts_active = 1;
+
+               if (x86_pmu.pebs && !pebs_err)
+                       x86_pmu.pebs_active = 1;
+
                for_each_online_cpu(cpu)
                        init_debug_store_on_cpu(cpu);
        }
 
        put_online_cpus();
-
-       return err;
 }
 
 /*
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
        if (!event)
                return 0;
 
-       if (!ds)
+       if (!x86_pmu.bts_active)
                return 0;
 
        at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
        struct pebs_record_core *at, *top;
        int n;
 
-       if (!ds || !x86_pmu.pebs)
+       if (!x86_pmu.pebs_active)
                return;
 
        at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
        u64 status = 0;
        int bit, n;
 
-       if (!ds || !x86_pmu.pebs)
+       if (!x86_pmu.pebs_active)
                return;
 
        at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
@@ -630,9 +715,8 @@ static void intel_ds_init(void)
 
 #else /* CONFIG_CPU_SUP_INTEL */
 
-static int reserve_ds_buffers(void)
+static void reserve_ds_buffers(void)
 {
-       return 0;
 }
 
 static void release_ds_buffers(void)
index bd1489c..4e8baad 100644 (file)
@@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops)
                case 0x11:
                        cpu_type = "x86-64/family11h";
                        break;
+               case 0x12:
+                       cpu_type = "x86-64/family12h";
+                       break;
+               case 0x14:
+                       cpu_type = "x86-64/family14h";
+                       break;
                default:
                        return -ENODEV;
                }
index 42fb46f..a011bcc 100644 (file)
@@ -48,17 +48,24 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];
 
 static u32 ibs_caps;
 
-struct op_ibs_config {
+struct ibs_config {
        unsigned long op_enabled;
        unsigned long fetch_enabled;
        unsigned long max_cnt_fetch;
        unsigned long max_cnt_op;
        unsigned long rand_en;
        unsigned long dispatched_ops;
+       unsigned long branch_target;
 };
 
-static struct op_ibs_config ibs_config;
-static u64 ibs_op_ctl;
+struct ibs_state {
+       u64             ibs_op_ctl;
+       int             branch_target;
+       unsigned long   sample_size;
+};
+
+static struct ibs_config ibs_config;
+static struct ibs_state ibs_state;
 
 /*
  * IBS cpuid feature detection
@@ -71,8 +78,16 @@ static u64 ibs_op_ctl;
  * bit 0 is used to indicate the existence of IBS.
  */
 #define IBS_CAPS_AVAIL                 (1U<<0)
+#define IBS_CAPS_FETCHSAM              (1U<<1)
+#define IBS_CAPS_OPSAM                 (1U<<2)
 #define IBS_CAPS_RDWROPCNT             (1U<<3)
 #define IBS_CAPS_OPCNT                 (1U<<4)
+#define IBS_CAPS_BRNTRGT               (1U<<5)
+#define IBS_CAPS_OPCNTEXT              (1U<<6)
+
+#define IBS_CAPS_DEFAULT               (IBS_CAPS_AVAIL         \
+                                        | IBS_CAPS_FETCHSAM    \
+                                        | IBS_CAPS_OPSAM)
 
 /*
  * IBS APIC setup
@@ -99,12 +114,12 @@ static u32 get_ibs_caps(void)
        /* check IBS cpuid feature flags */
        max_level = cpuid_eax(0x80000000);
        if (max_level < IBS_CPUID_FEATURES)
-               return IBS_CAPS_AVAIL;
+               return IBS_CAPS_DEFAULT;
 
        ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
        if (!(ibs_caps & IBS_CAPS_AVAIL))
                /* cpuid flags not valid */
-               return IBS_CAPS_AVAIL;
+               return IBS_CAPS_DEFAULT;
 
        return ibs_caps;
 }
@@ -197,8 +212,8 @@ op_amd_handle_ibs(struct pt_regs * const regs,
                rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
                if (ctl & IBS_OP_VAL) {
                        rdmsrl(MSR_AMD64_IBSOPRIP, val);
-                       oprofile_write_reserve(&entry, regs, val,
-                                              IBS_OP_CODE, IBS_OP_SIZE);
+                       oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
+                                              ibs_state.sample_size);
                        oprofile_add_data64(&entry, val);
                        rdmsrl(MSR_AMD64_IBSOPDATA, val);
                        oprofile_add_data64(&entry, val);
@@ -210,10 +225,14 @@ op_amd_handle_ibs(struct pt_regs * const regs,
                        oprofile_add_data64(&entry, val);
                        rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
                        oprofile_add_data64(&entry, val);
+                       if (ibs_state.branch_target) {
+                               rdmsrl(MSR_AMD64_IBSBRTARGET, val);
+                               oprofile_add_data(&entry, (unsigned long)val);
+                       }
                        oprofile_write_commit(&entry);
 
                        /* reenable the IRQ */
-                       ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
+                       ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
                        wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
                }
        }
@@ -226,21 +245,32 @@ static inline void op_amd_start_ibs(void)
        if (!ibs_caps)
                return;
 
+       memset(&ibs_state, 0, sizeof(ibs_state));
+
+       /*
+        * Note: Since the max count settings may out of range we
+        * write back the actual used values so that userland can read
+        * it.
+        */
+
        if (ibs_config.fetch_enabled) {
-               val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT;
+               val = ibs_config.max_cnt_fetch >> 4;
+               val = min(val, IBS_FETCH_MAX_CNT);
+               ibs_config.max_cnt_fetch = val << 4;
                val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
                val |= IBS_FETCH_ENABLE;
                wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
        }
 
        if (ibs_config.op_enabled) {
-               ibs_op_ctl = ibs_config.max_cnt_op >> 4;
+               val = ibs_config.max_cnt_op >> 4;
                if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
                        /*
                         * IbsOpCurCnt not supported.  See
                         * op_amd_randomize_ibs_op() for details.
                         */
-                       ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
+                       val = clamp(val, 0x0081ULL, 0xFF80ULL);
+                       ibs_config.max_cnt_op = val << 4;
                } else {
                        /*
                         * The start value is randomized with a
@@ -248,13 +278,24 @@ static inline void op_amd_start_ibs(void)
                         * with the half of the randomized range. Also
                         * avoid underflows.
                         */
-                       ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
-                                        IBS_OP_MAX_CNT);
+                       val += IBS_RANDOM_MAXCNT_OFFSET;
+                       if (ibs_caps & IBS_CAPS_OPCNTEXT)
+                               val = min(val, IBS_OP_MAX_CNT_EXT);
+                       else
+                               val = min(val, IBS_OP_MAX_CNT);
+                       ibs_config.max_cnt_op =
+                               (val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
+               }
+               val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
+               val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+               val |= IBS_OP_ENABLE;
+               ibs_state.ibs_op_ctl = val;
+               ibs_state.sample_size = IBS_OP_SIZE;
+               if (ibs_config.branch_target) {
+                       ibs_state.branch_target = 1;
+                       ibs_state.sample_size++;
                }
-               if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
-                       ibs_op_ctl |= IBS_OP_CNT_CTL;
-               ibs_op_ctl |= IBS_OP_ENABLE;
-               val = op_amd_randomize_ibs_op(ibs_op_ctl);
+               val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
                wrmsrl(MSR_AMD64_IBSOPCTL, val);
        }
 }
@@ -281,29 +322,25 @@ static inline int eilvt_is_available(int offset)
 
 static inline int ibs_eilvt_valid(void)
 {
-       u64 val;
        int offset;
+       u64 val;
 
        rdmsrl(MSR_AMD64_IBSCTL, val);
+       offset = val & IBSCTL_LVT_OFFSET_MASK;
+
        if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
-               pr_err(FW_BUG "cpu %d, invalid IBS "
-                      "interrupt offset %d (MSR%08X=0x%016llx)",
-                      smp_processor_id(), offset,
-                      MSR_AMD64_IBSCTL, val);
+               pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
+                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
                return 0;
        }
 
-       offset = val & IBSCTL_LVT_OFFSET_MASK;
-
-       if (eilvt_is_available(offset))
-               return !0;
-
-       pr_err(FW_BUG "cpu %d, IBS interrupt offset %d "
-              "not available (MSR%08X=0x%016llx)",
-              smp_processor_id(), offset,
-              MSR_AMD64_IBSCTL, val);
+       if (!eilvt_is_available(offset)) {
+               pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
+                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+               return 0;
+       }
 
-       return 0;
+       return 1;
 }
 
 static inline int get_ibs_offset(void)
@@ -630,28 +667,33 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
        /* model specific files */
 
        /* setup some reasonable defaults */
+       memset(&ibs_config, 0, sizeof(ibs_config));
        ibs_config.max_cnt_fetch = 250000;
-       ibs_config.fetch_enabled = 0;
        ibs_config.max_cnt_op = 250000;
-       ibs_config.op_enabled = 0;
-       ibs_config.dispatched_ops = 0;
-
-       dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
-       oprofilefs_create_ulong(sb, dir, "enable",
-                               &ibs_config.fetch_enabled);
-       oprofilefs_create_ulong(sb, dir, "max_count",
-                               &ibs_config.max_cnt_fetch);
-       oprofilefs_create_ulong(sb, dir, "rand_enable",
-                               &ibs_config.rand_en);
-
-       dir = oprofilefs_mkdir(sb, root, "ibs_op");
-       oprofilefs_create_ulong(sb, dir, "enable",
-                               &ibs_config.op_enabled);
-       oprofilefs_create_ulong(sb, dir, "max_count",
-                               &ibs_config.max_cnt_op);
-       if (ibs_caps & IBS_CAPS_OPCNT)
-               oprofilefs_create_ulong(sb, dir, "dispatched_ops",
-                                       &ibs_config.dispatched_ops);
+
+       if (ibs_caps & IBS_CAPS_FETCHSAM) {
+               dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
+               oprofilefs_create_ulong(sb, dir, "enable",
+                                       &ibs_config.fetch_enabled);
+               oprofilefs_create_ulong(sb, dir, "max_count",
+                                       &ibs_config.max_cnt_fetch);
+               oprofilefs_create_ulong(sb, dir, "rand_enable",
+                                       &ibs_config.rand_en);
+       }
+
+       if (ibs_caps & IBS_CAPS_OPSAM) {
+               dir = oprofilefs_mkdir(sb, root, "ibs_op");
+               oprofilefs_create_ulong(sb, dir, "enable",
+                                       &ibs_config.op_enabled);
+               oprofilefs_create_ulong(sb, dir, "max_count",
+                                       &ibs_config.max_cnt_op);
+               if (ibs_caps & IBS_CAPS_OPCNT)
+                       oprofilefs_create_ulong(sb, dir, "dispatched_ops",
+                                               &ibs_config.dispatched_ops);
+               if (ibs_caps & IBS_CAPS_BRNTRGT)
+                       oprofilefs_create_ulong(sb, dir, "branch_target",
+                                               &ibs_config.branch_target);
+       }
 
        return 0;
 }
index 01b2816..79d0c4f 100644 (file)
@@ -410,7 +410,7 @@ extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
 static inline void __raise_softirq_irqoff(unsigned int nr)
 {
-       trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL);
+       trace_softirq_raise(nr);
        or_softirq_pending(1UL << nr);
 }
 
index 25b4f68..8d3a248 100644 (file)
@@ -62,18 +62,6 @@ enum ring_buffer_type {
 unsigned ring_buffer_event_length(struct ring_buffer_event *event);
 void *ring_buffer_event_data(struct ring_buffer_event *event);
 
-/**
- * ring_buffer_event_time_delta - return the delta timestamp of the event
- * @event: the event to get the delta timestamp of
- *
- * The delta timestamp is the 27 bit timestamp since the last event.
- */
-static inline unsigned
-ring_buffer_event_time_delta(struct ring_buffer_event *event)
-{
-       return event->time_delta;
-}
-
 /*
  * ring_buffer_discard_commit will remove an event that has not
  *   ben committed yet. If this is used, then ring_buffer_unlock_commit
index 6fa7cba..1c09820 100644 (file)
@@ -86,76 +86,62 @@ TRACE_EVENT(irq_handler_exit,
 
 DECLARE_EVENT_CLASS(softirq,
 
-       TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+       TP_PROTO(unsigned int vec_nr),
 
-       TP_ARGS(h, vec),
+       TP_ARGS(vec_nr),
 
        TP_STRUCT__entry(
-               __field(        int,    vec                     )
+               __field(        unsigned int,   vec     )
        ),
 
        TP_fast_assign(
-               if (vec)
-                       __entry->vec = (int)(h - vec);
-               else
-                       __entry->vec = (int)(long)h;
+               __entry->vec = vec_nr;
        ),
 
-       TP_printk("vec=%d [action=%s]", __entry->vec,
+       TP_printk("vec=%u [action=%s]", __entry->vec,
                  show_softirq_name(__entry->vec))
 );
 
 /**
  * softirq_entry - called immediately before the softirq handler
- * @h: pointer to struct softirq_action
- * @vec: pointer to first struct softirq_action in softirq_vec array
+ * @vec_nr:  softirq vector number
  *
- * The @h parameter, contains a pointer to the struct softirq_action
- * which has a pointer to the action handler that is called. By subtracting
- * the @vec pointer from the @h pointer, we can determine the softirq
- * number. Also, when used in combination with the softirq_exit tracepoint
- * we can determine the softirq latency.
+ * When used in combination with the softirq_exit tracepoint
+ * we can determine the softirq handler runtine.
  */
 DEFINE_EVENT(softirq, softirq_entry,
 
-       TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+       TP_PROTO(unsigned int vec_nr),
 
-       TP_ARGS(h, vec)
+       TP_ARGS(vec_nr)
 );
 
 /**
  * softirq_exit - called immediately after the softirq handler returns
- * @h: pointer to struct softirq_action
- * @vec: pointer to first struct softirq_action in softirq_vec array
+ * @vec_nr:  softirq vector number
  *
- * The @h parameter contains a pointer to the struct softirq_action
- * that has handled the softirq. By subtracting the @vec pointer from
- * the @h pointer, we can determine the softirq number. Also, when used in
- * combination with the softirq_entry tracepoint we can determine the softirq
- * latency.
+ * When used in combination with the softirq_entry tracepoint
+ * we can determine the softirq handler runtine.
  */
 DEFINE_EVENT(softirq, softirq_exit,
 
-       TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+       TP_PROTO(unsigned int vec_nr),
 
-       TP_ARGS(h, vec)
+       TP_ARGS(vec_nr)
 );
 
 /**
  * softirq_raise - called immediately when a softirq is raised
- * @h: pointer to struct softirq_action
- * @vec: pointer to first struct softirq_action in softirq_vec array
+ * @vec_nr:  softirq vector number
  *
- * The @h parameter contains a pointer to the softirq vector number which is
- * raised. @vec is NULL and it means @h includes vector number not
- * softirq_action. When used in combination with the softirq_entry tracepoint
- * we can determine the softirq raise latency.
+ * When used in combination with the softirq_entry tracepoint
+ * we can determine the softirq raise to run latency.
  */
 DEFINE_EVENT(softirq, softirq_raise,
 
-       TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+       TP_PROTO(unsigned int vec_nr),
 
-       TP_ARGS(h, vec)
+       TP_ARGS(vec_nr)
 );
 
 #endif /*  _TRACE_IRQ_H */
index 56a8919..99865c3 100644 (file)
@@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 /* NOTE: change this value only with kprobe_mutex held */
 static bool kprobes_all_disarmed;
 
-static DEFINE_MUTEX(kprobe_mutex);     /* Protects kprobe_table */
+/* This protects kprobe_table and optimizing_list */
+static DEFINE_MUTEX(kprobe_mutex);
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
 static struct {
        spinlock_t lock ____cacheline_aligned_in_smp;
@@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
 }
 
 #ifdef CONFIG_SYSCTL
+/* This should be called with kprobe_mutex locked */
 static void __kprobes optimize_all_kprobes(void)
 {
        struct hlist_head *head;
@@ -607,17 +609,16 @@ static void __kprobes optimize_all_kprobes(void)
                return;
 
        kprobes_allow_optimization = true;
-       mutex_lock(&text_mutex);
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist)
                        if (!kprobe_disabled(p))
                                optimize_kprobe(p);
        }
-       mutex_unlock(&text_mutex);
        printk(KERN_INFO "Kprobes globally optimized\n");
 }
 
+/* This should be called with kprobe_mutex locked */
 static void __kprobes unoptimize_all_kprobes(void)
 {
        struct hlist_head *head;
index f309e80..517d827 100644 (file)
@@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event)
        return event->cpu == -1 || event->cpu == smp_processor_id();
 }
 
-static int
-__event_sched_out(struct perf_event *event,
+static void
+event_sched_out(struct perf_event *event,
                  struct perf_cpu_context *cpuctx,
                  struct perf_event_context *ctx)
 {
@@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event,
        }
 
        if (event->state != PERF_EVENT_STATE_ACTIVE)
-               return 0;
+               return;
 
        event->state = PERF_EVENT_STATE_INACTIVE;
        if (event->pending_disable) {
                event->pending_disable = 0;
                event->state = PERF_EVENT_STATE_OFF;
        }
+       event->tstamp_stopped = ctx->time;
        event->pmu->del(event, 0);
        event->oncpu = -1;
 
@@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event,
        ctx->nr_active--;
        if (event->attr.exclusive || !cpuctx->active_oncpu)
                cpuctx->exclusive = 0;
-       return 1;
-}
-
-static void
-event_sched_out(struct perf_event *event,
-                 struct perf_cpu_context *cpuctx,
-                 struct perf_event_context *ctx)
-{
-       int ret;
-
-       ret = __event_sched_out(event, cpuctx, ctx);
-       if (ret)
-               event->tstamp_stopped = ctx->time;
 }
 
 static void
@@ -664,7 +652,7 @@ retry:
 }
 
 static int
-__event_sched_in(struct perf_event *event,
+event_sched_in(struct perf_event *event,
                 struct perf_cpu_context *cpuctx,
                 struct perf_event_context *ctx)
 {
@@ -684,6 +672,8 @@ __event_sched_in(struct perf_event *event,
                return -EAGAIN;
        }
 
+       event->tstamp_running += ctx->time - event->tstamp_stopped;
+
        if (!is_software_event(event))
                cpuctx->active_oncpu++;
        ctx->nr_active++;
@@ -694,35 +684,6 @@ __event_sched_in(struct perf_event *event,
        return 0;
 }
 
-static inline int
-event_sched_in(struct perf_event *event,
-                struct perf_cpu_context *cpuctx,
-                struct perf_event_context *ctx)
-{
-       int ret = __event_sched_in(event, cpuctx, ctx);
-       if (ret)
-               return ret;
-       event->tstamp_running += ctx->time - event->tstamp_stopped;
-       return 0;
-}
-
-static void
-group_commit_event_sched_in(struct perf_event *group_event,
-              struct perf_cpu_context *cpuctx,
-              struct perf_event_context *ctx)
-{
-       struct perf_event *event;
-       u64 now = ctx->time;
-
-       group_event->tstamp_running += now - group_event->tstamp_stopped;
-       /*
-        * Schedule in siblings as one group (if any):
-        */
-       list_for_each_entry(event, &group_event->sibling_list, group_entry) {
-               event->tstamp_running += now - event->tstamp_stopped;
-       }
-}
-
 static int
 group_sched_in(struct perf_event *group_event,
               struct perf_cpu_context *cpuctx,
@@ -730,19 +691,15 @@ group_sched_in(struct perf_event *group_event,
 {
        struct perf_event *event, *partial_group = NULL;
        struct pmu *pmu = group_event->pmu;
+       u64 now = ctx->time;
+       bool simulate = false;
 
        if (group_event->state == PERF_EVENT_STATE_OFF)
                return 0;
 
        pmu->start_txn(pmu);
 
-       /*
-        * use __event_sched_in() to delay updating tstamp_running
-        * until the transaction is committed. In case of failure
-        * we will keep an unmodified tstamp_running which is a
-        * requirement to get correct timing information
-        */
-       if (__event_sched_in(group_event, cpuctx, ctx)) {
+       if (event_sched_in(group_event, cpuctx, ctx)) {
                pmu->cancel_txn(pmu);
                return -EAGAIN;
        }
@@ -751,31 +708,42 @@ group_sched_in(struct perf_event *group_event,
         * Schedule in siblings as one group (if any):
         */
        list_for_each_entry(event, &group_event->sibling_list, group_entry) {
-               if (__event_sched_in(event, cpuctx, ctx)) {
+               if (event_sched_in(event, cpuctx, ctx)) {
                        partial_group = event;
                        goto group_error;
                }
        }
 
-       if (!pmu->commit_txn(pmu)) {
-               /* commit tstamp_running */
-               group_commit_event_sched_in(group_event, cpuctx, ctx);
+       if (!pmu->commit_txn(pmu))
                return 0;
-       }
+
 group_error:
        /*
         * Groups can be scheduled in as one unit only, so undo any
         * partial group before returning:
+        * The events up to the failed event are scheduled out normally,
+        * tstamp_stopped will be updated.
         *
-        * use __event_sched_out() to avoid updating tstamp_stopped
-        * because the event never actually ran
+        * The failed events and the remaining siblings need to have
+        * their timings updated as if they had gone thru event_sched_in()
+        * and event_sched_out(). This is required to get consistent timings
+        * across the group. This also takes care of the case where the group
+        * could never be scheduled by ensuring tstamp_stopped is set to mark
+        * the time the event was actually stopped, such that time delta
+        * calculation in update_event_times() is correct.
         */
        list_for_each_entry(event, &group_event->sibling_list, group_entry) {
                if (event == partial_group)
-                       break;
-               __event_sched_out(event, cpuctx, ctx);
+                       simulate = true;
+
+               if (simulate) {
+                       event->tstamp_running += now - event->tstamp_stopped;
+                       event->tstamp_stopped = now;
+               } else {
+                       event_sched_out(event, cpuctx, ctx);
+               }
        }
-       __event_sched_out(group_event, cpuctx, ctx);
+       event_sched_out(group_event, cpuctx, ctx);
 
        pmu->cancel_txn(pmu);
 
index f02a9df..18f4be0 100644 (file)
@@ -229,18 +229,20 @@ restart:
 
        do {
                if (pending & 1) {
+                       unsigned int vec_nr = h - softirq_vec;
                        int prev_count = preempt_count();
-                       kstat_incr_softirqs_this_cpu(h - softirq_vec);
 
-                       trace_softirq_entry(h, softirq_vec);
+                       kstat_incr_softirqs_this_cpu(vec_nr);
+
+                       trace_softirq_entry(vec_nr);
                        h->action(h);
-                       trace_softirq_exit(h, softirq_vec);
+                       trace_softirq_exit(vec_nr);
                        if (unlikely(prev_count != preempt_count())) {
-                               printk(KERN_ERR "huh, entered softirq %td %s %p"
+                               printk(KERN_ERR "huh, entered softirq %u %s %p"
                                       "with preempt_count %08x,"
-                                      " exited with %08x?\n", h - softirq_vec,
-                                      softirq_to_name[h - softirq_vec],
-                                      h->action, prev_count, preempt_count());
+                                      " exited with %08x?\n", vec_nr,
+                                      softirq_to_name[vec_nr], h->action,
+                                      prev_count, preempt_count());
                                preempt_count() = prev_count;
                        }
 
index c3dab05..9ed509a 100644 (file)
@@ -224,6 +224,9 @@ enum {
        RB_LEN_TIME_STAMP = 16,
 };
 
+#define skip_time_extend(event) \
+       ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
+
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
        return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
        return length + RB_EVNT_HDR_SIZE;
 }
 
-/* inline for ring buffer fast paths */
-static unsigned
+/*
+ * Return the length of the given event. Will return
+ * the length of the time extend if the event is a
+ * time extend.
+ */
+static inline unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
        switch (event->type_len) {
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
        return 0;
 }
 
+/*
+ * Return total length of time extend and data,
+ *   or just the event length for all other events.
+ */
+static inline unsigned
+rb_event_ts_length(struct ring_buffer_event *event)
+{
+       unsigned len = 0;
+
+       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+               /* time extends include the data event after it */
+               len = RB_LEN_TIME_EXTEND;
+               event = skip_time_extend(event);
+       }
+       return len + rb_event_length(event);
+}
+
 /**
  * ring_buffer_event_length - return the length of the event
  * @event: the event to get the length of
+ *
+ * Returns the size of the data load of a data event.
+ * If the event is something other than a data event, it
+ * returns the size of the event itself. With the exception
+ * of a TIME EXTEND, where it still returns the size of the
+ * data load of the data event after it.
  */
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
-       unsigned length = rb_event_length(event);
+       unsigned length;
+
+       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+               event = skip_time_extend(event);
+
+       length = rb_event_length(event);
        if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
                return length;
        length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 static void *
 rb_event_data(struct ring_buffer_event *event)
 {
+       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+               event = skip_time_extend(event);
        BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
        /* If length is in len field, then array[0] has the data */
        if (event->type_len)
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
 /* Max payload is BUF_PAGE_SIZE - header (8bytes) */
 #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
 
-/* Max number of timestamps that can fit on a page */
-#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
-
 int ring_buffer_print_page_header(struct trace_seq *s)
 {
        struct buffer_data_page field;
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
        iter->head = 0;
 }
 
+/* Slow path, do not inline */
+static noinline struct ring_buffer_event *
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+{
+       event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+
+       /* Not the first event on the page? */
+       if (rb_event_index(event)) {
+               event->time_delta = delta & TS_MASK;
+               event->array[0] = delta >> TS_SHIFT;
+       } else {
+               /* nope, just zero it */
+               event->time_delta = 0;
+               event->array[0] = 0;
+       }
+
+       return skip_time_extend(event);
+}
+
 /**
  * ring_buffer_update_event - update event type and data
  * @event: the even to update
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
  * data field.
  */
 static void
-rb_update_event(struct ring_buffer_event *event,
-                        unsigned type, unsigned length)
+rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
+               struct ring_buffer_event *event, unsigned length,
+               int add_timestamp, u64 delta)
 {
-       event->type_len = type;
-
-       switch (type) {
-
-       case RINGBUF_TYPE_PADDING:
-       case RINGBUF_TYPE_TIME_EXTEND:
-       case RINGBUF_TYPE_TIME_STAMP:
-               break;
+       /* Only a commit updates the timestamp */
+       if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
+               delta = 0;
 
-       case 0:
-               length -= RB_EVNT_HDR_SIZE;
-               if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
-                       event->array[0] = length;
-               else
-                       event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
-               break;
-       default:
-               BUG();
+       /*
+        * If we need to add a timestamp, then we
+        * add it to the start of the resevered space.
+        */
+       if (unlikely(add_timestamp)) {
+               event = rb_add_time_stamp(event, delta);
+               length -= RB_LEN_TIME_EXTEND;
+               delta = 0;
        }
+
+       event->time_delta = delta;
+       length -= RB_EVNT_HDR_SIZE;
+       if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+               event->type_len = 0;
+               event->array[0] = length;
+       } else
+               event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 }
 
 /*
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
        local_sub(length, &tail_page->write);
 }
 
-static struct ring_buffer_event *
+/*
+ * This is the slow path, force gcc not to inline it.
+ */
+static noinline struct ring_buffer_event *
 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
             unsigned long length, unsigned long tail,
-            struct buffer_page *tail_page, u64 *ts)
+            struct buffer_page *tail_page, u64 ts)
 {
        struct buffer_page *commit_page = cpu_buffer->commit_page;
        struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
                 * Nested commits always have zero deltas, so
                 * just reread the time stamp
                 */
-               *ts = rb_time_stamp(buffer);
-               next_page->page->time_stamp = *ts;
+               ts = rb_time_stamp(buffer);
+               next_page->page->time_stamp = ts;
        }
 
  out_again:
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 
 static struct ring_buffer_event *
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
-                 unsigned type, unsigned long length, u64 *ts)
+                 unsigned long length, u64 ts,
+                 u64 delta, int add_timestamp)
 {
        struct buffer_page *tail_page;
        struct ring_buffer_event *event;
        unsigned long tail, write;
 
+       /*
+        * If the time delta since the last event is too big to
+        * hold in the time field of the event, then we append a
+        * TIME EXTEND event ahead of the data event.
+        */
+       if (unlikely(add_timestamp))
+               length += RB_LEN_TIME_EXTEND;
+
        tail_page = cpu_buffer->tail_page;
        write = local_add_return(length, &tail_page->write);
 
@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
        tail = write - length;
 
        /* See if we shot pass the end of this buffer page */
-       if (write > BUF_PAGE_SIZE)
+       if (unlikely(write > BUF_PAGE_SIZE))
                return rb_move_tail(cpu_buffer, length, tail,
                                    tail_page, ts);
 
@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 
        event = __rb_page_index(tail_page, tail);
        kmemcheck_annotate_bitfield(event, bitfield);
-       rb_update_event(event, type, length);
+       rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
 
-       /* The passed in type is zero for DATA */
-       if (likely(!type))
-               local_inc(&tail_page->entries);
+       local_inc(&tail_page->entries);
 
        /*
         * If this is the first commit on the page, then update
         * its timestamp.
         */
        if (!tail)
-               tail_page->page->time_stamp = *ts;
+               tail_page->page->time_stamp = ts;
 
        return event;
 }
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
        unsigned long addr;
 
        new_index = rb_event_index(event);
-       old_index = new_index + rb_event_length(event);
+       old_index = new_index + rb_event_ts_length(event);
        addr = (unsigned long)event;
        addr &= PAGE_MASK;
 
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
        return 0;
 }
 
-static int
-rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
-                 u64 *ts, u64 *delta)
-{
-       struct ring_buffer_event *event;
-       int ret;
-
-       WARN_ONCE(*delta > (1ULL << 59),
-                 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
-                 (unsigned long long)*delta,
-                 (unsigned long long)*ts,
-                 (unsigned long long)cpu_buffer->write_stamp);
-
-       /*
-        * The delta is too big, we to add a
-        * new timestamp.
-        */
-       event = __rb_reserve_next(cpu_buffer,
-                                 RINGBUF_TYPE_TIME_EXTEND,
-                                 RB_LEN_TIME_EXTEND,
-                                 ts);
-       if (!event)
-               return -EBUSY;
-
-       if (PTR_ERR(event) == -EAGAIN)
-               return -EAGAIN;
-
-       /* Only a commited time event can update the write stamp */
-       if (rb_event_is_commit(cpu_buffer, event)) {
-               /*
-                * If this is the first on the page, then it was
-                * updated with the page itself. Try to discard it
-                * and if we can't just make it zero.
-                */
-               if (rb_event_index(event)) {
-                       event->time_delta = *delta & TS_MASK;
-                       event->array[0] = *delta >> TS_SHIFT;
-               } else {
-                       /* try to discard, since we do not need this */
-                       if (!rb_try_to_discard(cpu_buffer, event)) {
-                               /* nope, just zero it */
-                               event->time_delta = 0;
-                               event->array[0] = 0;
-                       }
-               }
-               cpu_buffer->write_stamp = *ts;
-               /* let the caller know this was the commit */
-               ret = 1;
-       } else {
-               /* Try to discard the event */
-               if (!rb_try_to_discard(cpu_buffer, event)) {
-                       /* Darn, this is just wasted space */
-                       event->time_delta = 0;
-                       event->array[0] = 0;
-               }
-               ret = 0;
-       }
-
-       *delta = 0;
-
-       return ret;
-}
-
 static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
 {
        local_inc(&cpu_buffer->committing);
        local_inc(&cpu_buffer->commits);
 }
 
-static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
 {
        unsigned long commits;
 
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
                      unsigned long length)
 {
        struct ring_buffer_event *event;
-       u64 ts, delta = 0;
-       int commit = 0;
+       u64 ts, delta;
        int nr_loops = 0;
+       int add_timestamp;
+       u64 diff;
 
        rb_start_commit(cpu_buffer);
 
@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
        length = rb_calculate_event_length(length);
  again:
+       add_timestamp = 0;
+       delta = 0;
+
        /*
         * We allow for interrupts to reenter here and do a trace.
         * If one does, it will cause this original code to loop
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
                goto out_fail;
 
        ts = rb_time_stamp(cpu_buffer->buffer);
+       diff = ts - cpu_buffer->write_stamp;
 
-       /*
-        * Only the first commit can update the timestamp.
-        * Yes there is a race here. If an interrupt comes in
-        * just after the conditional and it traces too, then it
-        * will also check the deltas. More than one timestamp may
-        * also be made. But only the entry that did the actual
-        * commit will be something other than zero.
-        */
-       if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
-                  rb_page_write(cpu_buffer->tail_page) ==
-                  rb_commit_index(cpu_buffer))) {
-               u64 diff;
-
-               diff = ts - cpu_buffer->write_stamp;
-
-               /* make sure this diff is calculated here */
-               barrier();
-
-               /* Did the write stamp get updated already? */
-               if (unlikely(ts < cpu_buffer->write_stamp))
-                       goto get_event;
+       /* make sure this diff is calculated here */
+       barrier();
 
+       /* Did the write stamp get updated already? */
+       if (likely(ts >= cpu_buffer->write_stamp)) {
                delta = diff;
                if (unlikely(test_time_stamp(delta))) {
-
-                       commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-                       if (commit == -EBUSY)
-                               goto out_fail;
-
-                       if (commit == -EAGAIN)
-                               goto again;
-
-                       RB_WARN_ON(cpu_buffer, commit < 0);
+                       WARN_ONCE(delta > (1ULL << 59),
+                                 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+                                 (unsigned long long)delta,
+                                 (unsigned long long)ts,
+                                 (unsigned long long)cpu_buffer->write_stamp);
+                       add_timestamp = 1;
                }
        }
 
- get_event:
-       event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+       event = __rb_reserve_next(cpu_buffer, length, ts,
+                                 delta, add_timestamp);
        if (unlikely(PTR_ERR(event) == -EAGAIN))
                goto again;
 
        if (!event)
                goto out_fail;
 
-       if (!rb_event_is_commit(cpu_buffer, event))
-               delta = 0;
-
-       event->time_delta = delta;
-
        return event;
 
  out_fail:
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
 #define TRACE_RECURSIVE_DEPTH 16
 
-static int trace_recursive_lock(void)
+/* Keep this code out of the fast path cache */
+static noinline void trace_recursive_fail(void)
 {
-       current->trace_recursion++;
-
-       if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
-               return 0;
-
        /* Disable all tracing before we do anything else */
        tracing_off_permanent();
 
@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
                    in_nmi());
 
        WARN_ON_ONCE(1);
+}
+
+static inline int trace_recursive_lock(void)
+{
+       current->trace_recursion++;
+
+       if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+               return 0;
+
+       trace_recursive_fail();
+
        return -1;
 }
 
-static void trace_recursive_unlock(void)
+static inline void trace_recursive_unlock(void)
 {
        WARN_ON_ONCE(!current->trace_recursion);
 
@@ -2308,12 +2298,28 @@ static void
 rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
                      struct ring_buffer_event *event)
 {
+       u64 delta;
+
        /*
         * The event first in the commit queue updates the
         * time stamp.
         */
-       if (rb_event_is_commit(cpu_buffer, event))
-               cpu_buffer->write_stamp += event->time_delta;
+       if (rb_event_is_commit(cpu_buffer, event)) {
+               /*
+                * A commit event that is first on a page
+                * updates the write timestamp with the page stamp
+                */
+               if (!rb_event_index(event))
+                       cpu_buffer->write_stamp =
+                               cpu_buffer->commit_page->page->time_stamp;
+               else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+                       delta = event->array[0];
+                       delta <<= TS_SHIFT;
+                       delta += event->time_delta;
+                       cpu_buffer->write_stamp += delta;
+               } else
+                       cpu_buffer->write_stamp += event->time_delta;
+       }
 }
 
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
 static inline void rb_event_discard(struct ring_buffer_event *event)
 {
+       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+               event = skip_time_extend(event);
+
        /* array[0] holds the actual length for the discarded event */
        event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
        event->type_len = RINGBUF_TYPE_PADDING;
@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
 
  again:
        /*
-        * We repeat when a timestamp is encountered. It is possible
-        * to get multiple timestamps from an interrupt entering just
-        * as one timestamp is about to be written, or from discarded
-        * commits. The most that we can have is the number on a single page.
+        * We repeat when a time extend is encountered.
+        * Since the time extend is always attached to a data event,
+        * we should never loop more than once.
+        * (We never hit the following condition more than twice).
         */
-       if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+       if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
                return NULL;
 
        reader = rb_get_reader_page(cpu_buffer);
@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
                return NULL;
 
        /*
-        * We repeat when a timestamp is encountered.
-        * We can get multiple timestamps by nested interrupts or also
-        * if filtering is on (discarding commits). Since discarding
-        * commits can be frequent we can get a lot of timestamps.
-        * But we limit them by not adding timestamps if they begin
-        * at the start of a page.
+        * We repeat when a time extend is encountered.
+        * Since the time extend is always attached to a data event,
+        * we should never loop more than once.
+        * (We never hit the following condition more than twice).
         */
-       if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
+       if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
                return NULL;
 
        if (rb_per_cpu_empty(cpu_buffer))
@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
                if (len > (commit - read))
                        len = (commit - read);
 
-               size = rb_event_length(event);
+               /* Always keep the time extend and data together */
+               size = rb_event_ts_length(event);
 
                if (len < size)
                        goto out_unlock;
@@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
                                break;
 
                        event = rb_reader_event(cpu_buffer);
-                       size = rb_event_length(event);
+                       /* Always keep the time extend and data together */
+                       size = rb_event_ts_length(event);
                } while (len > size);
 
                /* update bpage */
index 001bcd2..82d9b81 100644 (file)
@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu)
 {
        struct dentry *d_percpu = tracing_dentry_percpu();
        struct dentry *d_cpu;
-       /* strlen(cpu) + MAX(log10(cpu)) + '\0' */
-       char cpu_dir[7];
+       char cpu_dir[30]; /* 30 characters should be more than enough */
 
-       if (cpu > 999 || cpu < 0)
-               return;
-
-       sprintf(cpu_dir, "cpu%ld", cpu);
+       snprintf(cpu_dir, 30, "cpu%ld", cpu);
        d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
        if (!d_cpu) {
                pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
index 43e3dd2..399751b 100644 (file)
@@ -15,6 +15,23 @@ DESCRIPTION
 This command displays the symbolic event types which can be selected in the
 various perf commands with the -e option.
 
+EVENT MODIFIERS
+---------------
+
+Events can optionally have a modifer by appending a colon and one or
+more modifiers.  Modifiers allow the user to restrict when events are
+counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
+
+The 'p' modifier can be used for specifying how precise the instruction
+address should be. The 'p' modifier is currently only implemented for
+Intel PEBS and can be specified multiple times:
+  0 - SAMPLE_IP can have arbitrary skid
+  1 - SAMPLE_IP must have constant skid
+  2 - SAMPLE_IP requested to have 0 skid
+  3 - SAMPLE_IP must have 0 skid
+
+The PEBS implementation now supports up to 2.
+
 RAW HARDWARE EVENT DESCRIPTOR
 -----------------------------
 Even when an event is not available in a symbolic form within perf right now,
index 27d52da..62de1b7 100644 (file)
@@ -16,7 +16,9 @@ or
 or
 'perf probe' --list
 or
-'perf probe' --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
+'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
+or
+'perf probe' [options] --vars='PROBEPOINT'
 
 DESCRIPTION
 -----------
@@ -31,6 +33,11 @@ OPTIONS
 --vmlinux=PATH::
        Specify vmlinux path which has debuginfo (Dwarf binary).
 
+-m::
+--module=MODNAME::
+       Specify module name in which perf-probe searches probe points
+       or lines.
+
 -s::
 --source=PATH::
        Specify path to kernel source.
@@ -57,6 +64,15 @@ OPTIONS
        Show source code lines which can be probed. This needs an argument
        which specifies a range of the source code. (see LINE SYNTAX for detail)
 
+-V::
+--vars=::
+       Show available local variables at given probe point. The argument
+       syntax is same as PROBE SYNTAX, but NO ARGs.
+
+--externs::
+       (Only for --vars) Show external defined variables in addition to local
+       variables.
+
 -f::
 --force::
        Forcibly add events with existing name.
index 3ee27dc..a91f9f9 100644 (file)
@@ -83,6 +83,10 @@ OPTIONS
 --call-graph::
        Do call-graph (stack chain/backtrace) recording.
 
+-q::
+--quiet::
+       Don't print any message, useful for scripting.
+
 -v::
 --verbose::
        Be more verbose (show counter open errors, etc).
index 199d5e1..2e000c0 100644 (file)
@@ -50,14 +50,17 @@ static struct {
        bool list_events;
        bool force_add;
        bool show_lines;
+       bool show_vars;
+       bool show_ext_vars;
+       bool mod_events;
        int nevents;
        struct perf_probe_event events[MAX_PROBES];
        struct strlist *dellist;
        struct line_range line_range;
+       const char *target_module;
        int max_probe_points;
 } params;
 
-
 /* Parse an event definition. Note that any error must die. */
 static int parse_probe_event(const char *str)
 {
@@ -92,6 +95,7 @@ static int parse_probe_event_argv(int argc, const char **argv)
        len = 0;
        for (i = 0; i < argc; i++)
                len += sprintf(&buf[len], "%s ", argv[i]);
+       params.mod_events = true;
        ret = parse_probe_event(buf);
        free(buf);
        return ret;
@@ -100,9 +104,10 @@ static int parse_probe_event_argv(int argc, const char **argv)
 static int opt_add_probe_event(const struct option *opt __used,
                              const char *str, int unset __used)
 {
-       if (str)
+       if (str) {
+               params.mod_events = true;
                return parse_probe_event(str);
-       else
+       } else
                return 0;
 }
 
@@ -110,6 +115,7 @@ static int opt_del_probe_event(const struct option *opt __used,
                               const char *str, int unset __used)
 {
        if (str) {
+               params.mod_events = true;
                if (!params.dellist)
                        params.dellist = strlist__new(true, NULL);
                strlist__add(params.dellist, str);
@@ -130,6 +136,25 @@ static int opt_show_lines(const struct option *opt __used,
 
        return ret;
 }
+
+static int opt_show_vars(const struct option *opt __used,
+                        const char *str, int unset __used)
+{
+       struct perf_probe_event *pev = &params.events[params.nevents];
+       int ret;
+
+       if (!str)
+               return 0;
+
+       ret = parse_probe_event(str);
+       if (!ret && pev->nargs != 0) {
+               pr_err("  Error: '--vars' doesn't accept arguments.\n");
+               return -EINVAL;
+       }
+       params.show_vars = true;
+
+       return ret;
+}
 #endif
 
 static const char * const probe_usage[] = {
@@ -138,7 +163,8 @@ static const char * const probe_usage[] = {
        "perf probe [<options>] --del '[GROUP:]EVENT' ...",
        "perf probe --list",
 #ifdef DWARF_SUPPORT
-       "perf probe --line 'LINEDESC'",
+       "perf probe [<options>] --line 'LINEDESC'",
+       "perf probe [<options>] --vars 'PROBEPOINT'",
 #endif
        NULL
 };
@@ -180,10 +206,17 @@ static const struct option options[] = {
        OPT_CALLBACK('L', "line", NULL,
                     "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
                     "Show source code lines.", opt_show_lines),
+       OPT_CALLBACK('V', "vars", NULL,
+                    "FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT",
+                    "Show accessible variables on PROBEDEF", opt_show_vars),
+       OPT_BOOLEAN('\0', "externs", &params.show_ext_vars,
+                   "Show external variables too (with --vars only)"),
        OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
        OPT_STRING('s', "source", &symbol_conf.source_prefix,
                   "directory", "path to kernel source"),
+       OPT_STRING('m', "module", &params.target_module,
+                  "modname", "target module name"),
 #endif
        OPT__DRY_RUN(&probe_event_dry_run),
        OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
@@ -217,7 +250,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
                usage_with_options(probe_usage, options);
 
        if (params.list_events) {
-               if (params.nevents != 0 || params.dellist) {
+               if (params.mod_events) {
                        pr_err("  Error: Don't use --list with --add/--del.\n");
                        usage_with_options(probe_usage, options);
                }
@@ -225,6 +258,10 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
                        pr_err("  Error: Don't use --list with --line.\n");
                        usage_with_options(probe_usage, options);
                }
+               if (params.show_vars) {
+                       pr_err(" Error: Don't use --list with --vars.\n");
+                       usage_with_options(probe_usage, options);
+               }
                ret = show_perf_probe_events();
                if (ret < 0)
                        pr_err("  Error: Failed to show event list. (%d)\n",
@@ -234,17 +271,35 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
 
 #ifdef DWARF_SUPPORT
        if (params.show_lines) {
-               if (params.nevents != 0 || params.dellist) {
-                       pr_warning("  Error: Don't use --line with"
-                                  " --add/--del.\n");
+               if (params.mod_events) {
+                       pr_err("  Error: Don't use --line with"
+                              " --add/--del.\n");
+                       usage_with_options(probe_usage, options);
+               }
+               if (params.show_vars) {
+                       pr_err(" Error: Don't use --line with --vars.\n");
                        usage_with_options(probe_usage, options);
                }
 
-               ret = show_line_range(&params.line_range);
+               ret = show_line_range(&params.line_range, params.target_module);
                if (ret < 0)
                        pr_err("  Error: Failed to show lines. (%d)\n", ret);
                return ret;
        }
+       if (params.show_vars) {
+               if (params.mod_events) {
+                       pr_err("  Error: Don't use --vars with"
+                              " --add/--del.\n");
+                       usage_with_options(probe_usage, options);
+               }
+               ret = show_available_vars(params.events, params.nevents,
+                                         params.max_probe_points,
+                                         params.target_module,
+                                         params.show_ext_vars);
+               if (ret < 0)
+                       pr_err("  Error: Failed to show vars. (%d)\n", ret);
+               return ret;
+       }
 #endif
 
        if (params.dellist) {
@@ -258,8 +313,9 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
 
        if (params.nevents) {
                ret = add_perf_probe_events(params.events, params.nevents,
-                                           params.force_add,
-                                           params.max_probe_points);
+                                           params.max_probe_points,
+                                           params.target_module,
+                                           params.force_add);
                if (ret < 0) {
                        pr_err("  Error: Failed to add events. (%d)\n", ret);
                        return ret;
index ff77b80..4e75583 100644 (file)
@@ -353,7 +353,7 @@ try_again:
                }
 
                if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) {
-                       perror("Unable to read perf file descriptor\n");
+                       perror("Unable to read perf file descriptor");
                        exit(-1);
                }
 
@@ -626,7 +626,7 @@ static int __cmd_record(int argc, const char **argv)
 
        nr_cpus = read_cpu_map(cpu_list);
        if (nr_cpus < 1) {
-               perror("failed to collect number of CPUs\n");
+               perror("failed to collect number of CPUs");
                return -1;
        }
 
@@ -761,6 +761,9 @@ static int __cmd_record(int argc, const char **argv)
                }
        }
 
+       if (quiet)
+               return 0;
+
        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
 
        /*
@@ -820,6 +823,7 @@ static const struct option options[] = {
                    "do call-graph (stack chain/backtrace) recording"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
+       OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &sample_address,
index 40a6a29..2f8df45 100644 (file)
@@ -46,9 +46,6 @@ static struct scripting_ops   *scripting_ops;
 
 static void setup_scripting(void)
 {
-       /* make sure PERF_EXEC_PATH is set for scripts */
-       perf_set_argv_exec_path(perf_exec_path());
-
        setup_perl_scripting();
        setup_python_scripting();
 
@@ -285,7 +282,7 @@ static int parse_scriptname(const struct option *opt __used,
                script++;
        } else {
                script = str;
-               ext = strchr(script, '.');
+               ext = strrchr(script, '.');
                if (!ext) {
                        fprintf(stderr, "invalid script extension");
                        return -1;
@@ -593,6 +590,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
                suffix = REPORT_SUFFIX;
        }
 
+       /* make sure PERF_EXEC_PATH is set for scripts */
+       perf_set_argv_exec_path(perf_exec_path());
+
        if (!suffix && argc >= 2 && strncmp(argv[1], "-", strlen("-")) != 0) {
                char *record_script_path, *report_script_path;
                int live_pipe[2];
@@ -625,12 +625,13 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
                        dup2(live_pipe[1], 1);
                        close(live_pipe[0]);
 
-                       __argv = malloc(5 * sizeof(const char *));
+                       __argv = malloc(6 * sizeof(const char *));
                        __argv[0] = "/bin/sh";
                        __argv[1] = record_script_path;
-                       __argv[2] = "-o";
-                       __argv[3] = "-";
-                       __argv[4] = NULL;
+                       __argv[2] = "-q";
+                       __argv[3] = "-o";
+                       __argv[4] = "-";
+                       __argv[5] = NULL;
 
                        execvp("/bin/sh", (char **)__argv);
                        exit(-1);
index e3a5e55..4028d92 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/failed-syscalls.pl $comm
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
index d83070b..ba25f4d 100644 (file)
@@ -7,7 +7,7 @@ if [ $# -lt 1 ] ; then
 fi
 comm=$1
 shift
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl $comm
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
 
 
 
index 7ef4698..641a3f5 100644 (file)
@@ -1,6 +1,6 @@
 #!/bin/bash
 # description: system-wide r/w activity
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
 
 
 
index 93e698c..4918dba 100644 (file)
@@ -17,7 +17,7 @@ if [ "$n_args" -gt 0 ] ; then
     interval=$1
     shift
 fi
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/rwtop.pl $interval
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
 
 
 
index a0d898f..49052eb 100644 (file)
@@ -1,6 +1,6 @@
 #!/bin/bash
 # description: system-wide min/max/avg wakeup latency
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
 
 
 
index 3508113..df0c65f 100644 (file)
@@ -1,6 +1,6 @@
 #!/bin/bash
 # description: workqueue stats (ins/exe/create/destroy)
-perf trace $@ -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
 
 
 
index 9689bc0..13cc02b 100644 (file)
@@ -6,6 +6,14 @@
 # Public License ("GPL") version 2 as published by the Free Software
 # Foundation.
 
+import errno, os
+
+FUTEX_WAIT = 0
+FUTEX_WAKE = 1
+FUTEX_PRIVATE_FLAG = 128
+FUTEX_CLOCK_REALTIME = 256
+FUTEX_CMD_MASK = ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
+
 NSECS_PER_SEC    = 1000000000
 
 def avg(total, n):
@@ -24,5 +32,55 @@ def nsecs_str(nsecs):
     str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)),
     return str
 
+def add_stats(dict, key, value):
+       if not dict.has_key(key):
+               dict[key] = (value, value, value, 1)
+       else:
+               min, max, avg, count = dict[key]
+               if value < min:
+                       min = value
+               if value > max:
+                       max = value
+               avg = (avg + value) / 2
+               dict[key] = (min, max, avg, count + 1)
+
 def clear_term():
     print("\x1b[H\x1b[2J")
+
+audit_package_warned = False
+
+try:
+       import audit
+       machine_to_id = {
+               'x86_64': audit.MACH_86_64,
+               'alpha' : audit.MACH_ALPHA,
+               'ia64'  : audit.MACH_IA64,
+               'ppc'   : audit.MACH_PPC,
+               'ppc64' : audit.MACH_PPC64,
+               's390'  : audit.MACH_S390,
+               's390x' : audit.MACH_S390X,
+               'i386'  : audit.MACH_X86,
+               'i586'  : audit.MACH_X86,
+               'i686'  : audit.MACH_X86,
+       }
+       try:
+               machine_to_id['armeb'] = audit.MACH_ARMEB
+       except:
+               pass
+       machine_id = machine_to_id[os.uname()[4]]
+except:
+       if not audit_package_warned:
+               audit_package_warned = True
+               print "Install the audit-libs-python package to get syscall names"
+
+def syscall_name(id):
+       try:
+               return audit.audit_syscall_to_name(id, machine_id)
+       except:
+               return str(id)
+
+def strerror(nr):
+       try:
+               return errno.errorcode[abs(nr)]
+       except:
+               return "Unknown %d errno" % nr
index 3029354..0358702 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s ~/libexec/perf-core/scripts/python/failed-syscalls-by-pid.py $comm
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/futex-contention-record b/tools/perf/scripts/python/bin/futex-contention-record
new file mode 100644 (file)
index 0000000..5ecbb43
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -a -e syscalls:sys_enter_futex -e syscalls:sys_exit_futex $@
diff --git a/tools/perf/scripts/python/bin/futex-contention-report b/tools/perf/scripts/python/bin/futex-contention-report
new file mode 100644 (file)
index 0000000..c826813
--- /dev/null
@@ -0,0 +1,4 @@
+#!/bin/bash
+# description: futext contention measurement
+
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
index c3d0a63..4ad361b 100644 (file)
@@ -2,4 +2,4 @@
 # description: display a process of packet and processing time
 # args: [tx] [rx] [dev=] [debug]
 
-perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@
+perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
index 61d05f7..df1791f 100644 (file)
@@ -1,3 +1,3 @@
 #!/bin/bash
 # description: sched migration overview
-perf trace $@ -s ~/libexec/perf-core/scripts/python/sched-migration.py
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
index b01c842..36b409c 100644 (file)
@@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then
     interval=$1
     shift
 fi
-perf trace $@ -s ~/libexec/perf-core/scripts/python/sctop.py $comm $interval
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
index 9e9d8dd..4eb88c9 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts-by-pid.py $comm
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
index dc076b6..cb2f9c5 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts.py $comm
+perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
index 0ca0227..acd7848 100644 (file)
@@ -13,21 +13,26 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
 
 from perf_trace_context import *
 from Core import *
+from Util import *
 
-usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
+usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n";
 
 for_comm = None
+for_pid = None
 
 if len(sys.argv) > 2:
        sys.exit(usage)
 
 if len(sys.argv) > 1:
-       for_comm = sys.argv[1]
+       try:
+               for_pid = int(sys.argv[1])
+       except:
+               for_comm = sys.argv[1]
 
 syscalls = autodict()
 
 def trace_begin():
-       pass
+       print "Press control+C to stop and show the summary"
 
 def trace_end():
        print_error_totals()
@@ -35,9 +40,9 @@ def trace_end():
 def raw_syscalls__sys_exit(event_name, context, common_cpu,
        common_secs, common_nsecs, common_pid, common_comm,
        id, ret):
-       if for_comm is not None:
-               if common_comm != for_comm:
-                       return
+       if (for_comm and common_comm != for_comm) or \
+          (for_pid  and common_pid  != for_pid ):
+               return
 
        if ret < 0:
                try:
@@ -62,7 +67,7 @@ def print_error_totals():
                    print "\n%s [%d]\n" % (comm, pid),
                    id_keys = syscalls[comm][pid].keys()
                    for id in id_keys:
-                           print "  syscall: %-16d\n" % (id),
+                           print "  syscall: %-16s\n" % syscall_name(id),
                            ret_keys = syscalls[comm][pid][id].keys()
                            for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k),  reverse = True):
-                                   print "    err = %-20d  %10d\n" % (ret, val),
+                                   print "    err = %-20s  %10d\n" % (strerror(ret), val),
diff --git a/tools/perf/scripts/python/futex-contention.py b/tools/perf/scripts/python/futex-contention.py
new file mode 100644 (file)
index 0000000..11e70a3
--- /dev/null
@@ -0,0 +1,50 @@
+# futex contention
+# (c) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Translation of:
+#
+# http://sourceware.org/systemtap/wiki/WSFutexContention
+#
+# to perf python scripting.
+#
+# Measures futex contention
+
+import os, sys
+sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+from Util import *
+
+process_names = {}
+thread_thislock = {}
+thread_blocktime = {}
+
+lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
+process_names = {} # long-lived pid-to-execname mapping
+
+def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm,
+                             nr, uaddr, op, val, utime, uaddr2, val3):
+       cmd = op & FUTEX_CMD_MASK
+       if cmd != FUTEX_WAIT:
+               return # we don't care about originators of WAKE events
+
+       process_names[tid] = comm
+       thread_thislock[tid] = uaddr
+       thread_blocktime[tid] = nsecs(s, ns)
+
+def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm,
+                            nr, ret):
+       if thread_blocktime.has_key(tid):
+               elapsed = nsecs(s, ns) - thread_blocktime[tid]
+               add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed)
+               del thread_blocktime[tid]
+               del thread_thislock[tid]
+
+def trace_begin():
+       print "Press control+C to stop and show the summary"
+
+def trace_end():
+       for (tid, lock) in lock_waits:
+               min, max, avg, count = lock_waits[tid, lock]
+               print "%s[%d] lock %x contended %d times, %d avg ns" % \
+                     (process_names[tid], tid, lock, count, avg)
+
index 6cafad4..7a6ec2c 100644 (file)
@@ -8,10 +8,7 @@
 # will be refreshed every [interval] seconds.  The default interval is
 # 3 seconds.
 
-import thread
-import time
-import os
-import sys
+import os, sys, thread, time
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
        '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
@@ -20,7 +17,7 @@ from perf_trace_context import *
 from Core import *
 from Util import *
 
-usage = "perf trace -s syscall-counts.py [comm] [interval]\n";
+usage = "perf trace -s sctop.py [comm] [interval]\n";
 
 for_comm = None
 default_interval = 3
@@ -71,7 +68,7 @@ def print_syscall_totals(interval):
                for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
                                              reverse = True):
                        try:
-                               print "%-40d  %10d\n" % (id, val),
+                               print "%-40s  %10d\n" % (syscall_name(id), val),
                        except TypeError:
                                pass
                syscalls.clear()
index af722d6..d1ee3ec 100644 (file)
@@ -5,29 +5,33 @@
 # Displays system-wide system call totals, broken down by syscall.
 # If a [comm] arg is specified, only syscalls called by [comm] are displayed.
 
-import os
-import sys
+import os, sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
        '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
+from Util import syscall_name
 
 usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
 
 for_comm = None
+for_pid = None
 
 if len(sys.argv) > 2:
        sys.exit(usage)
 
 if len(sys.argv) > 1:
-       for_comm = sys.argv[1]
+       try:
+               for_pid = int(sys.argv[1])
+       except:
+               for_comm = sys.argv[1]
 
 syscalls = autodict()
 
 def trace_begin():
-       pass
+       print "Press control+C to stop and show the summary"
 
 def trace_end():
        print_syscall_totals()
@@ -35,9 +39,10 @@ def trace_end():
 def raw_syscalls__sys_enter(event_name, context, common_cpu,
        common_secs, common_nsecs, common_pid, common_comm,
        id, args):
-       if for_comm is not None:
-               if common_comm != for_comm:
-                       return
+
+       if (for_comm and common_comm != for_comm) or \
+          (for_pid  and common_pid  != for_pid ):
+               return
        try:
                syscalls[common_comm][common_pid][id] += 1
        except TypeError:
@@ -61,4 +66,4 @@ def print_syscall_totals():
                    id_keys = syscalls[comm][pid].keys()
                    for id, val in sorted(syscalls[comm][pid].iteritems(), \
                                  key = lambda(k, v): (v, k),  reverse = True):
-                           print "  %-38d  %10d\n" % (id, val),
+                           print "  %-38s  %10d\n" % (syscall_name(id), val),
index f977e85..ea183dc 100644 (file)
@@ -13,6 +13,7 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
 
 from perf_trace_context import *
 from Core import *
+from Util import syscall_name
 
 usage = "perf trace -s syscall-counts.py [comm]\n";
 
@@ -27,7 +28,7 @@ if len(sys.argv) > 1:
 syscalls = autodict()
 
 def trace_begin():
-       pass
+       print "Press control+C to stop and show the summary"
 
 def trace_end():
        print_syscall_totals()
@@ -55,4 +56,4 @@ def print_syscall_totals():
 
     for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
                                  reverse = True):
-           print "%-40d  %10d\n" % (id, val),
+           print "%-40s  %10d\n" % (syscall_name(id), val),
index f9c7e3a..c8d81b0 100644 (file)
@@ -12,8 +12,8 @@
 #include "debug.h"
 #include "util.h"
 
-int verbose = 0;
-bool dump_trace = false;
+int verbose;
+bool dump_trace = false, quiet = false;
 
 int eprintf(int level, const char *fmt, ...)
 {
index 7a17ee0..7b51408 100644 (file)
@@ -6,7 +6,7 @@
 #include "event.h"
 
 extern int verbose;
-extern bool dump_trace;
+extern bool quiet, dump_trace;
 
 int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
 void trace_event(event_t *event);
index 7857579..b397c03 100644 (file)
@@ -215,6 +215,16 @@ struct symbol *map_groups__find_function_by_name(struct map_groups *self,
        return map_groups__find_symbol_by_name(self, MAP__FUNCTION, name, mapp, filter);
 }
 
+static inline
+struct symbol *machine__find_kernel_function_by_name(struct machine *self,
+                                                    const char *name,
+                                                    struct map **mapp,
+                                                    symbol_filter_t filter)
+{
+       return map_groups__find_function_by_name(&self->kmaps, name, mapp,
+                                                filter);
+}
+
 int map_groups__fixup_overlappings(struct map_groups *self, struct map *map,
                                   int verbose, FILE *fp);
 
index fcc16e4..3b6a529 100644 (file)
@@ -74,10 +74,9 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
 static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
 static struct machine machine;
 
-/* Initialize symbol maps and path of vmlinux */
+/* Initialize symbol maps and path of vmlinux/modules */
 static int init_vmlinux(void)
 {
-       struct dso *kernel;
        int ret;
 
        symbol_conf.sort_by_name = true;
@@ -91,33 +90,61 @@ static int init_vmlinux(void)
                goto out;
        }
 
-       ret = machine__init(&machine, "/", 0);
+       ret = machine__init(&machine, "", HOST_KERNEL_ID);
        if (ret < 0)
                goto out;
 
-       kernel = dso__new_kernel(symbol_conf.vmlinux_name);
-       if (kernel == NULL)
-               die("Failed to create kernel dso.");
-
-       ret = __machine__create_kernel_maps(&machine, kernel);
-       if (ret < 0)
-               pr_debug("Failed to create kernel maps.\n");
-
+       if (machine__create_kernel_maps(&machine) < 0) {
+               pr_debug("machine__create_kernel_maps ");
+               goto out;
+       }
 out:
        if (ret < 0)
                pr_warning("Failed to init vmlinux path.\n");
        return ret;
 }
 
+static struct symbol *__find_kernel_function_by_name(const char *name,
+                                                    struct map **mapp)
+{
+       return machine__find_kernel_function_by_name(&machine, name, mapp,
+                                                    NULL);
+}
+
+const char *kernel_get_module_path(const char *module)
+{
+       struct dso *dso;
+
+       if (module) {
+               list_for_each_entry(dso, &machine.kernel_dsos, node) {
+                       if (strncmp(dso->short_name + 1, module,
+                                   dso->short_name_len - 2) == 0)
+                               goto found;
+               }
+               pr_debug("Failed to find module %s.\n", module);
+               return NULL;
+       } else {
+               dso = machine.vmlinux_maps[MAP__FUNCTION]->dso;
+               if (dso__load_vmlinux_path(dso,
+                        machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) {
+                       pr_debug("Failed to load kernel map.\n");
+                       return NULL;
+               }
+       }
+found:
+       return dso->long_name;
+}
+
 #ifdef DWARF_SUPPORT
-static int open_vmlinux(void)
+static int open_vmlinux(const char *module)
 {
-       if (map__load(machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) {
-               pr_debug("Failed to load kernel map.\n");
-               return -EINVAL;
+       const char *path = kernel_get_module_path(module);
+       if (!path) {
+               pr_err("Failed to find path of %s module", module ?: "kernel");
+               return -ENOENT;
        }
-       pr_debug("Try to open %s\n", machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name);
-       return open(machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name, O_RDONLY);
+       pr_debug("Try to open %s\n", path);
+       return open(path, O_RDONLY);
 }
 
 /*
@@ -125,20 +152,19 @@ static int open_vmlinux(void)
  * Currently only handles kprobes.
  */
 static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
-                                      struct perf_probe_point *pp)
+                                       struct perf_probe_point *pp)
 {
        struct symbol *sym;
-       int fd, ret = -ENOENT;
+       struct map *map;
+       u64 addr;
+       int ret = -ENOENT;
 
-       sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION],
-                                      tp->symbol, NULL);
+       sym = __find_kernel_function_by_name(tp->symbol, &map);
        if (sym) {
-               fd = open_vmlinux();
-               if (fd >= 0) {
-                       ret = find_perf_probe_point(fd,
-                                                sym->start + tp->offset, pp);
-                       close(fd);
-               }
+               addr = map->unmap_ip(map, sym->start + tp->offset);
+               pr_debug("try to find %s+%ld@%llx\n", tp->symbol,
+                        tp->offset, addr);
+               ret = find_perf_probe_point((unsigned long)addr, pp);
        }
        if (ret <= 0) {
                pr_debug("Failed to find corresponding probes from "
@@ -156,12 +182,12 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
 /* Try to find perf_probe_event with debuginfo */
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
                                           struct probe_trace_event **tevs,
-                                          int max_tevs)
+                                          int max_tevs, const char *module)
 {
        bool need_dwarf = perf_probe_event_need_dwarf(pev);
        int fd, ntevs;
 
-       fd = open_vmlinux();
+       fd = open_vmlinux(module);
        if (fd < 0) {
                if (need_dwarf) {
                        pr_warning("Failed to open debuginfo file.\n");
@@ -300,7 +326,7 @@ error:
  * Show line-range always requires debuginfo to find source file and
  * line number.
  */
-int show_line_range(struct line_range *lr)
+int show_line_range(struct line_range *lr, const char *module)
 {
        int l = 1;
        struct line_node *ln;
@@ -313,7 +339,7 @@ int show_line_range(struct line_range *lr)
        if (ret < 0)
                return ret;
 
-       fd = open_vmlinux();
+       fd = open_vmlinux(module);
        if (fd < 0) {
                pr_warning("Failed to open debuginfo file.\n");
                return fd;
@@ -378,11 +404,84 @@ end:
        return ret;
 }
 
+static int show_available_vars_at(int fd, struct perf_probe_event *pev,
+                                 int max_vls, bool externs)
+{
+       char *buf;
+       int ret, i;
+       struct str_node *node;
+       struct variable_list *vls = NULL, *vl;
+
+       buf = synthesize_perf_probe_point(&pev->point);
+       if (!buf)
+               return -EINVAL;
+       pr_debug("Searching variables at %s\n", buf);
+
+       ret = find_available_vars_at(fd, pev, &vls, max_vls, externs);
+       if (ret > 0) {
+               /* Some variables were found */
+               fprintf(stdout, "Available variables at %s\n", buf);
+               for (i = 0; i < ret; i++) {
+                       vl = &vls[i];
+                       /*
+                        * A probe point might be converted to
+                        * several trace points.
+                        */
+                       fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
+                               vl->point.offset);
+                       free(vl->point.symbol);
+                       if (vl->vars) {
+                               strlist__for_each(node, vl->vars)
+                                       fprintf(stdout, "\t\t%s\n", node->s);
+                               strlist__delete(vl->vars);
+                       } else
+                               fprintf(stdout, "(No variables)\n");
+               }
+               free(vls);
+       } else
+               pr_err("Failed to find variables at %s (%d)\n", buf, ret);
+
+       free(buf);
+       return ret;
+}
+
+/* Show available variables on given probe point */
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+                       int max_vls, const char *module, bool externs)
+{
+       int i, fd, ret = 0;
+
+       ret = init_vmlinux();
+       if (ret < 0)
+               return ret;
+
+       fd = open_vmlinux(module);
+       if (fd < 0) {
+               pr_warning("Failed to open debuginfo file.\n");
+               return fd;
+       }
+
+       setup_pager();
+
+       for (i = 0; i < npevs && ret >= 0; i++)
+               ret = show_available_vars_at(fd, &pevs[i], max_vls, externs);
+
+       close(fd);
+       return ret;
+}
+
 #else  /* !DWARF_SUPPORT */
 
 static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
-                                      struct perf_probe_point *pp)
+                                       struct perf_probe_point *pp)
 {
+       struct symbol *sym;
+
+       sym = __find_kernel_function_by_name(tp->symbol, NULL);
+       if (!sym) {
+               pr_err("Failed to find symbol %s in kernel.\n", tp->symbol);
+               return -ENOENT;
+       }
        pp->function = strdup(tp->symbol);
        if (pp->function == NULL)
                return -ENOMEM;
@@ -394,7 +493,7 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
 
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
                                struct probe_trace_event **tevs __unused,
-                               int max_tevs __unused)
+                               int max_tevs __unused, const char *mod __unused)
 {
        if (perf_probe_event_need_dwarf(pev)) {
                pr_warning("Debuginfo-analysis is not supported.\n");
@@ -403,12 +502,19 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
        return 0;
 }
 
-int show_line_range(struct line_range *lr __unused)
+int show_line_range(struct line_range *lr __unused, const char *module __unused)
 {
        pr_warning("Debuginfo-analysis is not supported.\n");
        return -ENOSYS;
 }
 
+int show_available_vars(struct perf_probe_event *pevs __unused,
+                       int npevs __unused, int max_vls __unused,
+                       const char *module __unused, bool externs __unused)
+{
+       pr_warning("Debuginfo-analysis is not supported.\n");
+       return -ENOSYS;
+}
 #endif
 
 int parse_line_range_desc(const char *arg, struct line_range *lr)
@@ -1087,7 +1193,7 @@ error:
 }
 
 static int convert_to_perf_probe_event(struct probe_trace_event *tev,
-                               struct perf_probe_event *pev)
+                                      struct perf_probe_event *pev)
 {
        char buf[64] = "";
        int i, ret;
@@ -1516,14 +1622,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 
 static int convert_to_probe_trace_events(struct perf_probe_event *pev,
                                          struct probe_trace_event **tevs,
-                                         int max_tevs)
+                                         int max_tevs, const char *module)
 {
        struct symbol *sym;
        int ret = 0, i;
        struct probe_trace_event *tev;
 
        /* Convert perf_probe_event with debuginfo */
-       ret = try_to_find_probe_trace_events(pev, tevs, max_tevs);
+       ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module);
        if (ret != 0)
                return ret;
 
@@ -1572,8 +1678,7 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
        }
 
        /* Currently just checking function name from symbol map */
-       sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION],
-                                      tev->point.symbol, NULL);
+       sym = __find_kernel_function_by_name(tev->point.symbol, NULL);
        if (!sym) {
                pr_warning("Kernel symbol \'%s\' not found.\n",
                           tev->point.symbol);
@@ -1596,7 +1701,7 @@ struct __event_package {
 };
 
 int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
-                         bool force_add, int max_tevs)
+                         int max_tevs, const char *module, bool force_add)
 {
        int i, j, ret;
        struct __event_package *pkgs;
@@ -1617,7 +1722,9 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
                pkgs[i].pev = &pevs[i];
                /* Convert with or without debuginfo */
                ret  = convert_to_probe_trace_events(pkgs[i].pev,
-                                                     &pkgs[i].tevs, max_tevs);
+                                                    &pkgs[i].tevs,
+                                                    max_tevs,
+                                                    module);
                if (ret < 0)
                        goto end;
                pkgs[i].ntevs = ret;
index 5af3924..5accbed 100644 (file)
@@ -90,6 +90,12 @@ struct line_range {
        struct list_head        line_list;      /* Visible lines */
 };
 
+/* List of variables */
+struct variable_list {
+       struct probe_trace_point        point;  /* Actual probepoint */
+       struct strlist                  *vars;  /* Available variables */
+};
+
 /* Command string to events */
 extern int parse_perf_probe_command(const char *cmd,
                                    struct perf_probe_event *pev);
@@ -109,12 +115,18 @@ extern void clear_perf_probe_event(struct perf_probe_event *pev);
 /* Command string to line-range */
 extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
 
+/* Internal use: Return kernel/module path */
+extern const char *kernel_get_module_path(const char *module);
 
 extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
-                                bool force_add, int max_probe_points);
+                                int max_probe_points, const char *module,
+                                bool force_add);
 extern int del_perf_probe_events(struct strlist *dellist);
 extern int show_perf_probe_events(void);
-extern int show_line_range(struct line_range *lr);
+extern int show_line_range(struct line_range *lr, const char *module);
+extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
+                              int max_probe_points, const char *module,
+                              bool externs);
 
 
 /* Maximum index number of event-name postfix */
index 32b81f7..3991d73 100644 (file)
@@ -116,6 +116,101 @@ static void line_list__free(struct list_head *head)
        }
 }
 
+/* Dwarf FL wrappers */
+
+static int __linux_kernel_find_elf(Dwfl_Module *mod,
+                                  void **userdata,
+                                  const char *module_name,
+                                  Dwarf_Addr base,
+                                  char **file_name, Elf **elfp)
+{
+       int fd;
+       const char *path = kernel_get_module_path(module_name);
+
+       if (path) {
+               fd = open(path, O_RDONLY);
+               if (fd >= 0) {
+                       *file_name = strdup(path);
+                       return fd;
+               }
+       }
+       /* If failed, try to call standard method */
+       return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base,
+                                         file_name, elfp);
+}
+
+static char *debuginfo_path;   /* Currently dummy */
+
+static const Dwfl_Callbacks offline_callbacks = {
+       .find_debuginfo = dwfl_standard_find_debuginfo,
+       .debuginfo_path = &debuginfo_path,
+
+       .section_address = dwfl_offline_section_address,
+
+       /* We use this table for core files too.  */
+       .find_elf = dwfl_build_id_find_elf,
+};
+
+static const Dwfl_Callbacks kernel_callbacks = {
+       .find_debuginfo = dwfl_standard_find_debuginfo,
+       .debuginfo_path = &debuginfo_path,
+
+       .find_elf = __linux_kernel_find_elf,
+       .section_address = dwfl_linux_kernel_module_section_address,
+};
+
+/* Get a Dwarf from offline image */
+static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias)
+{
+       Dwfl_Module *mod;
+       Dwarf *dbg = NULL;
+
+       if (!dwflp)
+               return NULL;
+
+       *dwflp = dwfl_begin(&offline_callbacks);
+       if (!*dwflp)
+               return NULL;
+
+       mod = dwfl_report_offline(*dwflp, "", "", fd);
+       if (!mod)
+               goto error;
+
+       dbg = dwfl_module_getdwarf(mod, bias);
+       if (!dbg) {
+error:
+               dwfl_end(*dwflp);
+               *dwflp = NULL;
+       }
+       return dbg;
+}
+
+/* Get a Dwarf from live kernel image */
+static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp,
+                                         Dwarf_Addr *bias)
+{
+       Dwarf *dbg;
+
+       if (!dwflp)
+               return NULL;
+
+       *dwflp = dwfl_begin(&kernel_callbacks);
+       if (!*dwflp)
+               return NULL;
+
+       /* Load the kernel dwarves: Don't care the result here */
+       dwfl_linux_kernel_report_kernel(*dwflp);
+       dwfl_linux_kernel_report_modules(*dwflp);
+
+       dbg = dwfl_addrdwarf(*dwflp, addr, bias);
+       /* Here, check whether we could get a real dwarf */
+       if (!dbg) {
+               dwfl_end(*dwflp);
+               *dwflp = NULL;
+       }
+       return dbg;
+}
+
 /* Dwarf wrappers */
 
 /* Find the realpath of the target file. */
@@ -160,26 +255,44 @@ static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
        return name ? (strcmp(tname, name) == 0) : false;
 }
 
-/* Get type die, but skip qualifiers and typedef */
-static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+/* Get type die */
+static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
 {
        Dwarf_Attribute attr;
+
+       if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
+           dwarf_formref_die(&attr, die_mem))
+               return die_mem;
+       else
+               return NULL;
+}
+
+/* Get a type die, but skip qualifiers */
+static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
        int tag;
 
        do {
-               if (dwarf_attr(vr_die, DW_AT_type, &attr) == NULL ||
-                   dwarf_formref_die(&attr, die_mem) == NULL)
-                       return NULL;
-
-               tag = dwarf_tag(die_mem);
-               vr_die = die_mem;
+               vr_die = die_get_type(vr_die, die_mem);
+               if (!vr_die)
+                       break;
+               tag = dwarf_tag(vr_die);
        } while (tag == DW_TAG_const_type ||
                 tag == DW_TAG_restrict_type ||
                 tag == DW_TAG_volatile_type ||
-                tag == DW_TAG_shared_type ||
-                tag == DW_TAG_typedef);
+                tag == DW_TAG_shared_type);
+
+       return vr_die;
+}
 
-       return die_mem;
+/* Get a type die, but skip qualifiers and typedef */
+static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+       do {
+               vr_die = __die_get_real_type(vr_die, die_mem);
+       } while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
+
+       return vr_die;
 }
 
 static bool die_is_signed_type(Dwarf_Die *tp_die)
@@ -320,25 +433,35 @@ static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
        return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
 }
 
+struct __find_variable_param {
+       const char *name;
+       Dwarf_Addr addr;
+};
+
 static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
 {
-       const char *name = data;
+       struct __find_variable_param *fvp = data;
        int tag;
 
        tag = dwarf_tag(die_mem);
        if ((tag == DW_TAG_formal_parameter ||
             tag == DW_TAG_variable) &&
-           die_compare_name(die_mem, name))
+           die_compare_name(die_mem, fvp->name))
                return DIE_FIND_CB_FOUND;
 
-       return DIE_FIND_CB_CONTINUE;
+       if (dwarf_haspc(die_mem, fvp->addr))
+               return DIE_FIND_CB_CONTINUE;
+       else
+               return DIE_FIND_CB_SIBLING;
 }
 
-/* Find a variable called 'name' */
-static Dwarf_Die *die_find_variable(Dwarf_Die *sp_die, const char *name,
-                                   Dwarf_Die *die_mem)
+/* Find a variable called 'name' at given address */
+static Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+                                      Dwarf_Addr addr, Dwarf_Die *die_mem)
 {
-       return die_find_child(sp_die, __die_find_variable_cb, (void *)name,
+       struct __find_variable_param fvp = { .name = name, .addr = addr};
+
+       return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
                              die_mem);
 }
 
@@ -361,6 +484,60 @@ static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
                              die_mem);
 }
 
+/* Get the name of given variable DIE */
+static int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
+{
+       Dwarf_Die type;
+       int tag, ret, ret2;
+       const char *tmp = "";
+
+       if (__die_get_real_type(vr_die, &type) == NULL)
+               return -ENOENT;
+
+       tag = dwarf_tag(&type);
+       if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
+               tmp = "*";
+       else if (tag == DW_TAG_subroutine_type) {
+               /* Function pointer */
+               ret = snprintf(buf, len, "(function_type)");
+               return (ret >= len) ? -E2BIG : ret;
+       } else {
+               if (!dwarf_diename(&type))
+                       return -ENOENT;
+               if (tag == DW_TAG_union_type)
+                       tmp = "union ";
+               else if (tag == DW_TAG_structure_type)
+                       tmp = "struct ";
+               /* Write a base name */
+               ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
+               return (ret >= len) ? -E2BIG : ret;
+       }
+       ret = die_get_typename(&type, buf, len);
+       if (ret > 0) {
+               ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
+               ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+       }
+       return ret;
+}
+
+/* Get the name and type of given variable DIE, stored as "type\tname" */
+static int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
+{
+       int ret, ret2;
+
+       ret = die_get_typename(vr_die, buf, len);
+       if (ret < 0) {
+               pr_debug("Failed to get type, make it unknown.\n");
+               ret = snprintf(buf, len, "(unknown_type)");
+       }
+       if (ret > 0) {
+               ret2 = snprintf(buf + ret, len - ret, "\t%s",
+                               dwarf_diename(vr_die));
+               ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+       }
+       return ret;
+}
+
 /*
  * Probe finder related functions
  */
@@ -374,8 +551,13 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)
        return ref;
 }
 
-/* Show a location */
-static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
+/*
+ * Convert a location into trace_arg.
+ * If tvar == NULL, this just checks variable can be converted.
+ */
+static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
+                                    Dwarf_Op *fb_ops,
+                                    struct probe_trace_arg *tvar)
 {
        Dwarf_Attribute attr;
        Dwarf_Op *op;
@@ -384,20 +566,23 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
        Dwarf_Word offs = 0;
        bool ref = false;
        const char *regs;
-       struct probe_trace_arg *tvar = pf->tvar;
        int ret;
 
+       if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
+               goto static_var;
+
        /* TODO: handle more than 1 exprs */
        if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL ||
-           dwarf_getlocation_addr(&attr, pf->addr, &op, &nops, 1) <= 0 ||
+           dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0 ||
            nops == 0) {
                /* TODO: Support const_value */
-               pr_err("Failed to find the location of %s at this address.\n"
-                      " Perhaps, it has been optimized out.\n", pf->pvar->var);
                return -ENOENT;
        }
 
        if (op->atom == DW_OP_addr) {
+static_var:
+               if (!tvar)
+                       return 0;
                /* Static variables on memory (not stack), make @varname */
                ret = strlen(dwarf_diename(vr_die));
                tvar->value = zalloc(ret + 2);
@@ -412,14 +597,11 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
 
        /* If this is based on frame buffer, set the offset */
        if (op->atom == DW_OP_fbreg) {
-               if (pf->fb_ops == NULL) {
-                       pr_warning("The attribute of frame base is not "
-                                  "supported.\n");
+               if (fb_ops == NULL)
                        return -ENOTSUP;
-               }
                ref = true;
                offs = op->number;
-               op = &pf->fb_ops[0];
+               op = &fb_ops[0];
        }
 
        if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) {
@@ -435,13 +617,18 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
        } else if (op->atom == DW_OP_regx) {
                regn = op->number;
        } else {
-               pr_warning("DW_OP %x is not supported.\n", op->atom);
+               pr_debug("DW_OP %x is not supported.\n", op->atom);
                return -ENOTSUP;
        }
 
+       if (!tvar)
+               return 0;
+
        regs = get_arch_regstr(regn);
        if (!regs) {
-               pr_warning("Mapping for DWARF register number %u missing on this architecture.", regn);
+               /* This should be a bug in DWARF or this tool */
+               pr_warning("Mapping for DWARF register number %u "
+                          "missing on this architecture.", regn);
                return -ERANGE;
        }
 
@@ -666,8 +853,14 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
        pr_debug("Converting variable %s into trace event.\n",
                 dwarf_diename(vr_die));
 
-       ret = convert_variable_location(vr_die, pf);
-       if (ret == 0 && pf->pvar->field) {
+       ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
+                                       pf->tvar);
+       if (ret == -ENOENT)
+               pr_err("Failed to find the location of %s at this address.\n"
+                      " Perhaps, it has been optimized out.\n", pf->pvar->var);
+       else if (ret == -ENOTSUP)
+               pr_err("Sorry, we don't support this variable location yet.\n");
+       else if (pf->pvar->field) {
                ret = convert_variable_fields(vr_die, pf->pvar->var,
                                              pf->pvar->field, &pf->tvar->ref,
                                              &die_mem);
@@ -722,56 +915,39 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
        pr_debug("Searching '%s' variable in context.\n",
                 pf->pvar->var);
        /* Search child die for local variables and parameters. */
-       if (die_find_variable(sp_die, pf->pvar->var, &vr_die))
+       if (die_find_variable_at(sp_die, pf->pvar->var, pf->addr, &vr_die))
                ret = convert_variable(&vr_die, pf);
        else {
                /* Search upper class */
                nscopes = dwarf_getscopes_die(sp_die, &scopes);
-               if (nscopes > 0) {
-                       ret = dwarf_getscopevar(scopes, nscopes, pf->pvar->var,
-                                               0, NULL, 0, 0, &vr_die);
-                       if (ret >= 0)
+               while (nscopes-- > 1) {
+                       pr_debug("Searching variables in %s\n",
+                                dwarf_diename(&scopes[nscopes]));
+                       /* We should check this scope, so give dummy address */
+                       if (die_find_variable_at(&scopes[nscopes],
+                                                pf->pvar->var, 0,
+                                                &vr_die)) {
                                ret = convert_variable(&vr_die, pf);
-                       else
-                               ret = -ENOENT;
+                               goto found;
+                       }
+               }
+               if (scopes)
                        free(scopes);
-               } else
-                       ret = -ENOENT;
+               ret = -ENOENT;
        }
+found:
        if (ret < 0)
                pr_warning("Failed to find '%s' in this function.\n",
                           pf->pvar->var);
        return ret;
 }
 
-/* Show a probe point to output buffer */
-static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
+/* Convert subprogram DIE to trace point */
+static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
+                                 bool retprobe, struct probe_trace_point *tp)
 {
-       struct probe_trace_event *tev;
        Dwarf_Addr eaddr;
-       Dwarf_Die die_mem;
        const char *name;
-       int ret, i;
-       Dwarf_Attribute fb_attr;
-       size_t nops;
-
-       if (pf->ntevs == pf->max_tevs) {
-               pr_warning("Too many( > %d) probe point found.\n",
-                          pf->max_tevs);
-               return -ERANGE;
-       }
-       tev = &pf->tevs[pf->ntevs++];
-
-       /* If no real subprogram, find a real one */
-       if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
-               sp_die = die_find_real_subprogram(&pf->cu_die,
-                                                pf->addr, &die_mem);
-               if (!sp_die) {
-                       pr_warning("Failed to find probe point in any "
-                                  "functions.\n");
-                       return -ENOENT;
-               }
-       }
 
        /* Copy the name of probe point */
        name = dwarf_diename(sp_die);
@@ -781,26 +957,45 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
                                   dwarf_diename(sp_die));
                        return -ENOENT;
                }
-               tev->point.symbol = strdup(name);
-               if (tev->point.symbol == NULL)
+               tp->symbol = strdup(name);
+               if (tp->symbol == NULL)
                        return -ENOMEM;
-               tev->point.offset = (unsigned long)(pf->addr - eaddr);
+               tp->offset = (unsigned long)(paddr - eaddr);
        } else
                /* This function has no name. */
-               tev->point.offset = (unsigned long)pf->addr;
+               tp->offset = (unsigned long)paddr;
 
        /* Return probe must be on the head of a subprogram */
-       if (pf->pev->point.retprobe) {
-               if (tev->point.offset != 0) {
+       if (retprobe) {
+               if (eaddr != paddr) {
                        pr_warning("Return probe must be on the head of"
                                   " a real function\n");
                        return -EINVAL;
                }
-               tev->point.retprobe = true;
+               tp->retprobe = true;
        }
 
-       pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
-                tev->point.offset);
+       return 0;
+}
+
+/* Call probe_finder callback with real subprogram DIE */
+static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+       Dwarf_Die die_mem;
+       Dwarf_Attribute fb_attr;
+       size_t nops;
+       int ret;
+
+       /* If no real subprogram, find a real one */
+       if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
+               sp_die = die_find_real_subprogram(&pf->cu_die,
+                                                 pf->addr, &die_mem);
+               if (!sp_die) {
+                       pr_warning("Failed to find probe point in any "
+                                  "functions.\n");
+                       return -ENOENT;
+               }
+       }
 
        /* Get the frame base attribute/ops */
        dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr);
@@ -820,22 +1015,13 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
 #endif
        }
 
-       /* Find each argument */
-       tev->nargs = pf->pev->nargs;
-       tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
-       if (tev->args == NULL)
-               return -ENOMEM;
-       for (i = 0; i < pf->pev->nargs; i++) {
-               pf->pvar = &pf->pev->args[i];
-               pf->tvar = &tev->args[i];
-               ret = find_variable(sp_die, pf);
-               if (ret != 0)
-                       return ret;
-       }
+       /* Call finder's callback handler */
+       ret = pf->callback(sp_die, pf);
 
        /* *pf->fb_ops will be cached in libdw. Don't free it. */
        pf->fb_ops = NULL;
-       return 0;
+
+       return ret;
 }
 
 /* Find probe point from its line number */
@@ -871,7 +1057,7 @@ static int find_probe_point_by_line(struct probe_finder *pf)
                         (int)i, lineno, (uintmax_t)addr);
                pf->addr = addr;
 
-               ret = convert_probe_point(NULL, pf);
+               ret = call_probe_finder(NULL, pf);
                /* Continuing, because target line might be inlined. */
        }
        return ret;
@@ -984,7 +1170,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
                         (int)i, lineno, (unsigned long long)addr);
                pf->addr = addr;
 
-               ret = convert_probe_point(sp_die, pf);
+               ret = call_probe_finder(sp_die, pf);
                /* Continuing, because target line might be inlined. */
        }
        /* TODO: deallocate lines, but how? */
@@ -1019,7 +1205,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
                pr_debug("found inline addr: 0x%jx\n",
                         (uintmax_t)pf->addr);
 
-               param->retval = convert_probe_point(in_die, pf);
+               param->retval = call_probe_finder(in_die, pf);
                if (param->retval < 0)
                        return DWARF_CB_ABORT;
        }
@@ -1057,7 +1243,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
                        }
                        pf->addr += pp->offset;
                        /* TODO: Check the address in this function */
-                       param->retval = convert_probe_point(sp_die, pf);
+                       param->retval = call_probe_finder(sp_die, pf);
                }
        } else {
                struct dwarf_callback_param _param = {.data = (void *)pf,
@@ -1079,90 +1265,276 @@ static int find_probe_point_by_func(struct probe_finder *pf)
        return _param.retval;
 }
 
-/* Find probe_trace_events specified by perf_probe_event from debuginfo */
-int find_probe_trace_events(int fd, struct perf_probe_event *pev,
-                            struct probe_trace_event **tevs, int max_tevs)
+/* Find probe points from debuginfo */
+static int find_probes(int fd, struct probe_finder *pf)
 {
-       struct probe_finder pf = {.pev = pev, .max_tevs = max_tevs};
-       struct perf_probe_point *pp = &pev->point;
+       struct perf_probe_point *pp = &pf->pev->point;
        Dwarf_Off off, noff;
        size_t cuhl;
        Dwarf_Die *diep;
-       Dwarf *dbg;
+       Dwarf *dbg = NULL;
+       Dwfl *dwfl;
+       Dwarf_Addr bias;        /* Currently ignored */
        int ret = 0;
 
-       pf.tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs);
-       if (pf.tevs == NULL)
-               return -ENOMEM;
-       *tevs = pf.tevs;
-       pf.ntevs = 0;
-
-       dbg = dwarf_begin(fd, DWARF_C_READ);
+       dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
        if (!dbg) {
                pr_warning("No dwarf info found in the vmlinux - "
                        "please rebuild with CONFIG_DEBUG_INFO=y.\n");
-               free(pf.tevs);
-               *tevs = NULL;
                return -EBADF;
        }
 
 #if _ELFUTILS_PREREQ(0, 142)
        /* Get the call frame information from this dwarf */
-       pf.cfi = dwarf_getcfi(dbg);
+       pf->cfi = dwarf_getcfi(dbg);
 #endif
 
        off = 0;
-       line_list__init(&pf.lcache);
+       line_list__init(&pf->lcache);
        /* Loop on CUs (Compilation Unit) */
        while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) &&
               ret >= 0) {
                /* Get the DIE(Debugging Information Entry) of this CU */
-               diep = dwarf_offdie(dbg, off + cuhl, &pf.cu_die);
+               diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die);
                if (!diep)
                        continue;
 
                /* Check if target file is included. */
                if (pp->file)
-                       pf.fname = cu_find_realpath(&pf.cu_die, pp->file);
+                       pf->fname = cu_find_realpath(&pf->cu_die, pp->file);
                else
-                       pf.fname = NULL;
+                       pf->fname = NULL;
 
-               if (!pp->file || pf.fname) {
+               if (!pp->file || pf->fname) {
                        if (pp->function)
-                               ret = find_probe_point_by_func(&pf);
+                               ret = find_probe_point_by_func(pf);
                        else if (pp->lazy_line)
-                               ret = find_probe_point_lazy(NULL, &pf);
+                               ret = find_probe_point_lazy(NULL, pf);
                        else {
-                               pf.lno = pp->line;
-                               ret = find_probe_point_by_line(&pf);
+                               pf->lno = pp->line;
+                               ret = find_probe_point_by_line(pf);
                        }
                }
                off = noff;
        }
-       line_list__free(&pf.lcache);
-       dwarf_end(dbg);
+       line_list__free(&pf->lcache);
+       if (dwfl)
+               dwfl_end(dwfl);
 
-       return (ret < 0) ? ret : pf.ntevs;
+       return ret;
+}
+
+/* Add a found probe point into trace event list */
+static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+       struct trace_event_finder *tf =
+                       container_of(pf, struct trace_event_finder, pf);
+       struct probe_trace_event *tev;
+       int ret, i;
+
+       /* Check number of tevs */
+       if (tf->ntevs == tf->max_tevs) {
+               pr_warning("Too many( > %d) probe point found.\n",
+                          tf->max_tevs);
+               return -ERANGE;
+       }
+       tev = &tf->tevs[tf->ntevs++];
+
+       ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe,
+                                    &tev->point);
+       if (ret < 0)
+               return ret;
+
+       pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
+                tev->point.offset);
+
+       /* Find each argument */
+       tev->nargs = pf->pev->nargs;
+       tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+       if (tev->args == NULL)
+               return -ENOMEM;
+       for (i = 0; i < pf->pev->nargs; i++) {
+               pf->pvar = &pf->pev->args[i];
+               pf->tvar = &tev->args[i];
+               ret = find_variable(sp_die, pf);
+               if (ret != 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/* Find probe_trace_events specified by perf_probe_event from debuginfo */
+int find_probe_trace_events(int fd, struct perf_probe_event *pev,
+                           struct probe_trace_event **tevs, int max_tevs)
+{
+       struct trace_event_finder tf = {
+                       .pf = {.pev = pev, .callback = add_probe_trace_event},
+                       .max_tevs = max_tevs};
+       int ret;
+
+       /* Allocate result tevs array */
+       *tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs);
+       if (*tevs == NULL)
+               return -ENOMEM;
+
+       tf.tevs = *tevs;
+       tf.ntevs = 0;
+
+       ret = find_probes(fd, &tf.pf);
+       if (ret < 0) {
+               free(*tevs);
+               *tevs = NULL;
+               return ret;
+       }
+
+       return (ret < 0) ? ret : tf.ntevs;
+}
+
+#define MAX_VAR_LEN 64
+
+/* Collect available variables in this scope */
+static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
+{
+       struct available_var_finder *af = data;
+       struct variable_list *vl;
+       char buf[MAX_VAR_LEN];
+       int tag, ret;
+
+       vl = &af->vls[af->nvls - 1];
+
+       tag = dwarf_tag(die_mem);
+       if (tag == DW_TAG_formal_parameter ||
+           tag == DW_TAG_variable) {
+               ret = convert_variable_location(die_mem, af->pf.addr,
+                                               af->pf.fb_ops, NULL);
+               if (ret == 0) {
+                       ret = die_get_varname(die_mem, buf, MAX_VAR_LEN);
+                       pr_debug2("Add new var: %s\n", buf);
+                       if (ret > 0)
+                               strlist__add(vl->vars, buf);
+               }
+       }
+
+       if (af->child && dwarf_haspc(die_mem, af->pf.addr))
+               return DIE_FIND_CB_CONTINUE;
+       else
+               return DIE_FIND_CB_SIBLING;
+}
+
+/* Add a found vars into available variables list */
+static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+       struct available_var_finder *af =
+                       container_of(pf, struct available_var_finder, pf);
+       struct variable_list *vl;
+       Dwarf_Die die_mem, *scopes = NULL;
+       int ret, nscopes;
+
+       /* Check number of tevs */
+       if (af->nvls == af->max_vls) {
+               pr_warning("Too many( > %d) probe point found.\n", af->max_vls);
+               return -ERANGE;
+       }
+       vl = &af->vls[af->nvls++];
+
+       ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe,
+                                    &vl->point);
+       if (ret < 0)
+               return ret;
+
+       pr_debug("Probe point found: %s+%lu\n", vl->point.symbol,
+                vl->point.offset);
+
+       /* Find local variables */
+       vl->vars = strlist__new(true, NULL);
+       if (vl->vars == NULL)
+               return -ENOMEM;
+       af->child = true;
+       die_find_child(sp_die, collect_variables_cb, (void *)af, &die_mem);
+
+       /* Find external variables */
+       if (!af->externs)
+               goto out;
+       /* Don't need to search child DIE for externs. */
+       af->child = false;
+       nscopes = dwarf_getscopes_die(sp_die, &scopes);
+       while (nscopes-- > 1)
+               die_find_child(&scopes[nscopes], collect_variables_cb,
+                              (void *)af, &die_mem);
+       if (scopes)
+               free(scopes);
+
+out:
+       if (strlist__empty(vl->vars)) {
+               strlist__delete(vl->vars);
+               vl->vars = NULL;
+       }
+
+       return ret;
+}
+
+/* Find available variables at given probe point */
+int find_available_vars_at(int fd, struct perf_probe_event *pev,
+                          struct variable_list **vls, int max_vls,
+                          bool externs)
+{
+       struct available_var_finder af = {
+                       .pf = {.pev = pev, .callback = add_available_vars},
+                       .max_vls = max_vls, .externs = externs};
+       int ret;
+
+       /* Allocate result vls array */
+       *vls = zalloc(sizeof(struct variable_list) * max_vls);
+       if (*vls == NULL)
+               return -ENOMEM;
+
+       af.vls = *vls;
+       af.nvls = 0;
+
+       ret = find_probes(fd, &af.pf);
+       if (ret < 0) {
+               /* Free vlist for error */
+               while (af.nvls--) {
+                       if (af.vls[af.nvls].point.symbol)
+                               free(af.vls[af.nvls].point.symbol);
+                       if (af.vls[af.nvls].vars)
+                               strlist__delete(af.vls[af.nvls].vars);
+               }
+               free(af.vls);
+               *vls = NULL;
+               return ret;
+       }
+
+       return (ret < 0) ? ret : af.nvls;
 }
 
 /* Reverse search */
-int find_perf_probe_point(int fd, unsigned long addr,
-                         struct perf_probe_point *ppt)
+int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
 {
        Dwarf_Die cudie, spdie, indie;
-       Dwarf *dbg;
+       Dwarf *dbg = NULL;
+       Dwfl *dwfl = NULL;
        Dwarf_Line *line;
-       Dwarf_Addr laddr, eaddr;
+       Dwarf_Addr laddr, eaddr, bias = 0;
        const char *tmp;
        int lineno, ret = 0;
        bool found = false;
 
-       dbg = dwarf_begin(fd, DWARF_C_READ);
-       if (!dbg)
-               return -EBADF;
+       /* Open the live linux kernel */
+       dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
+       if (!dbg) {
+               pr_warning("No dwarf info found in the vmlinux - "
+                       "please rebuild with CONFIG_DEBUG_INFO=y.\n");
+               ret = -EINVAL;
+               goto end;
+       }
 
+       /* Adjust address with bias */
+       addr += bias;
        /* Find cu die */
-       if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr, &cudie)) {
+       if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) {
+               pr_warning("No CU DIE is found at %lx\n", addr);
                ret = -EINVAL;
                goto end;
        }
@@ -1225,7 +1597,8 @@ found:
        }
 
 end:
-       dwarf_end(dbg);
+       if (dwfl)
+               dwfl_end(dwfl);
        if (ret >= 0)
                ret = found ? 1 : 0;
        return ret;
@@ -1358,6 +1731,9 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
        struct line_finder *lf = param->data;
        struct line_range *lr = lf->lr;
 
+       pr_debug("find (%llx) %s\n",
+                (unsigned long long)dwarf_dieoffset(sp_die),
+                dwarf_diename(sp_die));
        if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
            die_compare_name(sp_die, lr->function)) {
                lf->fname = dwarf_decl_file(sp_die);
@@ -1401,10 +1777,12 @@ int find_line_range(int fd, struct line_range *lr)
        Dwarf_Off off = 0, noff;
        size_t cuhl;
        Dwarf_Die *diep;
-       Dwarf *dbg;
+       Dwarf *dbg = NULL;
+       Dwfl *dwfl;
+       Dwarf_Addr bias;        /* Currently ignored */
        const char *comp_dir;
 
-       dbg = dwarf_begin(fd, DWARF_C_READ);
+       dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
        if (!dbg) {
                pr_warning("No dwarf info found in the vmlinux - "
                        "please rebuild with CONFIG_DEBUG_INFO=y.\n");
@@ -1450,8 +1828,7 @@ int find_line_range(int fd, struct line_range *lr)
        }
 
        pr_debug("path: %s\n", lr->path);
-       dwarf_end(dbg);
-
+       dwfl_end(dwfl);
        return (ret < 0) ? ret : lf.found;
 }
 
index 4507d51..bba69d4 100644 (file)
@@ -22,20 +22,27 @@ extern int find_probe_trace_events(int fd, struct perf_probe_event *pev,
                                    int max_tevs);
 
 /* Find a perf_probe_point from debuginfo */
-extern int find_perf_probe_point(int fd, unsigned long addr,
+extern int find_perf_probe_point(unsigned long addr,
                                 struct perf_probe_point *ppt);
 
+/* Find a line range */
 extern int find_line_range(int fd, struct line_range *lr);
 
+/* Find available variables */
+extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
+                                 struct variable_list **vls, int max_points,
+                                 bool externs);
+
 #include <dwarf.h>
 #include <libdw.h>
+#include <libdwfl.h>
 #include <version.h>
 
 struct probe_finder {
        struct perf_probe_event *pev;           /* Target probe event */
-       struct probe_trace_event *tevs;         /* Result trace events */
-       int                     ntevs;          /* Number of trace events */
-       int                     max_tevs;       /* Max number of trace events */
+
+       /* Callback when a probe point is found */
+       int (*callback)(Dwarf_Die *sp_die, struct probe_finder *pf);
 
        /* For function searching */
        int                     lno;            /* Line number */
@@ -53,6 +60,22 @@ struct probe_finder {
        struct probe_trace_arg  *tvar;          /* Current result variable */
 };
 
+struct trace_event_finder {
+       struct probe_finder     pf;
+       struct probe_trace_event *tevs;         /* Found trace events */
+       int                     ntevs;          /* Number of trace events */
+       int                     max_tevs;       /* Max number of trace events */
+};
+
+struct available_var_finder {
+       struct probe_finder     pf;
+       struct variable_list    *vls;           /* Found variable lists */
+       int                     nvls;           /* Number of variable lists */
+       int                     max_vls;        /* Max no. of variable lists */
+       bool                    externs;        /* Find external vars too */
+       bool                    child;          /* Search child scopes */
+};
+
 struct line_finder {
        struct line_range       *lr;            /* Target line range */
 
index 6d0df80..8bc010e 100644 (file)
@@ -1,4 +1,3 @@
-#include <slang.h>
 #include "libslang.h"
 #include <linux/compiler.h>
 #include <linux/list.h>