perf, x86: Improve the PEBS ABI
Peter Zijlstra [Thu, 8 Apr 2010 21:03:20 +0000 (23:03 +0200)]
Rename perf_event_attr::precise to perf_event_attr::precise_ip and
widen it to 2 bits. This new field describes the required precision of
the PERF_SAMPLE_IP field:

  0 - SAMPLE_IP can have arbitrary skid
  1 - SAMPLE_IP must have constant skid
  2 - SAMPLE_IP requested to have 0 skid
  3 - SAMPLE_IP must have 0 skid

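Modify the Intel PEBS code accordingly. The PEBS implementation
now supports up to precise_ip == 2, where we perform the IP fixup;
requests for a higher precision than the hardware can provide
(including precise_ip == 3) are rejected with -EOPNOTSUPP.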

Also rename PERF_RECORD_MISC_EXACT to PERF_RECORD_MISC_EXACT_IP
(s/PERF_RECORD_MISC_EXACT/&_IP/) to clarify its meaning: this bit
should be set for each PERF_SAMPLE_IP field known to match the actual
instruction triggering the event.

This new scheme allows for a PEBS mode that uses the buffer for more
than a single event.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_ds.c
include/linux/perf_event.h
tools/perf/builtin-top.c
tools/perf/util/parse-events.c

index 4a3f1f2..27fa9ee 100644 (file)
@@ -488,6 +488,21 @@ static int x86_setup_perfctr(struct perf_event *event)
 
 static int x86_pmu_hw_config(struct perf_event *event)
 {
+       if (event->attr.precise_ip) {
+               int precise = 0;
+
+               /* Support for constant skid */
+               if (x86_pmu.pebs)
+                       precise++;
+
+               /* Support for IP fixup */
+               if (x86_pmu.lbr_nr)
+                       precise++;
+
+               if (event->attr.precise_ip > precise)
+                       return -EOPNOTSUPP;
+       }
+
        /*
         * Generate PMC IRQs:
         * (keep 'enabled' bit clear for now)
@@ -1780,7 +1795,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
        }
 
        if (regs->flags & PERF_EFLAGS_EXACT)
-               misc |= PERF_RECORD_MISC_EXACT;
+               misc |= PERF_RECORD_MISC_EXACT_IP;
 
        return misc;
 }
index a4b56ac..fdbc652 100644 (file)
@@ -563,7 +563,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 
        x86_pmu_disable_event(event);
 
-       if (unlikely(event->attr.precise))
+       if (unlikely(event->attr.precise_ip))
                intel_pmu_pebs_disable(event);
 }
 
@@ -615,7 +615,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
                return;
        }
 
-       if (unlikely(event->attr.precise))
+       if (unlikely(event->attr.precise_ip))
                intel_pmu_pebs_enable(event);
 
        __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
index 35056f7..18018d1 100644 (file)
@@ -307,7 +307,7 @@ intel_pebs_constraints(struct perf_event *event)
 {
        struct event_constraint *c;
 
-       if (!event->attr.precise)
+       if (!event->attr.precise_ip)
                return NULL;
 
        if (x86_pmu.pebs_constraints) {
@@ -330,7 +330,7 @@ static void intel_pmu_pebs_enable(struct perf_event *event)
        cpuc->pebs_enabled |= 1ULL << hwc->idx;
        WARN_ON_ONCE(cpuc->enabled);
 
-       if (x86_pmu.intel_cap.pebs_trap)
+       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
                intel_pmu_lbr_enable(event);
 }
 
@@ -345,7 +345,7 @@ static void intel_pmu_pebs_disable(struct perf_event *event)
 
        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
 
-       if (x86_pmu.intel_cap.pebs_trap)
+       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
                intel_pmu_lbr_disable(event);
 }
 
@@ -485,7 +485,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
        regs.bp = pebs->bp;
        regs.sp = pebs->sp;
 
-       if (intel_pmu_pebs_fixup_ip(&regs))
+       if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
                regs.flags |= PERF_EFLAGS_EXACT;
        else
                regs.flags &= ~PERF_EFLAGS_EXACT;
@@ -518,7 +518,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 
        WARN_ON_ONCE(!event);
 
-       if (!event->attr.precise)
+       if (!event->attr.precise_ip)
                return;
 
        n = top - at;
@@ -570,7 +570,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 
                        WARN_ON_ONCE(!event);
 
-                       if (!event->attr.precise)
+                       if (!event->attr.precise_ip)
                                continue;
 
                        if (__test_and_set_bit(bit, (unsigned long *)&status))
index 6be4a0f..23cd005 100644 (file)
@@ -203,9 +203,19 @@ struct perf_event_attr {
                                enable_on_exec :  1, /* next exec enables     */
                                task           :  1, /* trace fork/exit       */
                                watermark      :  1, /* wakeup_watermark      */
-                               precise        :  1, /* OoO invariant counter */
-
-                               __reserved_1   : 48;
+                               /*
+                                * precise_ip:
+                                *
+                                *  0 - SAMPLE_IP can have arbitrary skid
+                                *  1 - SAMPLE_IP must have constant skid
+                                *  2 - SAMPLE_IP requested to have 0 skid
+                                *  3 - SAMPLE_IP must have 0 skid
+                                *
+                                *  See also PERF_RECORD_MISC_EXACT_IP
+                                */
+                               precise_ip     :  2, /* skid constraint       */
+
+                               __reserved_1   : 47;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -296,7 +306,12 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_GUEST_KERNEL          (4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER            (5 << 0)
 
-#define PERF_RECORD_MISC_EXACT                 (1 << 14)
+/*
+ * Indicates that the content of PERF_SAMPLE_IP points to
+ * the actual instruction that triggered the event. See also
+ * perf_event_attr::precise_ip.
+ */
+#define PERF_RECORD_MISC_EXACT_IP              (1 << 14)
 /*
  * Reserve the last bit to indicate some extended misc field
  */
index 3de3977..ed9b5b6 100644 (file)
@@ -1021,7 +1021,7 @@ static void event__process_sample(const event_t *self,
                return;
        }
 
-       if (self->header.misc & PERF_RECORD_MISC_EXACT)
+       if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
                exact_samples++;
 
        if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 ||
index bc8b7e6..ae7f591 100644 (file)
@@ -654,10 +654,6 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr)
                return EVT_FAILED;
        n = hex2u64(str + 1, &config);
        if (n > 0) {
-               if (str[n+1] == 'p') {
-                       attr->precise = 1;
-                       n++;
-               }
                *strp = str + n + 1;
                attr->type = PERF_TYPE_RAW;
                attr->config = config;
@@ -692,19 +688,29 @@ static enum event_result
 parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 {
        const char *str = *strp;
-       int eu = 1, ek = 1, eh = 1;
+       int exclude = 0;
+       int eu = 0, ek = 0, eh = 0, precise = 0;
 
        if (*str++ != ':')
                return 0;
        while (*str) {
-               if (*str == 'u')
+               if (*str == 'u') {
+                       if (!exclude)
+                               exclude = eu = ek = eh = 1;
                        eu = 0;
-               else if (*str == 'k')
+               } else if (*str == 'k') {
+                       if (!exclude)
+                               exclude = eu = ek = eh = 1;
                        ek = 0;
-               else if (*str == 'h')
+               } else if (*str == 'h') {
+                       if (!exclude)
+                               exclude = eu = ek = eh = 1;
                        eh = 0;
-               else
+               } else if (*str == 'p') {
+                       precise++;
+               } else
                        break;
+
                ++str;
        }
        if (str >= *strp + 2) {
@@ -712,6 +718,7 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
                attr->exclude_user   = eu;
                attr->exclude_kernel = ek;
                attr->exclude_hv     = eh;
+               attr->precise_ip     = precise;
                return 1;
        }
        return 0;