Merge branch 'perf/fast' into perf/core
Ingo Molnar [Fri, 27 Jan 2012 11:07:57 +0000 (12:07 +0100)]
Merge reason: Lets ready it for v3.4

Signed-off-by: Ingo Molnar <mingo@elte.hu>

13 files changed:
arch/arm/include/asm/perf_event.h
arch/frv/include/asm/perf_event.h
arch/hexagon/include/asm/perf_event.h
arch/powerpc/include/asm/perf_event_server.h
arch/powerpc/kernel/perf_event.c
arch/s390/include/asm/perf_event.h
arch/x86/include/asm/perf_event.h
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event.h
include/linux/perf_event.h
kernel/events/core.c
kernel/events/hw_breakpoint.c
tools/perf/builtin-test.c

index 99cfe36..7523340 100644 (file)
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-/* ARM performance counters start from 1 (in the cp15 accesses) so use the
- * same indexes here for consistency. */
-#define PERF_EVENT_INDEX_OFFSET 1
-
 /* ARM perf PMU IDs for use by internal perf clients. */
 enum arm_perf_pmu_ids {
        ARM_PERF_PMU_ID_XSCALE1 = 0,
index a69e015..c52ea55 100644 (file)
@@ -12,6 +12,4 @@
 #ifndef _ASM_PERF_EVENT_H
 #define _ASM_PERF_EVENT_H
 
-#define PERF_EVENT_INDEX_OFFSET        0
-
 #endif /* _ASM_PERF_EVENT_H */
index 6c2910f..8b8526b 100644 (file)
@@ -19,6 +19,4 @@
 #ifndef _ASM_PERF_EVENT_H
 #define _ASM_PERF_EVENT_H
 
-#define PERF_EVENT_INDEX_OFFSET        0
-
 #endif /* _ASM_PERF_EVENT_H */
index 8f1df12..1a8093f 100644 (file)
@@ -61,8 +61,6 @@ struct pt_regs;
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 
-#define PERF_EVENT_INDEX_OFFSET        1
-
 /*
  * Only override the default definitions in include/linux/perf_event.h
  * if we have hardware PMU support.
index 10a140f..d614ab5 100644 (file)
@@ -1187,6 +1187,11 @@ static int power_pmu_event_init(struct perf_event *event)
        return err;
 }
 
+static int power_pmu_event_idx(struct perf_event *event)
+{
+       return event->hw.idx;
+}
+
 struct pmu power_pmu = {
        .pmu_enable     = power_pmu_enable,
        .pmu_disable    = power_pmu_disable,
@@ -1199,6 +1204,7 @@ struct pmu power_pmu = {
        .start_txn      = power_pmu_start_txn,
        .cancel_txn     = power_pmu_cancel_txn,
        .commit_txn     = power_pmu_commit_txn,
+       .event_idx      = power_pmu_event_idx,
 };
 
 /*
index a75f168..4eb444e 100644 (file)
@@ -6,4 +6,3 @@
 
 /* Empty, just to avoid compiling error */
 
-#define PERF_EVENT_INDEX_OFFSET 0
index 096c975..9b922c1 100644 (file)
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void);
 #ifdef CONFIG_PERF_EVENTS
 extern void perf_events_lapic_init(void);
 
-#define PERF_EVENT_INDEX_OFFSET                        0
-
 /*
  * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
  * This flag is otherwise unused and ABI specified to be 0, so nobody should
index 5adce10..f8bddb5 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/bitops.h>
+#include <linux/device.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -31,6 +32,7 @@
 #include <asm/compat.h>
 #include <asm/smp.h>
 #include <asm/alternative.h>
+#include <asm/timer.h>
 
 #include "perf_event.h"
 
@@ -1210,6 +1212,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
                break;
 
        case CPU_STARTING:
+               if (x86_pmu.attr_rdpmc)
+                       set_in_cr4(X86_CR4_PCE);
                if (x86_pmu.cpu_starting)
                        x86_pmu.cpu_starting(cpu);
                break;
@@ -1319,6 +1323,8 @@ static int __init init_hw_perf_events(void)
                }
        }
 
+       x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
        pr_info("... version:                %d\n",     x86_pmu.version);
        pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
        pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
@@ -1542,10 +1548,71 @@ static int x86_pmu_event_init(struct perf_event *event)
        return err;
 }
 
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+       int idx = event->hw.idx;
+
+       if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
+               idx -= X86_PMC_IDX_FIXED;
+               idx |= 1 << 30;
+       }
+
+       return idx + 1;
+}
+
+static ssize_t get_attr_rdpmc(struct device *cdev,
+                             struct device_attribute *attr,
+                             char *buf)
+{
+       return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
+}
+
+static void change_rdpmc(void *info)
+{
+       bool enable = !!(unsigned long)info;
+
+       if (enable)
+               set_in_cr4(X86_CR4_PCE);
+       else
+               clear_in_cr4(X86_CR4_PCE);
+}
+
+static ssize_t set_attr_rdpmc(struct device *cdev,
+                             struct device_attribute *attr,
+                             const char *buf, size_t count)
+{
+       unsigned long val = simple_strtoul(buf, NULL, 0);
+
+       if (!!val != !!x86_pmu.attr_rdpmc) {
+               x86_pmu.attr_rdpmc = !!val;
+               smp_call_function(change_rdpmc, (void *)val, 1);
+       }
+
+       return count;
+}
+
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+
+static struct attribute *x86_pmu_attrs[] = {
+       &dev_attr_rdpmc.attr,
+       NULL,
+};
+
+static struct attribute_group x86_pmu_attr_group = {
+       .attrs = x86_pmu_attrs,
+};
+
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+       &x86_pmu_attr_group,
+       NULL,
+};
+
 static struct pmu pmu = {
        .pmu_enable     = x86_pmu_enable,
        .pmu_disable    = x86_pmu_disable,
 
+       .attr_groups    = x86_pmu_attr_groups,
+
        .event_init     = x86_pmu_event_init,
 
        .add            = x86_pmu_add,
@@ -1557,8 +1624,23 @@ static struct pmu pmu = {
        .start_txn      = x86_pmu_start_txn,
        .cancel_txn     = x86_pmu_cancel_txn,
        .commit_txn     = x86_pmu_commit_txn,
+
+       .event_idx      = x86_pmu_event_idx,
 };
 
+void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+       if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+               return;
+
+       if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+               return;
+
+       userpg->time_mult = this_cpu_read(cyc2ns);
+       userpg->time_shift = CYC2NS_SCALE_FACTOR;
+       userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+}
+
 /*
  * callchain support
  */
index 8944062..513d617 100644 (file)
@@ -307,6 +307,14 @@ struct x86_pmu {
        struct x86_pmu_quirk *quirks;
        int             perfctr_second_write;
 
+       /*
+        * sysfs attrs
+        */
+       int             attr_rdpmc;
+
+       /*
+        * CPU Hotplug hooks
+        */
        int             (*cpu_prepare)(int cpu);
        void            (*cpu_starting)(int cpu);
        void            (*cpu_dying)(int cpu);
index 0885561..0b91db2 100644 (file)
@@ -291,12 +291,14 @@ struct perf_event_mmap_page {
        __s64   offset;                 /* add to hardware event value */
        __u64   time_enabled;           /* time event active */
        __u64   time_running;           /* time event on cpu */
+       __u32   time_mult, time_shift;
+       __u64   time_offset;
 
                /*
                 * Hole for extension of the self monitor capabilities
                 */
 
-       __u64   __reserved[123];        /* align to 1k */
+       __u64   __reserved[121];        /* align to 1k */
 
        /*
         * Control data for the mmap() data buffer.
@@ -615,6 +617,7 @@ struct pmu {
        struct list_head                entry;
 
        struct device                   *dev;
+       const struct attribute_group    **attr_groups;
        char                            *name;
        int                             type;
 
@@ -680,6 +683,12 @@ struct pmu {
         * for each successful ->add() during the transaction.
         */
        void (*cancel_txn)              (struct pmu *pmu); /* optional */
+
+       /*
+        * Will return the value for perf_event_mmap_page::index for this event,
+        * if no implementation is provided it will default to: event->hw.idx + 1.
+        */
+       int (*event_idx)                (struct perf_event *event); /*optional */
 };
 
 /**
index 32b48c8..de859fb 100644 (file)
@@ -3208,10 +3208,6 @@ int perf_event_task_disable(void)
        return 0;
 }
 
-#ifndef PERF_EVENT_INDEX_OFFSET
-# define PERF_EVENT_INDEX_OFFSET 0
-#endif
-
 static int perf_event_index(struct perf_event *event)
 {
        if (event->hw.state & PERF_HES_STOPPED)
@@ -3220,21 +3216,26 @@ static int perf_event_index(struct perf_event *event)
        if (event->state != PERF_EVENT_STATE_ACTIVE)
                return 0;
 
-       return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
+       return event->pmu->event_idx(event);
 }
 
 static void calc_timer_values(struct perf_event *event,
+                               u64 *now,
                                u64 *enabled,
                                u64 *running)
 {
-       u64 now, ctx_time;
+       u64 ctx_time;
 
-       now = perf_clock();
-       ctx_time = event->shadow_ctx_time + now;
+       *now = perf_clock();
+       ctx_time = event->shadow_ctx_time + *now;
        *enabled = ctx_time - event->tstamp_enabled;
        *running = ctx_time - event->tstamp_running;
 }
 
+void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+}
+
 /*
  * Callers need to ensure there can be no nesting of this function, otherwise
  * the seqlock logic goes bad. We can not serialize this because the arch
@@ -3244,7 +3245,7 @@ void perf_event_update_userpage(struct perf_event *event)
 {
        struct perf_event_mmap_page *userpg;
        struct ring_buffer *rb;
-       u64 enabled, running;
+       u64 enabled, running, now;
 
        rcu_read_lock();
        /*
@@ -3256,7 +3257,7 @@ void perf_event_update_userpage(struct perf_event *event)
         * because of locking issue as we can be called in
         * NMI context
         */
-       calc_timer_values(event, &enabled, &running);
+       calc_timer_values(event, &now, &enabled, &running);
        rb = rcu_dereference(event->rb);
        if (!rb)
                goto unlock;
@@ -3272,7 +3273,7 @@ void perf_event_update_userpage(struct perf_event *event)
        barrier();
        userpg->index = perf_event_index(event);
        userpg->offset = perf_event_count(event);
-       if (event->state == PERF_EVENT_STATE_ACTIVE)
+       if (userpg->index)
                userpg->offset -= local64_read(&event->hw.prev_count);
 
        userpg->time_enabled = enabled +
@@ -3281,6 +3282,8 @@ void perf_event_update_userpage(struct perf_event *event)
        userpg->time_running = running +
                        atomic64_read(&event->child_total_time_running);
 
+       perf_update_user_clock(userpg, now);
+
        barrier();
        ++userpg->lock;
        preempt_enable();
@@ -3538,6 +3541,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        event->mmap_user = get_current_user();
        vma->vm_mm->pinned_vm += event->mmap_locked;
 
+       perf_event_update_userpage(event);
+
 unlock:
        if (!ret)
                atomic_inc(&event->mmap_count);
@@ -3769,7 +3774,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 static void perf_output_read(struct perf_output_handle *handle,
                             struct perf_event *event)
 {
-       u64 enabled = 0, running = 0;
+       u64 enabled = 0, running = 0, now;
        u64 read_format = event->attr.read_format;
 
        /*
@@ -3782,7 +3787,7 @@ static void perf_output_read(struct perf_output_handle *handle,
         * NMI context
         */
        if (read_format & PERF_FORMAT_TOTAL_TIMES)
-               calc_timer_values(event, &enabled, &running);
+               calc_timer_values(event, &now, &enabled, &running);
 
        if (event->attr.read_format & PERF_FORMAT_GROUP)
                perf_output_read_group(handle, event, enabled, running);
@@ -4994,6 +4999,11 @@ static int perf_swevent_init(struct perf_event *event)
        return 0;
 }
 
+static int perf_swevent_event_idx(struct perf_event *event)
+{
+       return 0;
+}
+
 static struct pmu perf_swevent = {
        .task_ctx_nr    = perf_sw_context,
 
@@ -5003,6 +5013,8 @@ static struct pmu perf_swevent = {
        .start          = perf_swevent_start,
        .stop           = perf_swevent_stop,
        .read           = perf_swevent_read,
+
+       .event_idx      = perf_swevent_event_idx,
 };
 
 #ifdef CONFIG_EVENT_TRACING
@@ -5089,6 +5101,8 @@ static struct pmu perf_tracepoint = {
        .start          = perf_swevent_start,
        .stop           = perf_swevent_stop,
        .read           = perf_swevent_read,
+
+       .event_idx      = perf_swevent_event_idx,
 };
 
 static inline void perf_tp_register(void)
@@ -5308,6 +5322,8 @@ static struct pmu perf_cpu_clock = {
        .start          = cpu_clock_event_start,
        .stop           = cpu_clock_event_stop,
        .read           = cpu_clock_event_read,
+
+       .event_idx      = perf_swevent_event_idx,
 };
 
 /*
@@ -5380,6 +5396,8 @@ static struct pmu perf_task_clock = {
        .start          = task_clock_event_start,
        .stop           = task_clock_event_stop,
        .read           = task_clock_event_read,
+
+       .event_idx      = perf_swevent_event_idx,
 };
 
 static void perf_pmu_nop_void(struct pmu *pmu)
@@ -5407,6 +5425,11 @@ static void perf_pmu_cancel_txn(struct pmu *pmu)
        perf_pmu_enable(pmu);
 }
 
+static int perf_event_idx_default(struct perf_event *event)
+{
+       return event->hw.idx + 1;
+}
+
 /*
  * Ensures all contexts with the same task_ctx_nr have the same
  * pmu_cpu_context too.
@@ -5493,6 +5516,7 @@ static int pmu_dev_alloc(struct pmu *pmu)
        if (!pmu->dev)
                goto out;
 
+       pmu->dev->groups = pmu->attr_groups;
        device_initialize(pmu->dev);
        ret = dev_set_name(pmu->dev, "%s", pmu->name);
        if (ret)
@@ -5596,6 +5620,9 @@ got_cpu_context:
                pmu->pmu_disable = perf_pmu_nop_void;
        }
 
+       if (!pmu->event_idx)
+               pmu->event_idx = perf_event_idx_default;
+
        list_add_rcu(&pmu->entry, &pmus);
        ret = 0;
 unlock:
index b7971d6..b0309f7 100644 (file)
@@ -613,6 +613,11 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags)
        bp->hw.state = PERF_HES_STOPPED;
 }
 
+static int hw_breakpoint_event_idx(struct perf_event *bp)
+{
+       return 0;
+}
+
 static struct pmu perf_breakpoint = {
        .task_ctx_nr    = perf_sw_context, /* could eventually get its own */
 
@@ -622,6 +627,8 @@ static struct pmu perf_breakpoint = {
        .start          = hw_breakpoint_start,
        .stop           = hw_breakpoint_stop,
        .read           = hw_breakpoint_pmu_read,
+
+       .event_idx      = hw_breakpoint_event_idx,
 };
 
 int __init init_hw_breakpoint(void)
index 3ce709e..70c4eb2 100644 (file)
@@ -15,6 +15,8 @@
 #include "util/thread_map.h"
 #include "../../include/linux/hw_breakpoint.h"
 
+#include <sys/mman.h>
+
 static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
 {
        bool *visited = symbol__priv(sym);
@@ -1296,6 +1298,173 @@ out:
        return (err < 0 || errs > 0) ? -1 : 0;
 }
 
+
+#if defined(__x86_64__) || defined(__i386__)
+
+#define barrier() asm volatile("" ::: "memory")
+
+static u64 rdpmc(unsigned int counter)
+{
+       unsigned int low, high;
+
+       asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+       return low | ((u64)high) << 32;
+}
+
+static u64 rdtsc(void)
+{
+       unsigned int low, high;
+
+       asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+       return low | ((u64)high) << 32;
+}
+
+static u64 mmap_read_self(void *addr)
+{
+       struct perf_event_mmap_page *pc = addr;
+       u32 seq, idx, time_mult = 0, time_shift = 0;
+       u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+
+       do {
+               seq = pc->lock;
+               barrier();
+
+               enabled = pc->time_enabled;
+               running = pc->time_running;
+
+               if (enabled != running) {
+                       cyc = rdtsc();
+                       time_mult = pc->time_mult;
+                       time_shift = pc->time_shift;
+                       time_offset = pc->time_offset;
+               }
+
+               idx = pc->index;
+               count = pc->offset;
+               if (idx)
+                       count += rdpmc(idx - 1);
+
+               barrier();
+       } while (pc->lock != seq);
+
+       if (enabled != running) {
+               u64 quot, rem;
+
+               quot = (cyc >> time_shift);
+               rem = cyc & ((1 << time_shift) - 1);
+               delta = time_offset + quot * time_mult +
+                       ((rem * time_mult) >> time_shift);
+
+               enabled += delta;
+               if (idx)
+                       running += delta;
+
+               quot = count / running;
+               rem = count % running;
+               count = quot * enabled + (rem * enabled) / running;
+       }
+
+       return count;
+}
+
+/*
+ * If the RDPMC instruction faults then signal this back to the test parent task:
+ */
+static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used)
+{
+       exit(-1);
+}
+
+static int __test__rdpmc(void)
+{
+       long page_size = sysconf(_SC_PAGE_SIZE);
+       volatile int tmp = 0;
+       u64 i, loops = 1000;
+       int n;
+       int fd;
+       void *addr;
+       struct perf_event_attr attr = {
+               .type = PERF_TYPE_HARDWARE,
+               .config = PERF_COUNT_HW_INSTRUCTIONS,
+               .exclude_kernel = 1,
+       };
+       u64 delta_sum = 0;
+        struct sigaction sa;
+
+       sigfillset(&sa.sa_mask);
+       sa.sa_sigaction = segfault_handler;
+       sigaction(SIGSEGV, &sa, NULL);
+
+       fprintf(stderr, "\n\n");
+
+       fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+       if (fd < 0) {
+               die("Error: sys_perf_event_open() syscall returned "
+                   "with %d (%s)\n", fd, strerror(errno));
+       }
+
+       addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
+       if (addr == (void *)(-1)) {
+               die("Error: mmap() syscall returned "
+                   "with (%s)\n", strerror(errno));
+       }
+
+       for (n = 0; n < 6; n++) {
+               u64 stamp, now, delta;
+
+               stamp = mmap_read_self(addr);
+
+               for (i = 0; i < loops; i++)
+                       tmp++;
+
+               now = mmap_read_self(addr);
+               loops *= 10;
+
+               delta = now - stamp;
+               fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta);
+
+               delta_sum += delta;
+       }
+
+       munmap(addr, page_size);
+       close(fd);
+
+       fprintf(stderr, "   ");
+
+       if (!delta_sum)
+               return -1;
+
+       return 0;
+}
+
+static int test__rdpmc(void)
+{
+       int status = 0;
+       int wret = 0;
+       int ret;
+       int pid;
+
+       pid = fork();
+       if (pid < 0)
+               return -1;
+
+       if (!pid) {
+               ret = __test__rdpmc();
+
+               exit(ret);
+       }
+
+       wret = waitpid(pid, &status, 0);
+       if (wret < 0 || status)
+               return -1;
+
+       return 0;
+}
+
+#endif
+
 static struct test {
        const char *desc;
        int (*func)(void);
@@ -1320,6 +1489,12 @@ static struct test {
                .desc = "parse events tests",
                .func = test__parse_events,
        },
+#if defined(__x86_64__) || defined(__i386__)
+       {
+               .desc = "x86 rdpmc test",
+               .func = test__rdpmc,
+       },
+#endif
        {
                .desc = "Validate PERF_RECORD_* events & perf_sample fields",
                .func = test__PERF_RECORD,
@@ -1412,7 +1587,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
        if (symbol__init() < 0)
                return -1;
 
-       setup_pager();
-
        return __cmd_test(argc, argv);
 }