Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
Linus Torvalds [Thu, 21 Oct 2010 19:54:49 +0000 (12:54 -0700)]
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (163 commits)
  tracing: Fix compile issue for trace_sched_wakeup.c
  [S390] hardirq: remove pointless header file includes
  [IA64] Move local_softirq_pending() definition
  perf, powerpc: Fix power_pmu_event_init to not use event->ctx
  ftrace: Remove recursion between recordmcount and scripts/mod/empty
  jump_label: Add COND_STMT(), reducer wrappery
  perf: Optimize sw events
  perf: Use jump_labels to optimize the scheduler hooks
  jump_label: Add atomic_t interface
  jump_label: Use more consistent naming
  perf, hw_breakpoint: Fix crash in hw_breakpoint creation
  perf: Find task before event alloc
  perf: Fix task refcount bugs
  perf: Fix group moving
  irq_work: Add generic hardirq context callbacks
  perf_events: Fix transaction recovery in group_sched_in()
  perf_events: Fix bogus AMD64 generic TLB events
  perf_events: Fix bogus context time tracking
  tracing: Remove parent recording in latency tracer graph options
  tracing: Use one prologue for the preempt irqs off tracer function tracers
  ...

158 files changed:
Documentation/kprobes.txt
Makefile
arch/Kconfig
arch/alpha/Kconfig
arch/alpha/include/asm/perf_event.h
arch/alpha/kernel/perf_event.c
arch/alpha/kernel/time.c
arch/arm/Kconfig
arch/arm/include/asm/perf_event.h
arch/arm/kernel/perf_event.c
arch/arm/oprofile/Makefile
arch/arm/oprofile/common.c
arch/frv/Kconfig
arch/frv/lib/Makefile
arch/frv/lib/perf_event.c [deleted file]
arch/ia64/include/asm/hardirq.h
arch/parisc/Kconfig
arch/parisc/include/asm/perf_event.h
arch/powerpc/Kconfig
arch/powerpc/include/asm/paca.h
arch/powerpc/kernel/perf_callchain.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/perf_event_fsl_emb.c
arch/powerpc/kernel/time.c
arch/s390/Kconfig
arch/s390/include/asm/hardirq.h
arch/s390/include/asm/perf_event.h
arch/sh/Kconfig
arch/sh/include/asm/perf_event.h
arch/sh/kernel/perf_callchain.c
arch/sh/kernel/perf_event.c
arch/sh/oprofile/Makefile
arch/sh/oprofile/common.c
arch/sh/oprofile/op_impl.h [deleted file]
arch/sparc/Kconfig
arch/sparc/include/asm/jump_label.h [new file with mode: 0644]
arch/sparc/include/asm/perf_event.h
arch/sparc/kernel/Makefile
arch/sparc/kernel/jump_label.c [new file with mode: 0644]
arch/sparc/kernel/module.c
arch/sparc/kernel/pcr.c
arch/sparc/kernel/perf_event.c
arch/x86/Kconfig
arch/x86/include/asm/alternative.h
arch/x86/include/asm/entry_arch.h
arch/x86/include/asm/hardirq.h
arch/x86/include/asm/hw_irq.h
arch/x86/include/asm/irq_vectors.h
arch/x86/include/asm/jump_label.h [new file with mode: 0644]
arch/x86/include/asm/perf_event_p4.h
arch/x86/kernel/Makefile
arch/x86/kernel/alternative.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_amd.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_ds.c
arch/x86/kernel/cpu/perf_event_p4.c
arch/x86/kernel/entry_64.S
arch/x86/kernel/ftrace.c
arch/x86/kernel/irq.c
arch/x86/kernel/irq_work.c [new file with mode: 0644]
arch/x86/kernel/irqinit.c
arch/x86/kernel/jump_label.c [new file with mode: 0644]
arch/x86/kernel/kprobes.c
arch/x86/kernel/module.c
arch/x86/kernel/setup.c
arch/x86/mm/fault.c
arch/x86/mm/kmemcheck/kmemcheck.c
arch/x86/oprofile/backtrace.c
arch/x86/oprofile/nmi_int.c
drivers/oprofile/oprof.c
drivers/oprofile/oprof.h
drivers/oprofile/oprofile_files.c
drivers/oprofile/oprofile_perf.c [new file with mode: 0644]
drivers/oprofile/oprofilefs.c
include/asm-generic/hardirq.h
include/asm-generic/vmlinux.lds.h
include/linux/dynamic_debug.h
include/linux/ftrace_event.h
include/linux/interrupt.h
include/linux/irq_work.h [new file with mode: 0644]
include/linux/jump_label.h [new file with mode: 0644]
include/linux/jump_label_ref.h [new file with mode: 0644]
include/linux/module.h
include/linux/oprofile.h
include/linux/percpu.h
include/linux/perf_event.h
include/linux/sched.h
include/linux/stop_machine.h
include/linux/tracepoint.h
include/trace/events/irq.h
include/trace/events/napi.h
include/trace/events/net.h [new file with mode: 0644]
include/trace/events/power.h
include/trace/events/skb.h
init/Kconfig
kernel/Makefile
kernel/exit.c
kernel/hw_breakpoint.c
kernel/irq_work.c [new file with mode: 0644]
kernel/jump_label.c [new file with mode: 0644]
kernel/kprobes.c
kernel/module.c
kernel/perf_event.c
kernel/sched.c
kernel/test_kprobes.c
kernel/timer.c
kernel/trace/Kconfig
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_event_perf.c
kernel/trace/trace_events.c
kernel/trace/trace_functions_graph.c
kernel/trace/trace_irqsoff.c
kernel/trace/trace_sched_wakeup.c
kernel/trace/trace_workqueue.c
kernel/tracepoint.c
kernel/watchdog.c
lib/Kconfig.debug
lib/dynamic_debug.c
net/core/datagram.c
net/core/dev.c
net/core/net-traces.c
net/core/skbuff.c
scripts/Makefile
scripts/Makefile.build
scripts/Makefile.lib
scripts/basic/Makefile
scripts/basic/hash.c [deleted file]
scripts/gcc-goto.sh [new file with mode: 0644]
scripts/recordmcount.c [new file with mode: 0644]
scripts/recordmcount.h [new file with mode: 0644]
tools/perf/Documentation/perf-annotate.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Makefile
tools/perf/builtin-annotate.c
tools/perf/builtin-report.c
tools/perf/feature-tests.mak
tools/perf/scripts/python/bin/netdev-times-record [new file with mode: 0644]
tools/perf/scripts/python/bin/netdev-times-report [new file with mode: 0644]
tools/perf/scripts/python/netdev-times.py [new file with mode: 0644]
tools/perf/util/cache.h
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/hist.c
tools/perf/util/path.c
tools/perf/util/sort.h
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/ui/browser.c
tools/perf/util/ui/browser.h
tools/perf/util/ui/browsers/annotate.c
tools/perf/util/ui/browsers/hists.c
tools/perf/util/ui/browsers/map.c
tools/perf/util/ui/util.c
tools/perf/util/util.h

diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 1762b81..741fe66 100644
@@ -542,9 +542,11 @@ Kprobes does not use mutexes or allocate memory except during
 registration and unregistration.
 
 Probe handlers are run with preemption disabled.  Depending on the
-architecture, handlers may also run with interrupts disabled.  In any
-case, your handler should not yield the CPU (e.g., by attempting to
-acquire a semaphore).
+architecture and optimization state, handlers may also run with
+interrupts disabled (e.g., kretprobe handlers and optimized kprobe
+handlers run with interrupts enabled on x86/x86-64).  In any case,
+your handler should not yield the CPU (e.g., by attempting to acquire
+a semaphore).
 
 Since a return probe is implemented by replacing the return
 address with the trampoline's address, stack backtraces and calls
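
The rule above -- never yield the CPU from a handler -- is easiest to see in a minimal probe module.  A hedged sketch, not part of this patch; the probed symbol "do_fork" and the module boilerplate are illustrative:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* Pre-handler: runs with preemption disabled, so it only reads state
 * and calls printk() -- no semaphores, no sleeping allocations. */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	printk(KERN_INFO "kprobe hit at %p\n", p->addr);
	return 0;			/* continue with the probed insn */
}

static struct kprobe kp = {
	.symbol_name	= "do_fork",	/* illustrative probe target */
	.pre_handler	= handler_pre,
};

static int __init kp_init(void)
{
	return register_kprobe(&kp);
}

static void __exit kp_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(kp_init);
module_exit(kp_exit);
MODULE_LICENSE("GPL");
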
diff --git a/Makefile b/Makefile
index 860c26a..d3c1071 100644
--- a/Makefile
+++ b/Makefile
@@ -568,6 +568,12 @@ endif
 
 ifdef CONFIG_FUNCTION_TRACER
 KBUILD_CFLAGS  += -pg
+ifdef CONFIG_DYNAMIC_FTRACE
+       ifdef CONFIG_HAVE_C_RECORDMCOUNT
+               BUILD_C_RECORDMCOUNT := y
+               export BUILD_C_RECORDMCOUNT
+       endif
+endif
 endif
 
 # We trigger additional mismatches with less inlining
@@ -591,6 +597,11 @@ KBUILD_CFLAGS      += $(call cc-option,-fno-strict-overflow)
 # conserve stack if available
 KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)
 
+# check for 'asm goto'
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
+       KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
 # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
 # But warn user when we do so
 warn-assign = \
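
The gcc-goto.sh probe is there because 'asm goto' (new in GCC 4.5) is what the jump label work in this series compiles down to: an asm statement that may branch to a C label, so a disabled tracing hook costs a single patchable instruction.  A hedged sketch of the construct being tested for, using an x86 jmp as the illustrative instruction:

/* The fast path falls through; the jmp can be binary-patched to a
 * nop (and back) at run time, which is the basis of jump labels. */
static inline int tracing_enabled(void)
{
	asm goto("jmp %l[do_trace]\n"
		 : : : : do_trace);
	return 0;			/* hook disabled */
do_trace:
	return 1;			/* hook enabled */
}
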
diff --git a/arch/Kconfig b/arch/Kconfig
index fe48fc7..53d7f61 100644
@@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI
          subsystem.  Also has support for calculating CPU cycle events
          to determine how many clock cycles in a given period.
 
+config HAVE_ARCH_JUMP_LABEL
+       bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index b9647bb..d04ccd7 100644
@@ -9,6 +9,7 @@ config ALPHA
        select HAVE_IDE
        select HAVE_OPROFILE
        select HAVE_SYSCALL_WRAPPERS
+       select HAVE_IRQ_WORK
        select HAVE_PERF_EVENTS
        select HAVE_DMA_ATTRS
        help
diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h
index 4157cd3..fe792ca 100644
@@ -1,11 +1,6 @@
 #ifndef __ASM_ALPHA_PERF_EVENT_H
 #define __ASM_ALPHA_PERF_EVENT_H
 
-/* Alpha only supports software events through this interface. */
-extern void set_perf_event_pending(void);
-
-#define PERF_EVENT_INDEX_OFFSET 0
-
 #ifdef CONFIG_PERF_EVENTS
 extern void init_hw_perf_events(void);
 #else
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 85d8e4f..1cc4968 100644
@@ -307,7 +307,7 @@ again:
                             new_raw_count) != prev_raw_count)
                goto again;
 
-       delta = (new_raw_count  - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
+       delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
 
        /* It is possible on very rare occasions that the PMC has overflowed
         * but the interrupt is yet to come.  Detect and fix this situation.
@@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
                struct hw_perf_event *hwc = &pe->hw;
                int idx = hwc->idx;
 
-               if (cpuc->current_idx[j] != PMC_NO_INDEX) {
-                       cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
-                       continue;
+               if (cpuc->current_idx[j] == PMC_NO_INDEX) {
+                       alpha_perf_event_set_period(pe, hwc, idx);
+                       cpuc->current_idx[j] = idx;
                }
 
-               alpha_perf_event_set_period(pe, hwc, idx);
-               cpuc->current_idx[j] = idx;
-               cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
+               if (!(hwc->state & PERF_HES_STOPPED))
+                       cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
        }
        cpuc->config = cpuc->event[0]->hw.config_base;
 }
@@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
  *  - this function is called from outside this module via the pmu struct
  *    returned from perf event initialisation.
  */
-static int alpha_pmu_enable(struct perf_event *event)
+static int alpha_pmu_add(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
        int n0;
        int ret;
-       unsigned long flags;
+       unsigned long irq_flags;
 
        /*
         * The Sparc code has the IRQ disable first followed by the perf
@@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event)
         * nevertheless we disable the PMCs first to enable a potential
         * final PMI to occur before we disable interrupts.
         */
-       perf_disable();
-       local_irq_save(flags);
+       perf_pmu_disable(event->pmu);
+       local_irq_save(irq_flags);
 
        /* Default to error to be returned */
        ret = -EAGAIN;
@@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event)
                }
        }
 
-       local_irq_restore(flags);
-       perf_enable();
+       hwc->state = PERF_HES_UPTODATE;
+       if (!(flags & PERF_EF_START))
+               hwc->state |= PERF_HES_STOPPED;
+
+       local_irq_restore(irq_flags);
+       perf_pmu_enable(event->pmu);
 
        return ret;
 }
@@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event)
  *  - this function is called from outside this module via the pmu struct
  *    returned from perf event initialisation.
  */
-static void alpha_pmu_disable(struct perf_event *event)
+static void alpha_pmu_del(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
-       unsigned long flags;
+       unsigned long irq_flags;
        int j;
 
-       perf_disable();
-       local_irq_save(flags);
+       perf_pmu_disable(event->pmu);
+       local_irq_save(irq_flags);
 
        for (j = 0; j < cpuc->n_events; j++) {
                if (event == cpuc->event[j]) {
@@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event)
                }
        }
 
-       local_irq_restore(flags);
-       perf_enable();
+       local_irq_restore(irq_flags);
+       perf_pmu_enable(event->pmu);
 }
 
 
@@ -514,13 +518,44 @@ static void alpha_pmu_read(struct perf_event *event)
 }
 
 
-static void alpha_pmu_unthrottle(struct perf_event *event)
+static void alpha_pmu_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+       if (!(hwc->state & PERF_HES_STOPPED)) {
+               cpuc->idx_mask &= ~(1UL<<hwc->idx);
+               hwc->state |= PERF_HES_STOPPED;
+       }
+
+       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               alpha_perf_event_update(event, hwc, hwc->idx, 0);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
+
+       if (cpuc->enabled)
+               wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx));
+}
+
+
+static void alpha_pmu_start(struct perf_event *event, int flags)
 {
        struct hw_perf_event *hwc = &event->hw;
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
+       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+               return;
+
+       if (flags & PERF_EF_RELOAD) {
+               WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+               alpha_perf_event_set_period(event, hwc, hwc->idx);
+       }
+
+       hwc->state = 0;
+
        cpuc->idx_mask |= 1UL<<hwc->idx;
-       wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
+       if (cpuc->enabled)
+               wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
 }
 
 
@@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event)
        return 0;
 }
 
-static const struct pmu pmu = {
-       .enable         = alpha_pmu_enable,
-       .disable        = alpha_pmu_disable,
-       .read           = alpha_pmu_read,
-       .unthrottle     = alpha_pmu_unthrottle,
-};
-
-
 /*
  * Main entry point to initialise a HW performance event.
  */
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+static int alpha_pmu_event_init(struct perf_event *event)
 {
        int err;
 
+       switch (event->attr.type) {
+       case PERF_TYPE_RAW:
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               break;
+
+       default:
+               return -ENOENT;
+       }
+
        if (!alpha_pmu)
-               return ERR_PTR(-ENODEV);
+               return -ENODEV;
 
        /* Do the real initialisation work. */
        err = __hw_perf_event_init(event);
 
-       if (err)
-               return ERR_PTR(err);
-
-       return &pmu;
+       return err;
 }
 
-
-
 /*
  * Main entry point - enable HW performance counters.
  */
-void hw_perf_enable(void)
+static void alpha_pmu_enable(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -700,7 +732,7 @@ void hw_perf_enable(void)
  * Main entry point - disable HW performance counters.
  */
 
-void hw_perf_disable(void)
+static void alpha_pmu_disable(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -713,6 +745,17 @@ void hw_perf_disable(void)
        wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
 }
 
+static struct pmu pmu = {
+       .pmu_enable     = alpha_pmu_enable,
+       .pmu_disable    = alpha_pmu_disable,
+       .event_init     = alpha_pmu_event_init,
+       .add            = alpha_pmu_add,
+       .del            = alpha_pmu_del,
+       .start          = alpha_pmu_start,
+       .stop           = alpha_pmu_stop,
+       .read           = alpha_pmu_read,
+};
+
 
 /*
  * Main entry point - don't know when this is called but it
@@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
        wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask);
 
        /* la_ptr is the counter that overflowed. */
-       if (unlikely(la_ptr >= perf_max_events)) {
+       if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) {
                /* This should never occur! */
                irq_err_count++;
                pr_warning("PMI: silly index %ld\n", la_ptr);
@@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
                        /* Interrupts coming too quickly; "throttle" the
                         * counter, i.e., disable it for a little while.
                         */
-                       cpuc->idx_mask &= ~(1UL<<idx);
+                       alpha_pmu_stop(event, 0);
                }
        }
        wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
@@ -837,6 +880,7 @@ void __init init_hw_perf_events(void)
 
        /* And set up PMU specification */
        alpha_pmu = &ev67_pmu;
-       perf_max_events = alpha_pmu->num_pmcs;
+
+       perf_pmu_register(&pmu);
 }
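
The alpha conversion above is the pattern every PMU in this series follows: the old enable/disable/unthrottle callbacks become add/del (attach or detach a counter) plus start/stop (run it), hwc->state carries the PERF_HES_STOPPED/PERF_HES_UPTODATE bookkeeping, and the driver registers itself with perf_pmu_register().  A hedged skeleton; the mypmu_* names are illustrative and the hardware accesses are left as comments:

#include <linux/perf_event.h>

static void mypmu_start(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
	event->hw.state = 0;
	/* program and unmask the hardware counter here */
}

static void mypmu_stop(struct perf_event *event, int flags)
{
	/* mask the hardware counter here */
	event->hw.state |= PERF_HES_STOPPED;
	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
		/* fold the hardware count into event->count here */
		event->hw.state |= PERF_HES_UPTODATE;
	}
}

static int mypmu_add(struct perf_event *event, int flags)
{
	/* counters arrive stopped; start them only if asked to */
	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		mypmu_start(event, PERF_EF_RELOAD);
	return 0;
}

static void mypmu_del(struct perf_event *event, int flags)
{
	mypmu_stop(event, PERF_EF_UPDATE);
}

static int mypmu_event_init(struct perf_event *event)
{
	if (event->attr.type != PERF_TYPE_RAW)
		return -ENOENT;		/* let another pmu claim it */
	return 0;
}

static void mypmu_read(struct perf_event *event)
{
	/* update event->count from the hardware counter here */
}

static struct pmu mypmu = {
	.event_init	= mypmu_event_init,
	.add		= mypmu_add,
	.del		= mypmu_del,
	.start		= mypmu_start,
	.stop		= mypmu_stop,
	.read		= mypmu_read,
};
/* registered once at init time, as above: perf_pmu_register(&mypmu); */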
 
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 396af17..0f1d849 100644
@@ -41,7 +41,7 @@
 #include <linux/init.h>
 #include <linux/bcd.h>
 #include <linux/profile.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -83,25 +83,25 @@ static struct {
 
 unsigned long est_cycle_freq;
 
-#ifdef CONFIG_PERF_EVENTS
+#ifdef CONFIG_IRQ_WORK
 
-DEFINE_PER_CPU(u8, perf_event_pending);
+DEFINE_PER_CPU(u8, irq_work_pending);
 
-#define set_perf_event_pending_flag()  __get_cpu_var(perf_event_pending) = 1
-#define test_perf_event_pending()      __get_cpu_var(perf_event_pending)
-#define clear_perf_event_pending()     __get_cpu_var(perf_event_pending) = 0
+#define set_irq_work_pending_flag()  __get_cpu_var(irq_work_pending) = 1
+#define test_irq_work_pending()      __get_cpu_var(irq_work_pending)
+#define clear_irq_work_pending()     __get_cpu_var(irq_work_pending) = 0
 
-void set_perf_event_pending(void)
+void set_irq_work_pending(void)
 {
-       set_perf_event_pending_flag();
+       set_irq_work_pending_flag();
 }
 
-#else  /* CONFIG_PERF_EVENTS */
+#else  /* CONFIG_IRQ_WORK */
 
-#define test_perf_event_pending()      0
-#define clear_perf_event_pending()
+#define test_irq_work_pending()      0
+#define clear_irq_work_pending()
 
-#endif /* CONFIG_PERF_EVENTS */
+#endif /* CONFIG_IRQ_WORK */
 
 
 static inline __u32 rpcc(void)
@@ -191,9 +191,9 @@ irqreturn_t timer_interrupt(int irq, void *dev)
 
        write_sequnlock(&xtime_lock);
 
-       if (test_perf_event_pending()) {
-               clear_perf_event_pending();
-               perf_event_do_pending();
+       if (test_irq_work_pending()) {
+               clear_irq_work_pending();
+               irq_work_run();
        }
 
 #ifndef CONFIG_SMP
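
The time.c conversion above is almost purely a rename because the new interface keeps the same shape: NMI/PMI-context producers queue a callback, and the architecture drains the queue from a safe interrupt context via irq_work_run(), exactly where perf_event_do_pending() used to be called.  A hedged usage sketch, assuming the include/linux/irq_work.h interface as merged here; the callback and trigger site are illustrative:

#include <linux/irq_work.h>
#include <linux/kernel.h>

static void my_wakeup_fn(struct irq_work *work)
{
	/* runs in hardirq context, outside the NMI handler */
	pr_info("deferred perf wakeup ran\n");
}

static struct irq_work my_wakeup;

static void my_init(void)
{
	init_irq_work(&my_wakeup, my_wakeup_fn);
}

static void my_nmi_handler(void)
{
	/* NMI-safe: marks the work pending; on alpha the next timer
	 * tick then runs it via irq_work_run(), as shown above */
	irq_work_queue(&my_wakeup);
}
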
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9c26ba7..9103904 100644
@@ -23,6 +23,7 @@ config ARM
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_LZO
        select HAVE_KERNEL_LZMA
+       select HAVE_IRQ_WORK
        select HAVE_PERF_EVENTS
        select PERF_USE_VMALLOC
        select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index b5799a3..c4aa4e8 100644
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-/*
- * NOP: on *most* (read: all supported) ARM platforms, the performance
- * counter interrupts are regular interrupts and not an NMI. This
- * means that when we receive the interrupt we can call
- * perf_event_do_pending() that handles all of the work with
- * interrupts disabled.
- */
-static inline void
-set_perf_event_pending(void)
-{
-}
-
 /* ARM performance counters start from 1 (in the cp15 accesses) so use the
  * same indexes here for consistency. */
 #define PERF_EVENT_INDEX_OFFSET 1
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index ecbb028..49643b1 100644
@@ -123,6 +123,12 @@ armpmu_get_max_events(void)
 }
 EXPORT_SYMBOL_GPL(armpmu_get_max_events);
 
+int perf_num_counters(void)
+{
+       return armpmu_get_max_events();
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
 #define HW_OP_UNSUPPORTED              0xFFFF
 
 #define C(_x) \
@@ -221,46 +227,56 @@ again:
 }
 
 static void
-armpmu_disable(struct perf_event *event)
+armpmu_read(struct perf_event *event)
 {
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
-       int idx = hwc->idx;
-
-       WARN_ON(idx < 0);
-
-       clear_bit(idx, cpuc->active_mask);
-       armpmu->disable(hwc, idx);
-
-       barrier();
 
-       armpmu_event_update(event, hwc, idx);
-       cpuc->events[idx] = NULL;
-       clear_bit(idx, cpuc->used_mask);
+       /* Don't read disabled counters! */
+       if (hwc->idx < 0)
+               return;
 
-       perf_event_update_userpage(event);
+       armpmu_event_update(event, hwc, hwc->idx);
 }
 
 static void
-armpmu_read(struct perf_event *event)
+armpmu_stop(struct perf_event *event, int flags)
 {
        struct hw_perf_event *hwc = &event->hw;
 
-       /* Don't read disabled counters! */
-       if (hwc->idx < 0)
+       if (!armpmu)
                return;
 
-       armpmu_event_update(event, hwc, hwc->idx);
+       /*
+        * ARM pmu always has to update the counter, so ignore
+        * PERF_EF_UPDATE, see comments in armpmu_start().
+        */
+       if (!(hwc->state & PERF_HES_STOPPED)) {
+               armpmu->disable(hwc, hwc->idx);
+               barrier(); /* why? */
+               armpmu_event_update(event, hwc, hwc->idx);
+               hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       }
 }
 
 static void
-armpmu_unthrottle(struct perf_event *event)
+armpmu_start(struct perf_event *event, int flags)
 {
        struct hw_perf_event *hwc = &event->hw;
 
+       if (!armpmu)
+               return;
+
+       /*
+        * ARM pmu always has to reprogram the period, so ignore
+        * PERF_EF_RELOAD, see the comment below.
+        */
+       if (flags & PERF_EF_RELOAD)
+               WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+       hwc->state = 0;
        /*
         * Set the period again. Some counters can't be stopped, so when we
-        * were throttled we simply disabled the IRQ source and the counter
+        * were stopped we simply disabled the IRQ source and the counter
         * may have been left counting. If we don't do this step then we may
         * get an interrupt too soon or *way* too late if the overflow has
         * happened since disabling.
@@ -269,14 +285,33 @@ armpmu_unthrottle(struct perf_event *event)
        armpmu->enable(hwc, hwc->idx);
 }
 
+static void
+armpmu_del(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       WARN_ON(idx < 0);
+
+       clear_bit(idx, cpuc->active_mask);
+       armpmu_stop(event, PERF_EF_UPDATE);
+       cpuc->events[idx] = NULL;
+       clear_bit(idx, cpuc->used_mask);
+
+       perf_event_update_userpage(event);
+}
+
 static int
-armpmu_enable(struct perf_event *event)
+armpmu_add(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        int err = 0;
 
+       perf_pmu_disable(event->pmu);
+
        /* If we don't have a space for the counter then finish early. */
        idx = armpmu->get_event_idx(cpuc, hwc);
        if (idx < 0) {
@@ -293,25 +328,19 @@ armpmu_enable(struct perf_event *event)
        cpuc->events[idx] = event;
        set_bit(idx, cpuc->active_mask);
 
-       /* Set the period for the event. */
-       armpmu_event_set_period(event, hwc, idx);
-
-       /* Enable the event. */
-       armpmu->enable(hwc, idx);
+       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       if (flags & PERF_EF_START)
+               armpmu_start(event, PERF_EF_RELOAD);
 
        /* Propagate our changes to the userspace mapping. */
        perf_event_update_userpage(event);
 
 out:
+       perf_pmu_enable(event->pmu);
        return err;
 }
 
-static struct pmu pmu = {
-       .enable     = armpmu_enable,
-       .disable    = armpmu_disable,
-       .unthrottle = armpmu_unthrottle,
-       .read       = armpmu_read,
-};
+static struct pmu pmu;
 
 static int
 validate_event(struct cpu_hw_events *cpuc,
@@ -491,20 +520,29 @@ __hw_perf_event_init(struct perf_event *event)
        return err;
 }
 
-const struct pmu *
-hw_perf_event_init(struct perf_event *event)
+static int armpmu_event_init(struct perf_event *event)
 {
        int err = 0;
 
+       switch (event->attr.type) {
+       case PERF_TYPE_RAW:
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               break;
+
+       default:
+               return -ENOENT;
+       }
+
        if (!armpmu)
-               return ERR_PTR(-ENODEV);
+               return -ENODEV;
 
        event->destroy = hw_perf_event_destroy;
 
        if (!atomic_inc_not_zero(&active_events)) {
-               if (atomic_read(&active_events) > perf_max_events) {
+               if (atomic_read(&active_events) > armpmu->num_events) {
                        atomic_dec(&active_events);
-                       return ERR_PTR(-ENOSPC);
+                       return -ENOSPC;
                }
 
                mutex_lock(&pmu_reserve_mutex);
@@ -518,17 +556,16 @@ hw_perf_event_init(struct perf_event *event)
        }
 
        if (err)
-               return ERR_PTR(err);
+               return err;
 
        err = __hw_perf_event_init(event);
        if (err)
                hw_perf_event_destroy(event);
 
-       return err ? ERR_PTR(err) : &pmu;
+       return err;
 }
 
-void
-hw_perf_enable(void)
+static void armpmu_enable(struct pmu *pmu)
 {
        /* Enable all of the perf events on hardware. */
        int idx;
@@ -549,13 +586,23 @@ hw_perf_enable(void)
        armpmu->start();
 }
 
-void
-hw_perf_disable(void)
+static void armpmu_disable(struct pmu *pmu)
 {
        if (armpmu)
                armpmu->stop();
 }
 
+static struct pmu pmu = {
+       .pmu_enable     = armpmu_enable,
+       .pmu_disable    = armpmu_disable,
+       .event_init     = armpmu_event_init,
+       .add            = armpmu_add,
+       .del            = armpmu_del,
+       .start          = armpmu_start,
+       .stop           = armpmu_stop,
+       .read           = armpmu_read,
+};
+
 /*
  * ARMv6 Performance counter handling code.
  *
@@ -1045,7 +1092,7 @@ armv6pmu_handle_irq(int irq_num,
         * platforms that can have the PMU interrupts raised as an NMI, this
         * will not work.
         */
-       perf_event_do_pending();
+       irq_work_run();
 
        return IRQ_HANDLED;
 }
@@ -2021,7 +2068,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
         * platforms that can have the PMU interrupts raised as an NMI, this
         * will not work.
         */
-       perf_event_do_pending();
+       irq_work_run();
 
        return IRQ_HANDLED;
 }
@@ -2389,7 +2436,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
                        armpmu->disable(hwc, idx);
        }
 
-       perf_event_do_pending();
+       irq_work_run();
 
        /*
         * Re-enable the PMU.
@@ -2716,7 +2763,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
                        armpmu->disable(hwc, idx);
        }
 
-       perf_event_do_pending();
+       irq_work_run();
 
        /*
         * Re-enable the PMU.
@@ -2933,14 +2980,12 @@ init_hw_perf_events(void)
                        armpmu = &armv6pmu;
                        memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
                                        sizeof(armv6_perf_cache_map));
-                       perf_max_events = armv6pmu.num_events;
                        break;
                case 0xB020:    /* ARM11mpcore */
                        armpmu = &armv6mpcore_pmu;
                        memcpy(armpmu_perf_cache_map,
                               armv6mpcore_perf_cache_map,
                               sizeof(armv6mpcore_perf_cache_map));
-                       perf_max_events = armv6mpcore_pmu.num_events;
                        break;
                case 0xC080:    /* Cortex-A8 */
                        armv7pmu.id = ARM_PERF_PMU_ID_CA8;
@@ -2952,7 +2997,6 @@ init_hw_perf_events(void)
                        /* Reset PMNC and read the nb of CNTx counters
                            supported */
                        armv7pmu.num_events = armv7_reset_read_pmnc();
-                       perf_max_events = armv7pmu.num_events;
                        break;
                case 0xC090:    /* Cortex-A9 */
                        armv7pmu.id = ARM_PERF_PMU_ID_CA9;
@@ -2964,7 +3008,6 @@ init_hw_perf_events(void)
                        /* Reset PMNC and read the nb of CNTx counters
                            supported */
                        armv7pmu.num_events = armv7_reset_read_pmnc();
-                       perf_max_events = armv7pmu.num_events;
                        break;
                }
        /* Intel CPUs [xscale]. */
@@ -2975,13 +3018,11 @@ init_hw_perf_events(void)
                        armpmu = &xscale1pmu;
                        memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
                                        sizeof(xscale_perf_cache_map));
-                       perf_max_events = xscale1pmu.num_events;
                        break;
                case 2:
                        armpmu = &xscale2pmu;
                        memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
                                        sizeof(xscale_perf_cache_map));
-                       perf_max_events = xscale2pmu.num_events;
                        break;
                }
        }
@@ -2991,9 +3032,10 @@ init_hw_perf_events(void)
                                arm_pmu_names[armpmu->id], armpmu->num_events);
        } else {
                pr_info("no hardware support available\n");
-               perf_max_events = -1;
        }
 
+       perf_pmu_register(&pmu);
+
        return 0;
 }
 arch_initcall(init_hw_perf_events);
@@ -3001,13 +3043,6 @@ arch_initcall(init_hw_perf_events);
 /*
  * Callchain handling code.
  */
-static inline void
-callchain_store(struct perf_callchain_entry *entry,
-               u64 ip)
-{
-       if (entry->nr < PERF_MAX_STACK_DEPTH)
-               entry->ip[entry->nr++] = ip;
-}
 
 /*
  * The registers we're interested in are at the end of the variable
@@ -3039,7 +3074,7 @@ user_backtrace(struct frame_tail *tail,
        if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
                return NULL;
 
-       callchain_store(entry, buftail.lr);
+       perf_callchain_store(entry, buftail.lr);
 
        /*
         * Frame pointers should strictly progress back up the stack
@@ -3051,16 +3086,11 @@ user_backtrace(struct frame_tail *tail,
        return buftail.fp - 1;
 }
 
-static void
-perf_callchain_user(struct pt_regs *regs,
-                   struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
        struct frame_tail *tail;
 
-       callchain_store(entry, PERF_CONTEXT_USER);
-
-       if (!user_mode(regs))
-               regs = task_pt_regs(current);
 
        tail = (struct frame_tail *)regs->ARM_fp - 1;
 
@@ -3078,56 +3108,18 @@ callchain_trace(struct stackframe *fr,
                void *data)
 {
        struct perf_callchain_entry *entry = data;
-       callchain_store(entry, fr->pc);
+       perf_callchain_store(entry, fr->pc);
        return 0;
 }
 
-static void
-perf_callchain_kernel(struct pt_regs *regs,
-                     struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
        struct stackframe fr;
 
-       callchain_store(entry, PERF_CONTEXT_KERNEL);
        fr.fp = regs->ARM_fp;
        fr.sp = regs->ARM_sp;
        fr.lr = regs->ARM_lr;
        fr.pc = regs->ARM_pc;
        walk_stackframe(&fr, callchain_trace, entry);
 }
-
-static void
-perf_do_callchain(struct pt_regs *regs,
-                 struct perf_callchain_entry *entry)
-{
-       int is_user;
-
-       if (!regs)
-               return;
-
-       is_user = user_mode(regs);
-
-       if (!current || !current->pid)
-               return;
-
-       if (is_user && current->state != TASK_RUNNING)
-               return;
-
-       if (!is_user)
-               perf_callchain_kernel(regs, entry);
-
-       if (current->mm)
-               perf_callchain_user(regs, entry);
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-
-struct perf_callchain_entry *
-perf_callchain(struct pt_regs *regs)
-{
-       struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
-
-       entry->nr = 0;
-       perf_do_callchain(regs, entry);
-       return entry;
-}
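
The callchain rework visible above also slims the arch contract: the core now owns the per-cpu perf_callchain_entry buffer, the PERF_CONTEXT_KERNEL/USER markers, and the user_mode()/current checks, so an architecture only walks its frames and records addresses.  A hedged sketch of the kernel-side hook; the frame walk is a placeholder, not a real unwinder:

#include <linux/perf_event.h>
#include <linux/ptrace.h>

void perf_callchain_kernel(struct perf_callchain_entry *entry,
			   struct pt_regs *regs)
{
	perf_callchain_store(entry, instruction_pointer(regs));

	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		unsigned long ret_addr = 0;

		/* validate the current frame and fetch its return
		 * address into ret_addr here */
		if (!ret_addr)
			break;
		perf_callchain_store(entry, ret_addr);
	}
}
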
diff --git a/arch/arm/oprofile/Makefile b/arch/arm/oprofile/Makefile
index e666eaf..b2215c6 100644
@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
                oprofilefs.o oprofile_stats.o \
                timer_int.o )
 
+ifeq ($(CONFIG_HW_PERF_EVENTS),y)
+DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
+endif
+
 oprofile-y                             := $(DRIVER_OBJS) common.o
diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index 72e09eb..8aa9744 100644
 #include <asm/ptrace.h>
 
 #ifdef CONFIG_HW_PERF_EVENTS
-/*
- * Per performance monitor configuration as set via oprofilefs.
- */
-struct op_counter_config {
-       unsigned long count;
-       unsigned long enabled;
-       unsigned long event;
-       unsigned long unit_mask;
-       unsigned long kernel;
-       unsigned long user;
-       struct perf_event_attr attr;
-};
-
-static int op_arm_enabled;
-static DEFINE_MUTEX(op_arm_mutex);
-
-static struct op_counter_config *counter_config;
-static struct perf_event **perf_events[nr_cpumask_bits];
-static int perf_num_counters;
-
-/*
- * Overflow callback for oprofile.
- */
-static void op_overflow_handler(struct perf_event *event, int unused,
-                       struct perf_sample_data *data, struct pt_regs *regs)
+char *op_name_from_perf_id(void)
 {
-       int id;
-       u32 cpu = smp_processor_id();
-
-       for (id = 0; id < perf_num_counters; ++id)
-               if (perf_events[cpu][id] == event)
-                       break;
-
-       if (id != perf_num_counters)
-               oprofile_add_sample(regs, id);
-       else
-               pr_warning("oprofile: ignoring spurious overflow "
-                               "on cpu %u\n", cpu);
-}
-
-/*
- * Called by op_arm_setup to create perf attributes to mirror the oprofile
- * settings in counter_config. Attributes are created as `pinned' events and
- * so are permanently scheduled on the PMU.
- */
-static void op_perf_setup(void)
-{
-       int i;
-       u32 size = sizeof(struct perf_event_attr);
-       struct perf_event_attr *attr;
-
-       for (i = 0; i < perf_num_counters; ++i) {
-               attr = &counter_config[i].attr;
-               memset(attr, 0, size);
-               attr->type              = PERF_TYPE_RAW;
-               attr->size              = size;
-               attr->config            = counter_config[i].event;
-               attr->sample_period     = counter_config[i].count;
-               attr->pinned            = 1;
-       }
-}
-
-static int op_create_counter(int cpu, int event)
-{
-       int ret = 0;
-       struct perf_event *pevent;
-
-       if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL))
-               return ret;
-
-       pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
-                                                 cpu, -1,
-                                                 op_overflow_handler);
-
-       if (IS_ERR(pevent)) {
-               ret = PTR_ERR(pevent);
-       } else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
-               perf_event_release_kernel(pevent);
-               pr_warning("oprofile: failed to enable event %d "
-                               "on CPU %d\n", event, cpu);
-               ret = -EBUSY;
-       } else {
-               perf_events[cpu][event] = pevent;
-       }
-
-       return ret;
-}
+       enum arm_perf_pmu_ids id = armpmu_get_pmu_id();
 
-static void op_destroy_counter(int cpu, int event)
-{
-       struct perf_event *pevent = perf_events[cpu][event];
-
-       if (pevent) {
-               perf_event_release_kernel(pevent);
-               perf_events[cpu][event] = NULL;
-       }
-}
-
-/*
- * Called by op_arm_start to create active perf events based on the
- * perviously configured attributes.
- */
-static int op_perf_start(void)
-{
-       int cpu, event, ret = 0;
-
-       for_each_online_cpu(cpu) {
-               for (event = 0; event < perf_num_counters; ++event) {
-                       ret = op_create_counter(cpu, event);
-                       if (ret)
-                               goto out;
-               }
-       }
-
-out:
-       return ret;
-}
-
-/*
- * Called by op_arm_stop at the end of a profiling run.
- */
-static void op_perf_stop(void)
-{
-       int cpu, event;
-
-       for_each_online_cpu(cpu)
-               for (event = 0; event < perf_num_counters; ++event)
-                       op_destroy_counter(cpu, event);
-}
-
-
-static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
-{
        switch (id) {
        case ARM_PERF_PMU_ID_XSCALE1:
                return "arm/xscale1";
@@ -176,116 +47,6 @@ static char *op_name_from_perf_id(enum arm_perf_pmu_ids id)
        }
 }
 
-static int op_arm_create_files(struct super_block *sb, struct dentry *root)
-{
-       unsigned int i;
-
-       for (i = 0; i < perf_num_counters; i++) {
-               struct dentry *dir;
-               char buf[4];
-
-               snprintf(buf, sizeof buf, "%d", i);
-               dir = oprofilefs_mkdir(sb, root, buf);
-               oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
-               oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
-               oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
-               oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
-               oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
-               oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
-       }
-
-       return 0;
-}
-
-static int op_arm_setup(void)
-{
-       spin_lock(&oprofilefs_lock);
-       op_perf_setup();
-       spin_unlock(&oprofilefs_lock);
-       return 0;
-}
-
-static int op_arm_start(void)
-{
-       int ret = -EBUSY;
-
-       mutex_lock(&op_arm_mutex);
-       if (!op_arm_enabled) {
-               ret = 0;
-               op_perf_start();
-               op_arm_enabled = 1;
-       }
-       mutex_unlock(&op_arm_mutex);
-       return ret;
-}
-
-static void op_arm_stop(void)
-{
-       mutex_lock(&op_arm_mutex);
-       if (op_arm_enabled)
-               op_perf_stop();
-       op_arm_enabled = 0;
-       mutex_unlock(&op_arm_mutex);
-}
-
-#ifdef CONFIG_PM
-static int op_arm_suspend(struct platform_device *dev, pm_message_t state)
-{
-       mutex_lock(&op_arm_mutex);
-       if (op_arm_enabled)
-               op_perf_stop();
-       mutex_unlock(&op_arm_mutex);
-       return 0;
-}
-
-static int op_arm_resume(struct platform_device *dev)
-{
-       mutex_lock(&op_arm_mutex);
-       if (op_arm_enabled && op_perf_start())
-               op_arm_enabled = 0;
-       mutex_unlock(&op_arm_mutex);
-       return 0;
-}
-
-static struct platform_driver oprofile_driver = {
-       .driver         = {
-               .name           = "arm-oprofile",
-       },
-       .resume         = op_arm_resume,
-       .suspend        = op_arm_suspend,
-};
-
-static struct platform_device *oprofile_pdev;
-
-static int __init init_driverfs(void)
-{
-       int ret;
-
-       ret = platform_driver_register(&oprofile_driver);
-       if (ret)
-               goto out;
-
-       oprofile_pdev = platform_device_register_simple(
-                               oprofile_driver.driver.name, 0, NULL, 0);
-       if (IS_ERR(oprofile_pdev)) {
-               ret = PTR_ERR(oprofile_pdev);
-               platform_driver_unregister(&oprofile_driver);
-       }
-
-out:
-       return ret;
-}
-
-static void  exit_driverfs(void)
-{
-       platform_device_unregister(oprofile_pdev);
-       platform_driver_unregister(&oprofile_driver);
-}
-#else
-static int __init init_driverfs(void) { return 0; }
-#define exit_driverfs() do { } while (0)
-#endif /* CONFIG_PM */
-
 static int report_trace(struct stackframe *frame, void *d)
 {
        unsigned int *depth = d;
@@ -350,74 +111,14 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
 
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
-       int cpu, ret = 0;
-
-       perf_num_counters = armpmu_get_max_events();
-
-       counter_config = kcalloc(perf_num_counters,
-                       sizeof(struct op_counter_config), GFP_KERNEL);
-
-       if (!counter_config) {
-               pr_info("oprofile: failed to allocate %d "
-                               "counters\n", perf_num_counters);
-               return -ENOMEM;
-       }
-
-       ret = init_driverfs();
-       if (ret) {
-               kfree(counter_config);
-               counter_config = NULL;
-               return ret;
-       }
-
-       for_each_possible_cpu(cpu) {
-               perf_events[cpu] = kcalloc(perf_num_counters,
-                               sizeof(struct perf_event *), GFP_KERNEL);
-               if (!perf_events[cpu]) {
-                       pr_info("oprofile: failed to allocate %d perf events "
-                                       "for cpu %d\n", perf_num_counters, cpu);
-                       while (--cpu >= 0)
-                               kfree(perf_events[cpu]);
-                       return -ENOMEM;
-               }
-       }
-
        ops->backtrace          = arm_backtrace;
-       ops->create_files       = op_arm_create_files;
-       ops->setup              = op_arm_setup;
-       ops->start              = op_arm_start;
-       ops->stop               = op_arm_stop;
-       ops->shutdown           = op_arm_stop;
-       ops->cpu_type           = op_name_from_perf_id(armpmu_get_pmu_id());
-
-       if (!ops->cpu_type)
-               ret = -ENODEV;
-       else
-               pr_info("oprofile: using %s\n", ops->cpu_type);
 
-       return ret;
+       return oprofile_perf_init(ops);
 }
 
-void oprofile_arch_exit(void)
+void __exit oprofile_arch_exit(void)
 {
-       int cpu, id;
-       struct perf_event *event;
-
-       if (*perf_events) {
-               for_each_possible_cpu(cpu) {
-                       for (id = 0; id < perf_num_counters; ++id) {
-                               event = perf_events[cpu][id];
-                               if (event != NULL)
-                                       perf_event_release_kernel(event);
-                       }
-                       kfree(perf_events[cpu]);
-               }
-       }
-
-       if (counter_config) {
-               kfree(counter_config);
-               exit_driverfs();
-       }
+       oprofile_perf_exit();
 }
 #else
 int __init oprofile_arch_init(struct oprofile_operations *ops)
@@ -425,5 +126,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
        pr_info("oprofile: hardware counters not available\n");
        return -ENODEV;
 }
-void oprofile_arch_exit(void) {}
+void __exit oprofile_arch_exit(void) {}
 #endif /* CONFIG_HW_PERF_EVENTS */
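
With this conversion the perf-backed oprofile driver lives once in drivers/oprofile/oprofile_perf.c, and an architecture's glue shrinks to roughly the following.  A hedged sketch; the counter count and cpu_type string are illustrative:

#include <linux/oprofile.h>
#include <linux/perf_event.h>

int perf_num_counters(void)
{
	return 4;			/* illustrative fixed count */
}

char *op_name_from_perf_id(void)
{
	return "arm/armv7";		/* illustrative cpu_type string */
}

int __init oprofile_arch_init(struct oprofile_operations *ops)
{
	return oprofile_perf_init(ops);	/* shared driver does the rest */
}

void __exit oprofile_arch_exit(void)
{
	oprofile_perf_exit();
}
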
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 16399bd..0f2417d 100644
@@ -7,6 +7,7 @@ config FRV
        default y
        select HAVE_IDE
        select HAVE_ARCH_TRACEHOOK
+       select HAVE_IRQ_WORK
        select HAVE_PERF_EVENTS
 
 config ZONE_DMA
diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile
index f470975..4ff2fb1 100644
@@ -5,4 +5,4 @@
 lib-y := \
        __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
        checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
-       outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o
+       outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o
diff --git a/arch/frv/lib/perf_event.c b/arch/frv/lib/perf_event.c
deleted file mode 100644
index 9ac5acf..0000000
--- a/arch/frv/lib/perf_event.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Performance event handling
- *
- * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/perf_event.h>
-
-/*
- * mark the performance event as pending
- */
-void set_perf_event_pending(void)
-{
-}
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
index d514cd9..8fb7d33 100644
@@ -6,12 +6,6 @@
  *     David Mosberger-Tang <davidm@hpl.hp.com>
  */
 
-
-#include <linux/threads.h>
-#include <linux/irq.h>
-
-#include <asm/processor.h>
-
 /*
  * No irq_cpustat_t for IA-64.  The data is held in the per-CPU data structure.
  */
 
 #define local_softirq_pending()                (local_cpu_data->softirq_pending)
 
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+#include <asm/processor.h>
+
 extern void __iomem *ipi_base_addr;
 
 void ack_bad_irq(unsigned int irq);
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 907417d..79a04a9 100644
@@ -16,6 +16,7 @@ config PARISC
        select RTC_DRV_GENERIC
        select INIT_ALL_POSSIBLE
        select BUG
+       select HAVE_IRQ_WORK
        select HAVE_PERF_EVENTS
        select GENERIC_ATOMIC64 if !64BIT
        help
diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h
index cc14642..1e0fd8b 100644
@@ -1,7 +1,6 @@
 #ifndef __ASM_PARISC_PERF_EVENT_H
 #define __ASM_PARISC_PERF_EVENT_H
 
-/* parisc only supports software events through this interface. */
-static inline void set_perf_event_pending(void) { }
+/* Empty, just to avoid compiling error */
 
 #endif /* __ASM_PARISC_PERF_EVENT_H */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 631e5a0..4b1e521 100644
@@ -138,6 +138,7 @@ config PPC
        select HAVE_OPROFILE
        select HAVE_SYSCALL_WRAPPERS if PPC64
        select GENERIC_ATOMIC64 if PPC32
+       select HAVE_IRQ_WORK
        select HAVE_PERF_EVENTS
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1ff6662..9b287fd 100644
@@ -129,7 +129,7 @@ struct paca_struct {
        u8 soft_enabled;                /* irq soft-enable flag */
        u8 hard_enabled;                /* set if irqs are enabled in MSR */
        u8 io_sync;                     /* writel() needs spin_unlock sync */
-       u8 perf_event_pending;          /* PM interrupt while soft-disabled */
+       u8 irq_work_pending;            /* IRQ_WORK interrupt while soft-disabled */
 
        /* Stuff for accurate time accounting */
        u64 user_time;                  /* accumulated usermode TB ticks */
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c
index 95ad9da..d05ae42 100644
 #include "ppc32.h"
 #endif
 
-/*
- * Store another value in a callchain_entry.
- */
-static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-       unsigned int nr = entry->nr;
-
-       if (nr < PERF_MAX_STACK_DEPTH) {
-               entry->ip[nr] = ip;
-               entry->nr = nr + 1;
-       }
-}
 
 /*
  * Is sp valid as the address of the next kernel stack frame after prev_sp?
@@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
        return 0;
 }
 
-static void perf_callchain_kernel(struct pt_regs *regs,
-                                 struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
        unsigned long sp, next_sp;
        unsigned long next_ip;
@@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
 
        lr = regs->link;
        sp = regs->gpr[1];
-       callchain_store(entry, PERF_CONTEXT_KERNEL);
-       callchain_store(entry, regs->nip);
+       perf_callchain_store(entry, regs->nip);
 
        if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
                return;
@@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
                        next_ip = regs->nip;
                        lr = regs->link;
                        level = 0;
-                       callchain_store(entry, PERF_CONTEXT_KERNEL);
+                       perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
 
                } else {
                        if (level == 0)
@@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs,
                        ++level;
                }
 
-               callchain_store(entry, next_ip);
+               perf_callchain_store(entry, next_ip);
                if (!valid_next_sp(next_sp, sp))
                        return;
                sp = next_sp;
@@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp)
                puc == (unsigned long) &sf->uc;
 }
 
-static void perf_callchain_user_64(struct pt_regs *regs,
-                                  struct perf_callchain_entry *entry)
+static void perf_callchain_user_64(struct perf_callchain_entry *entry,
+                                  struct pt_regs *regs)
 {
        unsigned long sp, next_sp;
        unsigned long next_ip;
@@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs,
        next_ip = regs->nip;
        lr = regs->link;
        sp = regs->gpr[1];
-       callchain_store(entry, PERF_CONTEXT_USER);
-       callchain_store(entry, next_ip);
+       perf_callchain_store(entry, next_ip);
 
        for (;;) {
                fp = (unsigned long __user *) sp;
@@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs,
                            read_user_stack_64(&uregs[PT_R1], &sp))
                                return;
                        level = 0;
-                       callchain_store(entry, PERF_CONTEXT_USER);
-                       callchain_store(entry, next_ip);
+                       perf_callchain_store(entry, PERF_CONTEXT_USER);
+                       perf_callchain_store(entry, next_ip);
                        continue;
                }
 
                if (level == 0)
                        next_ip = lr;
-               callchain_store(entry, next_ip);
+               perf_callchain_store(entry, next_ip);
                ++level;
                sp = next_sp;
        }
@@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
        return __get_user_inatomic(*ret, ptr);
 }
 
-static inline void perf_callchain_user_64(struct pt_regs *regs,
-                                         struct perf_callchain_entry *entry)
+static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
+                                         struct pt_regs *regs)
 {
 }
 
@@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp,
        return mctx->mc_gregs;
 }
 
-static void perf_callchain_user_32(struct pt_regs *regs,
-                                  struct perf_callchain_entry *entry)
+static void perf_callchain_user_32(struct perf_callchain_entry *entry,
+                                  struct pt_regs *regs)
 {
        unsigned int sp, next_sp;
        unsigned int next_ip;
@@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs,
        next_ip = regs->nip;
        lr = regs->link;
        sp = regs->gpr[1];
-       callchain_store(entry, PERF_CONTEXT_USER);
-       callchain_store(entry, next_ip);
+       perf_callchain_store(entry, next_ip);
 
        while (entry->nr < PERF_MAX_STACK_DEPTH) {
                fp = (unsigned int __user *) (unsigned long) sp;
@@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs,
                            read_user_stack_32(&uregs[PT_R1], &sp))
                                return;
                        level = 0;
-                       callchain_store(entry, PERF_CONTEXT_USER);
-                       callchain_store(entry, next_ip);
+                       perf_callchain_store(entry, PERF_CONTEXT_USER);
+                       perf_callchain_store(entry, next_ip);
                        continue;
                }
 
                if (level == 0)
                        next_ip = lr;
-               callchain_store(entry, next_ip);
+               perf_callchain_store(entry, next_ip);
                ++level;
                sp = next_sp;
        }
 }
 
-/*
- * Since we can't get PMU interrupts inside a PMU interrupt handler,
- * we don't need separate irq and nmi entries here.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
-       struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain);
-
-       entry->nr = 0;
-
-       if (!user_mode(regs)) {
-               perf_callchain_kernel(regs, entry);
-               if (current->mm)
-                       regs = task_pt_regs(current);
-               else
-                       regs = NULL;
-       }
-
-       if (regs) {
-               if (current_is_64bit())
-                       perf_callchain_user_64(regs, entry);
-               else
-                       perf_callchain_user_32(regs, entry);
-       }
-
-       return entry;
+       if (current_is_64bit())
+               perf_callchain_user_64(entry, regs);
+       else
+               perf_callchain_user_32(entry, regs);
 }
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index d301a30..3129c85 100644
@@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event)
 {
        s64 val, delta, prev;
 
+       if (event->hw.state & PERF_HES_STOPPED)
+               return;
+
        if (!event->hw.idx)
                return;
        /*
@@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
  * Disable all events to prevent PMU interrupts and to allow
  * events to be added or removed.
  */
-void hw_perf_disable(void)
+static void power_pmu_disable(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw;
        unsigned long flags;
@@ -565,7 +568,7 @@ void hw_perf_disable(void)
  * If we were previously disabled and events were added, then
  * put the new config on the PMU.
  */
-void hw_perf_enable(void)
+static void power_pmu_enable(struct pmu *pmu)
 {
        struct perf_event *event;
        struct cpu_hw_events *cpuhw;
@@ -672,6 +675,8 @@ void hw_perf_enable(void)
                }
                local64_set(&event->hw.prev_count, val);
                event->hw.idx = idx;
+               if (event->hw.state & PERF_HES_STOPPED)
+                       val = 0;
                write_pmc(idx, val);
                perf_event_update_userpage(event);
        }
@@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count,
  * re-enable the PMU in order to get hw_perf_enable to do the
  * actual work of reconfiguring the PMU.
  */
-static int power_pmu_enable(struct perf_event *event)
+static int power_pmu_add(struct perf_event *event, int ef_flags)
 {
        struct cpu_hw_events *cpuhw;
        unsigned long flags;
@@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event)
        int ret = -EAGAIN;
 
        local_irq_save(flags);
-       perf_disable();
+       perf_pmu_disable(event->pmu);
 
        /*
         * Add the event to the list (if there is room)
@@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event)
        cpuhw->events[n0] = event->hw.config;
        cpuhw->flags[n0] = event->hw.event_base;
 
+       if (!(ef_flags & PERF_EF_START))
+               event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
        /*
         * If group events scheduling transaction was started,
         * skip the schedulability test here, it will be performed
@@ -769,7 +777,7 @@ nocheck:
 
        ret = 0;
  out:
-       perf_enable();
+       perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
        return ret;
 }
@@ -777,14 +785,14 @@ nocheck:
 /*
  * Remove an event from the PMU.
  */
-static void power_pmu_disable(struct perf_event *event)
+static void power_pmu_del(struct perf_event *event, int ef_flags)
 {
        struct cpu_hw_events *cpuhw;
        long i;
        unsigned long flags;
 
        local_irq_save(flags);
-       perf_disable();
+       perf_pmu_disable(event->pmu);
 
        power_pmu_read(event);
 
@@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event)
                cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
        }
 
-       perf_enable();
+       perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
 }
 
 /*
- * Re-enable interrupts on a event after they were throttled
- * because they were coming too fast.
+ * POWER-PMU does not support disabling individual counters; hence we
+ * program a stopped counter for its maximum period and ignore its interrupts.
  */
-static void power_pmu_unthrottle(struct perf_event *event)
+
+static void power_pmu_start(struct perf_event *event, int ef_flags)
+{
+       unsigned long flags;
+       s64 left;
+
+       if (!event->hw.idx || !event->hw.sample_period)
+               return;
+
+       if (!(event->hw.state & PERF_HES_STOPPED))
+               return;
+
+       if (ef_flags & PERF_EF_RELOAD)
+               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+       local_irq_save(flags);
+       perf_pmu_disable(event->pmu);
+
+       event->hw.state = 0;
+       left = local64_read(&event->hw.period_left);
+       write_pmc(event->hw.idx, left);
+
+       perf_event_update_userpage(event);
+       perf_pmu_enable(event->pmu);
+       local_irq_restore(flags);
+}
+
+static void power_pmu_stop(struct perf_event *event, int ef_flags)
 {
-       s64 val, left;
        unsigned long flags;
 
        if (!event->hw.idx || !event->hw.sample_period)
                return;
+
+       if (event->hw.state & PERF_HES_STOPPED)
+               return;
+
        local_irq_save(flags);
-       perf_disable();
+       perf_pmu_disable(event->pmu);
+
        power_pmu_read(event);
-       left = event->hw.sample_period;
-       event->hw.last_period = left;
-       val = 0;
-       if (left < 0x80000000L)
-               val = 0x80000000L - left;
-       write_pmc(event->hw.idx, val);
-       local64_set(&event->hw.prev_count, val);
-       local64_set(&event->hw.period_left, left);
+       event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       write_pmc(event->hw.idx, 0);
+
        perf_event_update_userpage(event);
-       perf_enable();
+       perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
 }
 
@@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event)
  * Set the flag to make pmu::enable() not perform the
  * schedulability test; it will be performed at commit time
  */
-void power_pmu_start_txn(const struct pmu *pmu)
+void power_pmu_start_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
+       perf_pmu_disable(pmu);
        cpuhw->group_flag |= PERF_EVENT_TXN;
        cpuhw->n_txn_start = cpuhw->n_events;
 }
@@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu)
  * Clear the flag and pmu::enable() will perform the
  * schedulability test.
  */
-void power_pmu_cancel_txn(const struct pmu *pmu)
+void power_pmu_cancel_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
        cpuhw->group_flag &= ~PERF_EVENT_TXN;
+       perf_pmu_enable(pmu);
 }
 
 /*
@@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
  * Perform the group schedulability test as a whole
  * Return 0 on success
  */
-int power_pmu_commit_txn(const struct pmu *pmu)
+int power_pmu_commit_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw;
        long i, n;
@@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu)
                cpuhw->event[i]->hw.config = cpuhw->events[i];
 
        cpuhw->group_flag &= ~PERF_EVENT_TXN;
+       perf_pmu_enable(pmu);
        return 0;
 }
 
-struct pmu power_pmu = {
-       .enable         = power_pmu_enable,
-       .disable        = power_pmu_disable,
-       .read           = power_pmu_read,
-       .unthrottle     = power_pmu_unthrottle,
-       .start_txn      = power_pmu_start_txn,
-       .cancel_txn     = power_pmu_cancel_txn,
-       .commit_txn     = power_pmu_commit_txn,
-};
-
 /*
  * Return 1 if we might be able to put event on a limited PMC,
  * or 0 if not.
@@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
        return 0;
 }
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+static int power_pmu_event_init(struct perf_event *event)
 {
        u64 ev;
        unsigned long flags;
@@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
        struct cpu_hw_events *cpuhw;
 
        if (!ppmu)
-               return ERR_PTR(-ENXIO);
+               return -ENOENT;
+
        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                ev = event->attr.config;
                if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
-                       return ERR_PTR(-EOPNOTSUPP);
+                       return -EOPNOTSUPP;
                ev = ppmu->generic_events[ev];
                break;
        case PERF_TYPE_HW_CACHE:
                err = hw_perf_cache_event(event->attr.config, &ev);
                if (err)
-                       return ERR_PTR(err);
+                       return err;
                break;
        case PERF_TYPE_RAW:
                ev = event->attr.config;
                break;
        default:
-               return ERR_PTR(-EINVAL);
+               return -ENOENT;
        }
+
        event->hw.config_base = ev;
        event->hw.idx = 0;
 
@@ -1063,7 +1092,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
         * XXX we should check if the task is an idle task.
         */
        flags = 0;
-       if (event->ctx->task)
+       if (event->attach_state & PERF_ATTACH_TASK)
                flags |= PPMU_ONLY_COUNT_RUN;
 
        /*
@@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
                         */
                        ev = normal_pmc_alternative(ev, flags);
                        if (!ev)
-                               return ERR_PTR(-EINVAL);
+                               return -EINVAL;
                }
        }
 
@@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
                n = collect_events(event->group_leader, ppmu->n_counter - 1,
                                   ctrs, events, cflags);
                if (n < 0)
-                       return ERR_PTR(-EINVAL);
+                       return -EINVAL;
        }
        events[n] = ev;
        ctrs[n] = event;
        cflags[n] = flags;
        if (check_excludes(ctrs, cflags, n, 1))
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
 
        cpuhw = &get_cpu_var(cpu_hw_events);
        err = power_check_constraints(cpuhw, events, cflags, n + 1);
        put_cpu_var(cpu_hw_events);
        if (err)
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
 
        event->hw.config = events[n];
        event->hw.event_base = cflags[n];
@@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
        }
        event->destroy = hw_perf_event_destroy;
 
-       if (err)
-               return ERR_PTR(err);
-       return &power_pmu;
+       return err;
 }
 
+struct pmu power_pmu = {
+       .pmu_enable     = power_pmu_enable,
+       .pmu_disable    = power_pmu_disable,
+       .event_init     = power_pmu_event_init,
+       .add            = power_pmu_add,
+       .del            = power_pmu_del,
+       .start          = power_pmu_start,
+       .stop           = power_pmu_stop,
+       .read           = power_pmu_read,
+       .start_txn      = power_pmu_start_txn,
+       .cancel_txn     = power_pmu_cancel_txn,
+       .commit_txn     = power_pmu_commit_txn,
+};
+
 /*
  * A counter has overflowed; update its count and record
  * things if requested.  Note that interrupts are hard-disabled
@@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
        s64 prev, delta, left;
        int record = 0;
 
+       if (event->hw.state & PERF_HES_STOPPED) {
+               write_pmc(event->hw.idx, 0);
+               return;
+       }
+
        /* we don't have to worry about interrupts here */
        prev = local64_read(&event->hw.prev_count);
        delta = (val - prev) & 0xfffffffful;
@@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
                        val = 0x80000000LL - left;
        }
 
+       write_pmc(event->hw.idx, val);
+       local64_set(&event->hw.prev_count, val);
+       local64_set(&event->hw.period_left, left);
+       perf_event_update_userpage(event);
+
        /*
         * Finally record data if requested.
         */
@@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
                if (event->attr.sample_type & PERF_SAMPLE_ADDR)
                        perf_get_data_addr(regs, &data.addr);
 
-               if (perf_event_overflow(event, nmi, &data, regs)) {
-                       /*
-                        * Interrupts are coming too fast - throttle them
-                        * by setting the event to 0, so it will be
-                        * at least 2^30 cycles until the next interrupt
-                        * (assuming each event counts at most 2 counts
-                        * per cycle).
-                        */
-                       val = 0;
-                       left = ~0ULL >> 1;
-               }
+               if (perf_event_overflow(event, nmi, &data, regs))
+                       power_pmu_stop(event, 0);
        }
-
-       write_pmc(event->hw.idx, val);
-       local64_set(&event->hw.prev_count, val);
-       local64_set(&event->hw.period_left, left);
-       perf_event_update_userpage(event);
 }
 
 /*
@@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
                freeze_events_kernel = MMCR0_FCHV;
 #endif /* CONFIG_PPC64 */
 
+       perf_pmu_register(&power_pmu);
        perf_cpu_notifier(power_pmu_notifier);
 
        return 0;
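
The hunks above convert this driver to the reworked struct pmu interface: pmu_enable()/pmu_disable() replace the global hw_perf_enable()/hw_perf_disable(), add()/del() install and remove an event, and start()/stop() toggle counting on an installed event. The hw.state bits carry the contract between those callbacks: PERF_HES_STOPPED means the counter is not counting, PERF_HES_UPTODATE means event->count needs no further read-out. A minimal sketch of the expected flag handling (the dummy_pmu_* names are illustrative, not from this patch):

/* start: PERF_EF_RELOAD asks for a reprogram from hw.period_left,
 * which is only trustworthy while PERF_HES_UPTODATE is set. */
static void dummy_pmu_start(struct perf_event *event, int flags)
{
        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
        event->hw.state = 0;                    /* counting again */
}

/* add: install the event, but leave it stopped unless PERF_EF_START. */
static int dummy_pmu_add(struct perf_event *event, int flags)
{
        event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        if (flags & PERF_EF_START)
                dummy_pmu_start(event, PERF_EF_RELOAD);
        return 0;
}
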
index 1ba4547..7ecca59 100644 (file)
@@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event)
 {
        s64 val, delta, prev;
 
+       if (event->hw.state & PERF_HES_STOPPED)
+               return;
+
        /*
         * Performance monitor interrupts come even when interrupts
         * are soft-disabled, as long as interrupts are hard-enabled.
@@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event)
  * Disable all events to prevent PMU interrupts and to allow
  * events to be added or removed.
  */
-void hw_perf_disable(void)
+static void fsl_emb_pmu_disable(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw;
        unsigned long flags;
@@ -216,7 +219,7 @@ void hw_perf_disable(void)
  * If we were previously disabled and events were added, then
  * put the new config on the PMU.
  */
-void hw_perf_enable(void)
+static void fsl_emb_pmu_enable(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw;
        unsigned long flags;
@@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count,
        return n;
 }
 
-/* perf must be disabled, context locked on entry */
-static int fsl_emb_pmu_enable(struct perf_event *event)
+/* context locked on entry */
+static int fsl_emb_pmu_add(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuhw;
        int ret = -EAGAIN;
@@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
        u64 val;
        int i;
 
+       perf_pmu_disable(event->pmu);
        cpuhw = &get_cpu_var(cpu_hw_events);
 
        if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
@@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
                        val = 0x80000000L - left;
        }
        local64_set(&event->hw.prev_count, val);
+
+       if (!(flags & PERF_EF_START)) {
+               event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+               val = 0;
+       }
+
        write_pmc(i, val);
        perf_event_update_userpage(event);
 
@@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
        ret = 0;
  out:
        put_cpu_var(cpu_hw_events);
+       perf_pmu_enable(event->pmu);
        return ret;
 }
 
-/* perf must be disabled, context locked on entry */
-static void fsl_emb_pmu_disable(struct perf_event *event)
+/* context locked on entry */
+static void fsl_emb_pmu_del(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuhw;
        int i = event->hw.idx;
 
+       perf_pmu_disable(event->pmu);
        if (i < 0)
                goto out;
 
@@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event)
        cpuhw->n_events--;
 
  out:
+       perf_pmu_enable(event->pmu);
        put_cpu_var(cpu_hw_events);
 }
 
-/*
- * Re-enable interrupts on a event after they were throttled
- * because they were coming too fast.
- *
- * Context is locked on entry, but perf is not disabled.
- */
-static void fsl_emb_pmu_unthrottle(struct perf_event *event)
+static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
 {
-       s64 val, left;
        unsigned long flags;
+       s64 left;
 
        if (event->hw.idx < 0 || !event->hw.sample_period)
                return;
+
+       if (!(event->hw.state & PERF_HES_STOPPED))
+               return;
+
+       if (ef_flags & PERF_EF_RELOAD)
+               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
        local_irq_save(flags);
-       perf_disable();
-       fsl_emb_pmu_read(event);
-       left = event->hw.sample_period;
-       event->hw.last_period = left;
-       val = 0;
-       if (left < 0x80000000L)
-               val = 0x80000000L - left;
-       write_pmc(event->hw.idx, val);
-       local64_set(&event->hw.prev_count, val);
-       local64_set(&event->hw.period_left, left);
+       perf_pmu_disable(event->pmu);
+
+       event->hw.state = 0;
+       left = local64_read(&event->hw.period_left);
+       write_pmc(event->hw.idx, left);
+
        perf_event_update_userpage(event);
-       perf_enable();
+       perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
 }
 
-static struct pmu fsl_emb_pmu = {
-       .enable         = fsl_emb_pmu_enable,
-       .disable        = fsl_emb_pmu_disable,
-       .read           = fsl_emb_pmu_read,
-       .unthrottle     = fsl_emb_pmu_unthrottle,
-};
+static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
+{
+       unsigned long flags;
+
+       if (event->hw.idx < 0 || !event->hw.sample_period)
+               return;
+
+       if (event->hw.state & PERF_HES_STOPPED)
+               return;
+
+       local_irq_save(flags);
+       perf_pmu_disable(event->pmu);
+
+       fsl_emb_pmu_read(event);
+       event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       write_pmc(event->hw.idx, 0);
+
+       perf_event_update_userpage(event);
+       perf_pmu_enable(event->pmu);
+       local_irq_restore(flags);
+}
 
 /*
  * Release the PMU if this is the last perf_event.
@@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
        return 0;
 }
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+static int fsl_emb_pmu_event_init(struct perf_event *event)
 {
        u64 ev;
        struct perf_event *events[MAX_HWEVENTS];
@@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
        case PERF_TYPE_HARDWARE:
                ev = event->attr.config;
                if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
-                       return ERR_PTR(-EOPNOTSUPP);
+                       return -EOPNOTSUPP;
                ev = ppmu->generic_events[ev];
                break;
 
        case PERF_TYPE_HW_CACHE:
                err = hw_perf_cache_event(event->attr.config, &ev);
                if (err)
-                       return ERR_PTR(err);
+                       return err;
                break;
 
        case PERF_TYPE_RAW:
@@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
                break;
 
        default:
-               return ERR_PTR(-EINVAL);
+               return -ENOENT;
        }
 
        event->hw.config = ppmu->xlate_event(ev);
        if (!(event->hw.config & FSL_EMB_EVENT_VALID))
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
 
        /*
         * If this is in a group, check if it can go on with all the
@@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
                n = collect_events(event->group_leader,
                                   ppmu->n_counter - 1, events);
                if (n < 0)
-                       return ERR_PTR(-EINVAL);
+                       return -EINVAL;
        }
 
        if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
@@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
                }
 
                if (num_restricted >= ppmu->n_restricted)
-                       return ERR_PTR(-EINVAL);
+                       return -EINVAL;
        }
 
        event->hw.idx = -1;
@@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
        if (event->attr.exclude_kernel)
                event->hw.config_base |= PMLCA_FCS;
        if (event->attr.exclude_idle)
-               return ERR_PTR(-ENOTSUPP);
+               return -ENOTSUPP;
 
        event->hw.last_period = event->hw.sample_period;
        local64_set(&event->hw.period_left, event->hw.last_period);
@@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
        }
        event->destroy = hw_perf_event_destroy;
 
-       if (err)
-               return ERR_PTR(err);
-       return &fsl_emb_pmu;
+       return err;
 }
 
+static struct pmu fsl_emb_pmu = {
+       .pmu_enable     = fsl_emb_pmu_enable,
+       .pmu_disable    = fsl_emb_pmu_disable,
+       .event_init     = fsl_emb_pmu_event_init,
+       .add            = fsl_emb_pmu_add,
+       .del            = fsl_emb_pmu_del,
+       .start          = fsl_emb_pmu_start,
+       .stop           = fsl_emb_pmu_stop,
+       .read           = fsl_emb_pmu_read,
+};
+
 /*
  * A counter has overflowed; update its count and record
  * things if requested.  Note that interrupts are hard-disabled
@@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
        s64 prev, delta, left;
        int record = 0;
 
+       if (event->hw.state & PERF_HES_STOPPED) {
+               write_pmc(event->hw.idx, 0);
+               return;
+       }
+
        /* we don't have to worry about interrupts here */
        prev = local64_read(&event->hw.prev_count);
        delta = (val - prev) & 0xfffffffful;
@@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
                        val = 0x80000000LL - left;
        }
 
+       write_pmc(event->hw.idx, val);
+       local64_set(&event->hw.prev_count, val);
+       local64_set(&event->hw.period_left, left);
+       perf_event_update_userpage(event);
+
        /*
         * Finally record data if requested.
         */
@@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
                perf_sample_data_init(&data, 0);
                data.period = event->hw.last_period;
 
-               if (perf_event_overflow(event, nmi, &data, regs)) {
-                       /*
-                        * Interrupts are coming too fast - throttle them
-                        * by setting the event to 0, so it will be
-                        * at least 2^30 cycles until the next interrupt
-                        * (assuming each event counts at most 2 counts
-                        * per cycle).
-                        */
-                       val = 0;
-                       left = ~0ULL >> 1;
-               }
+               if (perf_event_overflow(event, nmi, &data, regs))
+                       fsl_emb_pmu_stop(event, 0);
        }
-
-       write_pmc(event->hw.idx, val);
-       local64_set(&event->hw.prev_count, val);
-       local64_set(&event->hw.period_left, left);
-       perf_event_update_userpage(event);
 }
 
 static void perf_event_interrupt(struct pt_regs *regs)
@@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
        pr_info("%s performance monitor hardware support registered\n",
                pmu->name);
 
+       perf_pmu_register(&fsl_emb_pmu);
+
        return 0;
 }
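
Both powerpc drivers use the same period arithmetic, visible in the hunks above: the PMCs count upward and interrupt when bit 31 becomes set, so a counter that should fire after "left" more events is programmed with 0x80000000 - left, and a period too large to fit is programmed as 0, the maximum distance to the next interrupt. A restatement of that calculation:

/* Value to program a PMC with so that it interrupts after 'left'
 * more events (sketch of the arithmetic used above). */
static u32 pmc_start_value(s64 left)
{
        u32 val = 0;                            /* >= 2^31 left: max period */

        if (left < 0x80000000L)
                val = 0x80000000L - left;       /* bit 31 sets after 'left' counts */
        return val;
}
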
index 8533b3b..54888eb 100644 (file)
@@ -53,7 +53,7 @@
 #include <linux/posix-timers.h>
 #include <linux/irq.h>
 #include <linux/delay.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
 #include <asm/trace.h>
 
 #include <asm/io.h>
@@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void)
 }
 #endif /* CONFIG_PPC_ISERIES */
 
-#ifdef CONFIG_PERF_EVENTS
+#ifdef CONFIG_IRQ_WORK
 
 /*
  * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
  */
 #ifdef CONFIG_PPC64
-static inline unsigned long test_perf_event_pending(void)
+static inline unsigned long test_irq_work_pending(void)
 {
        unsigned long x;
 
        asm volatile("lbz %0,%1(13)"
                : "=r" (x)
-               : "i" (offsetof(struct paca_struct, perf_event_pending)));
+               : "i" (offsetof(struct paca_struct, irq_work_pending)));
        return x;
 }
 
-static inline void set_perf_event_pending_flag(void)
+static inline void set_irq_work_pending_flag(void)
 {
        asm volatile("stb %0,%1(13)" : :
                "r" (1),
-               "i" (offsetof(struct paca_struct, perf_event_pending)));
+               "i" (offsetof(struct paca_struct, irq_work_pending)));
 }
 
-static inline void clear_perf_event_pending(void)
+static inline void clear_irq_work_pending(void)
 {
        asm volatile("stb %0,%1(13)" : :
                "r" (0),
-               "i" (offsetof(struct paca_struct, perf_event_pending)));
+               "i" (offsetof(struct paca_struct, irq_work_pending)));
 }
 
 #else /* 32-bit */
 
-DEFINE_PER_CPU(u8, perf_event_pending);
+DEFINE_PER_CPU(u8, irq_work_pending);
 
-#define set_perf_event_pending_flag()  __get_cpu_var(perf_event_pending) = 1
-#define test_perf_event_pending()      __get_cpu_var(perf_event_pending)
-#define clear_perf_event_pending()     __get_cpu_var(perf_event_pending) = 0
+#define set_irq_work_pending_flag()    __get_cpu_var(irq_work_pending) = 1
+#define test_irq_work_pending()                __get_cpu_var(irq_work_pending)
+#define clear_irq_work_pending()       __get_cpu_var(irq_work_pending) = 0
 
 #endif /* 32 vs 64 bit */
 
-void set_perf_event_pending(void)
+void set_irq_work_pending(void)
 {
        preempt_disable();
-       set_perf_event_pending_flag();
+       set_irq_work_pending_flag();
        set_dec(1);
        preempt_enable();
 }
 
-#else  /* CONFIG_PERF_EVENTS */
+#else  /* CONFIG_IRQ_WORK */
 
-#define test_perf_event_pending()      0
-#define clear_perf_event_pending()
+#define test_irq_work_pending()        0
+#define clear_irq_work_pending()
 
-#endif /* CONFIG_PERF_EVENTS */
+#endif /* CONFIG_IRQ_WORK */
 
 /*
  * For iSeries shared processors, we have to let the hypervisor
@@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs)
 
        calculate_steal_time();
 
-       if (test_perf_event_pending()) {
-               clear_perf_event_pending();
-               perf_event_do_pending();
+       if (test_irq_work_pending()) {
+               clear_irq_work_pending();
+               irq_work_run();
        }
 
 #ifdef CONFIG_PPC_ISERIES
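
The time.c hunk above is the powerpc half of the new irq_work machinery: the perf-only "pending" callback becomes a generic facility for running a callback from hard interrupt context shortly after it is queued, typically from NMI context where almost nothing else is safe. Generic code queues a work item, the architecture raises a self-interrupt (powerpc sets the pending byte and arms the decrementer; the sparc and x86 hooks appear below), and the interrupt handler drains the queue with irq_work_run(). A minimal usage sketch, assuming the init_irq_work()/irq_work_queue() interface added by this series:

#include <linux/irq_work.h>

static void demo_callback(struct irq_work *work)
{
        /* runs in hard-irq context, shortly after being queued */
}

static struct irq_work demo_work;

static void demo_init(void)
{
        init_irq_work(&demo_work, demo_callback);
}

static void demo_from_nmi(void)
{
        irq_work_queue(&demo_work);     /* makes the arch raise its self-interrupt */
}
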
index f0777a4..958f0da 100644 (file)
@@ -95,6 +95,7 @@ config S390
        select HAVE_KVM if 64BIT
        select HAVE_ARCH_TRACEHOOK
        select INIT_ALL_POSSIBLE
+       select HAVE_IRQ_WORK
        select HAVE_PERF_EVENTS
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_BZIP2
index 498bc38..881d945 100644 (file)
 #ifndef __ASM_HARDIRQ_H
 #define __ASM_HARDIRQ_H
 
-#include <linux/threads.h>
-#include <linux/sched.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
 #include <asm/lowcore.h>
 
 #define local_softirq_pending() (S390_lowcore.softirq_pending)
index 3840cbe..a75f168 100644 (file)
@@ -4,7 +4,6 @@
  * Copyright 2009 Martin Schwidefsky, IBM Corporation.
  */
 
-static inline void set_perf_event_pending(void) {}
-static inline void clear_perf_event_pending(void) {}
+/* Empty, just to avoid a compile error */
 
 #define PERF_EVENT_INDEX_OFFSET 0
index 33990fa..35b6879 100644 (file)
@@ -16,6 +16,7 @@ config SUPERH
        select HAVE_ARCH_TRACEHOOK
        select HAVE_DMA_API_DEBUG
        select HAVE_DMA_ATTRS
+       select HAVE_IRQ_WORK
        select HAVE_PERF_EVENTS
        select PERF_USE_VMALLOC
        select HAVE_KERNEL_GZIP
@@ -249,6 +250,11 @@ config ARCH_SHMOBILE
        select PM
        select PM_RUNTIME
 
+config CPU_HAS_PMU
+       depends on CPU_SH4 || CPU_SH4A
+       default y
+       bool
+
 if SUPERH32
 
 choice
@@ -738,6 +744,14 @@ config GUSA_RB
          LLSC, this should be more efficient than the other alternative of
          disabling interrupts around the atomic sequence.
 
+config HW_PERF_EVENTS
+       bool "Enable hardware performance counter support for perf events"
+       depends on PERF_EVENTS && CPU_HAS_PMU
+       default y
+       help
+         Enable hardware performance counter support for perf events. If
+         disabled, perf events will use software events only.
+
 source "drivers/sh/Kconfig"
 
 endmenu
index 3d0c9f3..14308be 100644 (file)
@@ -26,11 +26,4 @@ extern int register_sh_pmu(struct sh_pmu *);
 extern int reserve_pmc_hardware(void);
 extern void release_pmc_hardware(void);
 
-static inline void set_perf_event_pending(void)
-{
-       /* Nothing to see here, move along. */
-}
-
-#define PERF_EVENT_INDEX_OFFSET        0
-
 #endif /* __ASM_SH_PERF_EVENT_H */
index a9dd3ab..d5ca1ef 100644 (file)
 #include <asm/unwinder.h>
 #include <asm/ptrace.h>
 
-static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-       if (entry->nr < PERF_MAX_STACK_DEPTH)
-               entry->ip[entry->nr++] = ip;
-}
 
 static void callchain_warning(void *data, char *msg)
 {
@@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable)
        struct perf_callchain_entry *entry = data;
 
        if (reliable)
-               callchain_store(entry, addr);
+               perf_callchain_store(entry, addr);
 }
 
 static const struct stacktrace_ops callchain_ops = {
@@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = {
        .address        = callchain_address,
 };
 
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
-       callchain_store(entry, PERF_CONTEXT_KERNEL);
-       callchain_store(entry, regs->pc);
+       perf_callchain_store(entry, regs->pc);
 
        unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
 }
-
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-       int is_user;
-
-       if (!regs)
-               return;
-
-       is_user = user_mode(regs);
-
-       if (is_user && current->state != TASK_RUNNING)
-               return;
-
-       /*
-        * Only the kernel side is implemented for now.
-        */
-       if (!is_user)
-               perf_callchain_kernel(regs, entry);
-}
-
-/*
- * No need for separate IRQ and NMI entries.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-       struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
-
-       entry->nr = 0;
-
-       perf_do_callchain(regs, entry);
-
-       return entry;
-}
index 7a3dc35..5a4b334 100644 (file)
@@ -59,6 +59,24 @@ static inline int sh_pmu_initialized(void)
        return !!sh_pmu;
 }
 
+const char *perf_pmu_name(void)
+{
+       if (!sh_pmu)
+               return NULL;
+
+       return sh_pmu->name;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+       if (!sh_pmu)
+               return 0;
+
+       return sh_pmu->num_events;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
 /*
  * Release the PMU if this is the last perf_event.
  */
@@ -206,50 +224,80 @@ again:
        local64_add(delta, &event->count);
 }
 
-static void sh_pmu_disable(struct perf_event *event)
+static void sh_pmu_stop(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;
 
-       clear_bit(idx, cpuc->active_mask);
-       sh_pmu->disable(hwc, idx);
+       if (!(event->hw.state & PERF_HES_STOPPED)) {
+               sh_pmu->disable(hwc, idx);
+               cpuc->events[idx] = NULL;
+               event->hw.state |= PERF_HES_STOPPED;
+       }
+
+       if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+               sh_perf_event_update(event, &event->hw, idx);
+               event->hw.state |= PERF_HES_UPTODATE;
+       }
+}
+
+static void sh_pmu_start(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       if (WARN_ON_ONCE(idx == -1))
+               return;
+
+       if (flags & PERF_EF_RELOAD)
+               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
 
-       barrier();
+       cpuc->events[idx] = event;
+       event->hw.state = 0;
+       sh_pmu->enable(hwc, idx);
+}
 
-       sh_perf_event_update(event, &event->hw, idx);
+static void sh_pmu_del(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-       cpuc->events[idx] = NULL;
-       clear_bit(idx, cpuc->used_mask);
+       sh_pmu_stop(event, PERF_EF_UPDATE);
+       __clear_bit(event->hw.idx, cpuc->used_mask);
 
        perf_event_update_userpage(event);
 }
 
-static int sh_pmu_enable(struct perf_event *event)
+static int sh_pmu_add(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;
+       int ret = -EAGAIN;
+
+       perf_pmu_disable(event->pmu);
 
-       if (test_and_set_bit(idx, cpuc->used_mask)) {
+       if (__test_and_set_bit(idx, cpuc->used_mask)) {
                idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
                if (idx == sh_pmu->num_events)
-                       return -EAGAIN;
+                       goto out;
 
-               set_bit(idx, cpuc->used_mask);
+               __set_bit(idx, cpuc->used_mask);
                hwc->idx = idx;
        }
 
        sh_pmu->disable(hwc, idx);
 
-       cpuc->events[idx] = event;
-       set_bit(idx, cpuc->active_mask);
-
-       sh_pmu->enable(hwc, idx);
+       event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+       if (flags & PERF_EF_START)
+               sh_pmu_start(event, PERF_EF_RELOAD);
 
        perf_event_update_userpage(event);
-
-       return 0;
+       ret = 0;
+out:
+       perf_pmu_enable(event->pmu);
+       return ret;
 }
 
 static void sh_pmu_read(struct perf_event *event)
@@ -257,24 +305,56 @@ static void sh_pmu_read(struct perf_event *event)
        sh_perf_event_update(event, &event->hw, event->hw.idx);
 }
 
-static const struct pmu pmu = {
-       .enable         = sh_pmu_enable,
-       .disable        = sh_pmu_disable,
-       .read           = sh_pmu_read,
-};
-
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+static int sh_pmu_event_init(struct perf_event *event)
 {
-       int err = __hw_perf_event_init(event);
+       int err;
+
+       switch (event->attr.type) {
+       case PERF_TYPE_RAW:
+       case PERF_TYPE_HW_CACHE:
+       case PERF_TYPE_HARDWARE:
+               err = __hw_perf_event_init(event);
+               break;
+
+       default:
+               return -ENOENT;
+       }
+
        if (unlikely(err)) {
                if (event->destroy)
                        event->destroy(event);
-               return ERR_PTR(err);
        }
 
-       return &pmu;
+       return err;
+}
+
+static void sh_pmu_enable(struct pmu *pmu)
+{
+       if (!sh_pmu_initialized())
+               return;
+
+       sh_pmu->enable_all();
+}
+
+static void sh_pmu_disable(struct pmu *pmu)
+{
+       if (!sh_pmu_initialized())
+               return;
+
+       sh_pmu->disable_all();
 }
 
+static struct pmu pmu = {
+       .pmu_enable     = sh_pmu_enable,
+       .pmu_disable    = sh_pmu_disable,
+       .event_init     = sh_pmu_event_init,
+       .add            = sh_pmu_add,
+       .del            = sh_pmu_del,
+       .start          = sh_pmu_start,
+       .stop           = sh_pmu_stop,
+       .read           = sh_pmu_read,
+};
+
 static void sh_pmu_setup(int cpu)
 {
        struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
@@ -299,32 +379,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
        return NOTIFY_OK;
 }
 
-void hw_perf_enable(void)
-{
-       if (!sh_pmu_initialized())
-               return;
-
-       sh_pmu->enable_all();
-}
-
-void hw_perf_disable(void)
-{
-       if (!sh_pmu_initialized())
-               return;
-
-       sh_pmu->disable_all();
-}
-
-int __cpuinit register_sh_pmu(struct sh_pmu *pmu)
+int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
 {
        if (sh_pmu)
                return -EBUSY;
-       sh_pmu = pmu;
+       sh_pmu = _pmu;
 
-       pr_info("Performance Events: %s support registered\n", pmu->name);
+       pr_info("Performance Events: %s support registered\n", _pmu->name);
 
-       WARN_ON(pmu->num_events > MAX_HWEVENTS);
+       WARN_ON(_pmu->num_events > MAX_HWEVENTS);
 
+       perf_pmu_register(&pmu);
        perf_cpu_notifier(sh_pmu_notifier);
        return 0;
 }
index 4886c5c..e85aae7 100644 (file)
@@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
                oprofilefs.o oprofile_stats.o \
                timer_int.o )
 
+ifeq ($(CONFIG_HW_PERF_EVENTS),y)
+DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o)
+endif
+
 oprofile-y     := $(DRIVER_OBJS) common.o backtrace.o
index ac60493..e10d893 100644 (file)
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/smp.h>
+#include <linux/perf_event.h>
 #include <asm/processor.h>
-#include "op_impl.h"
-
-static struct op_sh_model *model;
-
-static struct op_counter_config ctr[20];
 
+#ifdef CONFIG_HW_PERF_EVENTS
 extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
 
-static int op_sh_setup(void)
-{
-       /* Pre-compute the values to stuff in the hardware registers.  */
-       model->reg_setup(ctr);
-
-       /* Configure the registers on all cpus.  */
-       on_each_cpu(model->cpu_setup, NULL, 1);
-
-        return 0;
-}
-
-static int op_sh_create_files(struct super_block *sb, struct dentry *root)
+char *op_name_from_perf_id(void)
 {
-       int i, ret = 0;
+       const char *pmu;
+       static char buf[20];    /* returned to the caller; must not be on the stack */
+       int size;
 
-       for (i = 0; i < model->num_counters; i++) {
-               struct dentry *dir;
-               char buf[4];
+       pmu = perf_pmu_name();
+       if (!pmu)
+               return NULL;
 
-               snprintf(buf, sizeof(buf), "%d", i);
-               dir = oprofilefs_mkdir(sb, root, buf);
+       size = snprintf(buf, sizeof(buf), "sh/%s", pmu);
+       if (size > -1 && size < sizeof(buf))
+               return buf;
 
-               ret |= oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled);
-               ret |= oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event);
-               ret |= oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel);
-               ret |= oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user);
-
-               if (model->create_files)
-                       ret |= model->create_files(sb, dir);
-               else
-                       ret |= oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count);
-
-               /* Dummy entries */
-               ret |= oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask);
-       }
-
-       return ret;
+       return NULL;
 }
 
-static int op_sh_start(void)
+int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
-       /* Enable performance monitoring for all counters.  */
-       on_each_cpu(model->cpu_start, NULL, 1);
+       ops->backtrace = sh_backtrace;
 
-       return 0;
+       return oprofile_perf_init(ops);
 }
 
-static void op_sh_stop(void)
+void __exit oprofile_arch_exit(void)
 {
-       /* Disable performance monitoring for all counters.  */
-       on_each_cpu(model->cpu_stop, NULL, 1);
+       oprofile_perf_exit();
 }
-
+#else
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
-       struct op_sh_model *lmodel = NULL;
-       int ret;
-
-       /*
-        * Always assign the backtrace op. If the counter initialization
-        * fails, we fall back to the timer which will still make use of
-        * this.
-        */
-       ops->backtrace = sh_backtrace;
-
-       /*
-        * XXX
-        *
-        * All of the SH7750/SH-4A counters have been converted to perf,
-        * this infrastructure hook is left for other users until they've
-        * had a chance to convert over, at which point all of this
-        * will be deleted.
-        */
-
-       if (!lmodel)
-               return -ENODEV;
-       if (!(current_cpu_data.flags & CPU_HAS_PERF_COUNTER))
-               return -ENODEV;
-
-       ret = lmodel->init();
-       if (unlikely(ret != 0))
-               return ret;
-
-       model = lmodel;
-
-       ops->setup              = op_sh_setup;
-       ops->create_files       = op_sh_create_files;
-       ops->start              = op_sh_start;
-       ops->stop               = op_sh_stop;
-       ops->cpu_type           = lmodel->cpu_type;
-
-       printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
-              lmodel->cpu_type);
-
-       return 0;
-}
-
-void oprofile_arch_exit(void)
-{
-       if (model && model->exit)
-               model->exit();
+       pr_info("oprofile: hardware counters not available\n");
+       return -ENODEV;
 }
+void __exit oprofile_arch_exit(void) {}
+#endif /* CONFIG_HW_PERF_EVENTS */
diff --git a/arch/sh/oprofile/op_impl.h b/arch/sh/oprofile/op_impl.h
deleted file mode 100644 (file)
index 1244479..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef __OP_IMPL_H
-#define __OP_IMPL_H
-
-/* Per-counter configuration as set via oprofilefs.  */
-struct op_counter_config {
-       unsigned long enabled;
-       unsigned long event;
-
-       unsigned long count;
-
-       /* Dummy values for userspace tool compliance */
-       unsigned long kernel;
-       unsigned long user;
-       unsigned long unit_mask;
-};
-
-/* Per-architecture configury and hooks.  */
-struct op_sh_model {
-       void (*reg_setup)(struct op_counter_config *);
-       int (*create_files)(struct super_block *sb, struct dentry *dir);
-       void (*cpu_setup)(void *dummy);
-       int (*init)(void);
-       void (*exit)(void);
-       void (*cpu_start)(void *args);
-       void (*cpu_stop)(void *args);
-       char *cpu_type;
-       unsigned char num_counters;
-};
-
-/* arch/sh/oprofile/common.c */
-extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth);
-
-#endif /* __OP_IMPL_H */
index 491e9d6..3e9d314 100644 (file)
@@ -26,10 +26,12 @@ config SPARC
        select ARCH_WANT_OPTIONAL_GPIOLIB
        select RTC_CLASS
        select RTC_DRV_M48T59
+       select HAVE_IRQ_WORK
        select HAVE_PERF_EVENTS
        select PERF_USE_VMALLOC
        select HAVE_DMA_ATTRS
        select HAVE_DMA_API_DEBUG
+       select HAVE_ARCH_JUMP_LABEL
 
 config SPARC32
        def_bool !64BIT
@@ -53,6 +55,7 @@ config SPARC64
        select RTC_DRV_BQ4802
        select RTC_DRV_SUN4V
        select RTC_DRV_STARFIRE
+       select HAVE_IRQ_WORK
        select HAVE_PERF_EVENTS
        select PERF_USE_VMALLOC
 
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
new file mode 100644 (file)
index 0000000..62e66d7
--- /dev/null
@@ -0,0 +1,32 @@
+#ifndef _ASM_SPARC_JUMP_LABEL_H
+#define _ASM_SPARC_JUMP_LABEL_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <asm/system.h>
+
+#define JUMP_LABEL_NOP_SIZE 4
+
+#define JUMP_LABEL(key, label)                                 \
+       do {                                                    \
+               asm goto("1:\n\t"                               \
+                        "nop\n\t"                              \
+                        "nop\n\t"                              \
+                        ".pushsection __jump_table,  \"a\"\n\t"\
+                        ".word 1b, %l[" #label "], %c0\n\t"    \
+                        ".popsection \n\t"                     \
+                        : :  "i" (key) :  : label);\
+       } while (0)
+
+#endif /* __KERNEL__ */
+
+typedef u32 jump_label_t;
+
+struct jump_entry {
+       jump_label_t code;
+       jump_label_t target;
+       jump_label_t key;
+};
+
+#endif
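
Jump labels make an almost-never-taken branch cost only a nop on the fast path: JUMP_LABEL() emits the nop(s) at the site and records a (site, target, key) triple in the __jump_table section, and enabling the key later patches the nop into a real branch (the sparc branch encodings are built in the jump_label.c hunk below). A hypothetical usage sketch; jump_label_enable() and the exact toggle interface are assumptions, not shown in this hunk:

#include <linux/kernel.h>
#include <linux/jump_label.h>

static int demo_key;    /* must be statically addressable: used as an "i" operand */

static void demo(void)
{
        JUMP_LABEL(&demo_key, do_slow); /* just a nop until the key is enabled */
        return;
do_slow:
        printk(KERN_DEBUG "slow path taken\n");
}

/* elsewhere: jump_label_enable(&demo_key) would patch the nop into a branch */
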
index 727af70..6e8bfa1 100644 (file)
@@ -1,10 +1,6 @@
 #ifndef __ASM_SPARC_PERF_EVENT_H
 #define __ASM_SPARC_PERF_EVENT_H
 
-extern void set_perf_event_pending(void);
-
-#define        PERF_EVENT_INDEX_OFFSET 0
-
 #ifdef CONFIG_PERF_EVENTS
 #include <asm/ptrace.h>
 
index 0c2dc1f..599398f 100644 (file)
@@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT)    += $(audit--y)
 
 pc--$(CONFIG_PERF_EVENTS) := perf_event.o
 obj-$(CONFIG_SPARC64)  += $(pc--y)
+
+obj-$(CONFIG_SPARC64)  += jump_label.o
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
new file mode 100644 (file)
index 0000000..ea2dafc
--- /dev/null
@@ -0,0 +1,47 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+
+#include <linux/jump_label.h>
+#include <linux/memory.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+void arch_jump_label_transform(struct jump_entry *entry,
+                              enum jump_label_type type)
+{
+       u32 val;
+       u32 *insn = (u32 *) (unsigned long) entry->code;
+
+       if (type == JUMP_LABEL_ENABLE) {
+               s32 off = (s32)entry->target - (s32)entry->code;
+
+#ifdef CONFIG_SPARC64
+               /* ba,pt %xcc, . + (off << 2) */
+               val = 0x10680000 | ((u32) off >> 2);
+#else
+               /* ba . + (off << 2) */
+               val = 0x10800000 | ((u32) off >> 2);
+#endif
+       } else {
+               val = 0x01000000;
+       }
+
+       get_online_cpus();
+       mutex_lock(&text_mutex);
+       *insn = val;
+       flushi(insn);
+       mutex_unlock(&text_mutex);
+       put_online_cpus();
+}
+
+void arch_jump_label_text_poke_early(jump_label_t addr)
+{
+       u32 *insn_p = (u32 *) (unsigned long) addr;
+
+       *insn_p = 0x01000000;
+       flushi(insn_p);
+}
+
+#endif
index f848aad..ee3c7dd 100644 (file)
@@ -18,6 +18,9 @@
 #include <asm/spitfire.h>
 
 #ifdef CONFIG_SPARC64
+
+#include <linux/jump_label.h>
+
 static void *module_map(unsigned long size)
 {
        struct vm_struct *area;
@@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr,
                    const Elf_Shdr *sechdrs,
                    struct module *me)
 {
+       /* make jump label nops */
+       jump_label_apply_nops(me);
+
        /* Cheetah's I-cache is fully coherent.  */
        if (tlb_type == spitfire) {
                unsigned long va;
index c4a6a50..b87873c 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
 #include <linux/ftrace.h>
 
 #include <asm/pil.h>
@@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs)
 
        old_regs = set_irq_regs(regs);
        irq_enter();
-#ifdef CONFIG_PERF_EVENTS
-       perf_event_do_pending();
+#ifdef CONFIG_IRQ_WORK
+       irq_work_run();
 #endif
        irq_exit();
        set_irq_regs(old_regs);
 }
 
-void set_perf_event_pending(void)
+void arch_irq_work_raise(void)
 {
        set_softint(1 << PIL_DEFERRED_PCR_WORK);
 }
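
This pcr.c hunk is sparc's half of the irq_work contract: arch_irq_work_raise() must arrange for irq_work_run() to be called from a hard interrupt as soon as practical, and sparc reuses the deferred-PCR softint it already had for perf. For an architecture with a spare self-interrupt vector, the obligation looks roughly like this (hypothetical primitives, for illustration only):

/* Raise side: called by generic code after queueing work. */
void arch_irq_work_raise(void)
{
        trigger_self_interrupt(IRQ_WORK_VECTOR);        /* hypothetical */
}

/* Handler side: drain the queue in hard-irq context. */
void irq_work_interrupt_handler(struct pt_regs *regs)
{
        irq_enter();
        irq_work_run();
        irq_exit();
}
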
index 6318e62..0d6deb5 100644 (file)
@@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
 
                enc = perf_event_get_enc(cpuc->events[i]);
                pcr &= ~mask_for_index(idx);
-               pcr |= event_encoding(enc, idx);
+               if (hwc->state & PERF_HES_STOPPED)
+                       pcr |= nop_for_index(idx);
+               else
+                       pcr |= event_encoding(enc, idx);
        }
 out:
        return pcr;
 }
 
-void hw_perf_enable(void)
+static void sparc_pmu_enable(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        u64 pcr;
@@ -691,7 +694,7 @@ void hw_perf_enable(void)
        pcr_ops->write(cpuc->pcr);
 }
 
-void hw_perf_disable(void)
+static void sparc_pmu_disable(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        u64 val;
@@ -710,19 +713,65 @@ void hw_perf_disable(void)
        pcr_ops->write(cpuc->pcr);
 }
 
-static void sparc_pmu_disable(struct perf_event *event)
+static int active_event_index(struct cpu_hw_events *cpuc,
+                             struct perf_event *event)
+{
+       int i;
+
+       for (i = 0; i < cpuc->n_events; i++) {
+               if (cpuc->event[i] == event)
+                       break;
+       }
+       BUG_ON(i == cpuc->n_events);
+       return cpuc->current_idx[i];
+}
+
+static void sparc_pmu_start(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       int idx = active_event_index(cpuc, event);
+
+       if (flags & PERF_EF_RELOAD) {
+               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+               sparc_perf_event_set_period(event, &event->hw, idx);
+       }
+
+       event->hw.state = 0;
+
+       sparc_pmu_enable_event(cpuc, &event->hw, idx);
+}
+
+static void sparc_pmu_stop(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       int idx = active_event_index(cpuc, event);
+
+       if (!(event->hw.state & PERF_HES_STOPPED)) {
+               sparc_pmu_disable_event(cpuc, &event->hw, idx);
+               event->hw.state |= PERF_HES_STOPPED;
+       }
+
+       if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) {
+               sparc_perf_event_update(event, &event->hw, idx);
+               event->hw.state |= PERF_HES_UPTODATE;
+       }
+}
+
+static void sparc_pmu_del(struct perf_event *event, int _flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
        unsigned long flags;
        int i;
 
        local_irq_save(flags);
-       perf_disable();
+       perf_pmu_disable(event->pmu);
 
        for (i = 0; i < cpuc->n_events; i++) {
                if (event == cpuc->event[i]) {
-                       int idx = cpuc->current_idx[i];
+                       /* Absorb the final count and turn off the
+                        * event.
+                        */
+                       sparc_pmu_stop(event, PERF_EF_UPDATE);
 
                        /* Shift remaining entries down into
                         * the existing slot.
@@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event)
                                        cpuc->current_idx[i];
                        }
 
-                       /* Absorb the final count and turn off the
-                        * event.
-                        */
-                       sparc_pmu_disable_event(cpuc, hwc, idx);
-                       barrier();
-                       sparc_perf_event_update(event, hwc, idx);
-
                        perf_event_update_userpage(event);
 
                        cpuc->n_events--;
@@ -748,23 +790,10 @@ static void sparc_pmu_disable(struct perf_event *event)
                }
        }
 
-       perf_enable();
+       perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
 }
 
-static int active_event_index(struct cpu_hw_events *cpuc,
-                             struct perf_event *event)
-{
-       int i;
-
-       for (i = 0; i < cpuc->n_events; i++) {
-               if (cpuc->event[i] == event)
-                       break;
-       }
-       BUG_ON(i == cpuc->n_events);
-       return cpuc->current_idx[i];
-}
-
 static void sparc_pmu_read(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event)
        sparc_perf_event_update(event, hwc, idx);
 }
 
-static void sparc_pmu_unthrottle(struct perf_event *event)
-{
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       int idx = active_event_index(cpuc, event);
-       struct hw_perf_event *hwc = &event->hw;
-
-       sparc_pmu_enable_event(cpuc, hwc, idx);
-}
-
 static atomic_t active_events = ATOMIC_INIT(0);
 static DEFINE_MUTEX(pmc_grab_mutex);
 
@@ -877,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts,
        if (!n_ev)
                return 0;
 
-       if (n_ev > perf_max_events)
+       if (n_ev > MAX_HWEVENTS)
                return -1;
 
        msk0 = perf_event_get_msk(events[0]);
@@ -984,23 +1004,27 @@ static int collect_events(struct perf_event *group, int max_count,
        return n;
 }
 
-static int sparc_pmu_enable(struct perf_event *event)
+static int sparc_pmu_add(struct perf_event *event, int ef_flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int n0, ret = -EAGAIN;
        unsigned long flags;
 
        local_irq_save(flags);
-       perf_disable();
+       perf_pmu_disable(event->pmu);
 
        n0 = cpuc->n_events;
-       if (n0 >= perf_max_events)
+       if (n0 >= MAX_HWEVENTS)
                goto out;
 
        cpuc->event[n0] = event;
        cpuc->events[n0] = event->hw.event_base;
        cpuc->current_idx[n0] = PIC_NO_INDEX;
 
+       event->hw.state = PERF_HES_UPTODATE;
+       if (!(ef_flags & PERF_EF_START))
+               event->hw.state |= PERF_HES_STOPPED;
+
        /*
         * If group events scheduling transaction was started,
          * skip the schedulability test here, it will be performed
@@ -1020,12 +1044,12 @@ nocheck:
 
        ret = 0;
 out:
-       perf_enable();
+       perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
        return ret;
 }
 
-static int __hw_perf_event_init(struct perf_event *event)
+static int sparc_pmu_event_init(struct perf_event *event)
 {
        struct perf_event_attr *attr = &event->attr;
        struct perf_event *evts[MAX_HWEVENTS];
@@ -1038,22 +1062,33 @@ static int __hw_perf_event_init(struct perf_event *event)
        if (atomic_read(&nmi_active) < 0)
                return -ENODEV;
 
-       pmap = NULL;
-       if (attr->type == PERF_TYPE_HARDWARE) {
+       switch (attr->type) {
+       case PERF_TYPE_HARDWARE:
                if (attr->config >= sparc_pmu->max_events)
                        return -EINVAL;
                pmap = sparc_pmu->event_map(attr->config);
-       } else if (attr->type == PERF_TYPE_HW_CACHE) {
+               break;
+
+       case PERF_TYPE_HW_CACHE:
                pmap = sparc_map_cache_event(attr->config);
                if (IS_ERR(pmap))
                        return PTR_ERR(pmap);
-       } else if (attr->type != PERF_TYPE_RAW)
-               return -EOPNOTSUPP;
+               break;
+
+       case PERF_TYPE_RAW:
+               pmap = NULL;
+               break;
+
+       default:
+               return -ENOENT;
+
+       }
 
        if (pmap) {
                hwc->event_base = perf_event_encode(pmap);
        } else {
-               /* User gives us "(encoding << 16) | pic_mask" for
+               /*
+                * User gives us "(encoding << 16) | pic_mask" for
                 * PERF_TYPE_RAW events.
                 */
                hwc->event_base = attr->config;
@@ -1071,7 +1106,7 @@ static int __hw_perf_event_init(struct perf_event *event)
        n = 0;
        if (event->group_leader != event) {
                n = collect_events(event->group_leader,
-                                  perf_max_events - 1,
+                                  MAX_HWEVENTS - 1,
                                   evts, events, current_idx_dmy);
                if (n < 0)
                        return -EINVAL;
@@ -1107,10 +1142,11 @@ static int __hw_perf_event_init(struct perf_event *event)
  * Set the flag to make pmu::enable() not perform the
  * schedulability test; it will be performed at commit time
  */
-static void sparc_pmu_start_txn(const struct pmu *pmu)
+static void sparc_pmu_start_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
+       perf_pmu_disable(pmu);
        cpuhw->group_flag |= PERF_EVENT_TXN;
 }
 
@@ -1119,11 +1155,12 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
  * Clear the flag and pmu::enable() will perform the
  * schedulability test.
  */
-static void sparc_pmu_cancel_txn(const struct pmu *pmu)
+static void sparc_pmu_cancel_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
        cpuhw->group_flag &= ~PERF_EVENT_TXN;
+       perf_pmu_enable(pmu);
 }
 
 /*
@@ -1131,7 +1168,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
  * Perform the group schedulability test as a whole
  * Return 0 on success
  */
-static int sparc_pmu_commit_txn(const struct pmu *pmu)
+static int sparc_pmu_commit_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int n;
@@ -1147,28 +1184,24 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
                return -EAGAIN;
 
        cpuc->group_flag &= ~PERF_EVENT_TXN;
+       perf_pmu_enable(pmu);
        return 0;
 }
 
-static const struct pmu pmu = {
-       .enable         = sparc_pmu_enable,
-       .disable        = sparc_pmu_disable,
+static struct pmu pmu = {
+       .pmu_enable     = sparc_pmu_enable,
+       .pmu_disable    = sparc_pmu_disable,
+       .event_init     = sparc_pmu_event_init,
+       .add            = sparc_pmu_add,
+       .del            = sparc_pmu_del,
+       .start          = sparc_pmu_start,
+       .stop           = sparc_pmu_stop,
        .read           = sparc_pmu_read,
-       .unthrottle     = sparc_pmu_unthrottle,
        .start_txn      = sparc_pmu_start_txn,
        .cancel_txn     = sparc_pmu_cancel_txn,
        .commit_txn     = sparc_pmu_commit_txn,
 };
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
-{
-       int err = __hw_perf_event_init(event);
-
-       if (err)
-               return ERR_PTR(err);
-       return &pmu;
-}
-
 void perf_event_print_debug(void)
 {
        unsigned long flags;
@@ -1244,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
                        continue;
 
                if (perf_event_overflow(event, 1, &data, regs))
-                       sparc_pmu_disable_event(cpuc, hwc, idx);
+                       sparc_pmu_stop(event, 0);
        }
 
        return NOTIFY_STOP;
@@ -1285,28 +1318,21 @@ void __init init_hw_perf_events(void)
 
        pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 
-       /* All sparc64 PMUs currently have 2 events.  */
-       perf_max_events = 2;
-
+       perf_pmu_register(&pmu);
        register_die_notifier(&perf_event_nmi_notifier);
 }
 
-static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-       if (entry->nr < PERF_MAX_STACK_DEPTH)
-               entry->ip[entry->nr++] = ip;
-}
-
-static void perf_callchain_kernel(struct pt_regs *regs,
-                                 struct perf_callchain_entry *entry)
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+                          struct pt_regs *regs)
 {
        unsigned long ksp, fp;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        int graph = 0;
 #endif
 
-       callchain_store(entry, PERF_CONTEXT_KERNEL);
-       callchain_store(entry, regs->tpc);
+       stack_trace_flush();
+
+       perf_callchain_store(entry, regs->tpc);
 
        ksp = regs->u_regs[UREG_I6];
        fp = ksp + STACK_BIAS;
@@ -1330,13 +1356,13 @@ static void perf_callchain_kernel(struct pt_regs *regs,
                        pc = sf->callers_pc;
                        fp = (unsigned long)sf->fp + STACK_BIAS;
                }
-               callchain_store(entry, pc);
+               perf_callchain_store(entry, pc);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
                if ((pc + 8UL) == (unsigned long) &return_to_handler) {
                        int index = current->curr_ret_stack;
                        if (current->ret_stack && index >= graph) {
                                pc = current->ret_stack[index - graph].ret;
-                               callchain_store(entry, pc);
+                               perf_callchain_store(entry, pc);
                                graph++;
                        }
                }
@@ -1344,13 +1370,12 @@ static void perf_callchain_kernel(struct pt_regs *regs,
        } while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 
-static void perf_callchain_user_64(struct pt_regs *regs,
-                                  struct perf_callchain_entry *entry)
+static void perf_callchain_user_64(struct perf_callchain_entry *entry,
+                                  struct pt_regs *regs)
 {
        unsigned long ufp;
 
-       callchain_store(entry, PERF_CONTEXT_USER);
-       callchain_store(entry, regs->tpc);
+       perf_callchain_store(entry, regs->tpc);
 
        ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
        do {
@@ -1363,17 +1388,16 @@ static void perf_callchain_user_64(struct pt_regs *regs,
 
                pc = sf.callers_pc;
                ufp = (unsigned long)sf.fp + STACK_BIAS;
-               callchain_store(entry, pc);
+               perf_callchain_store(entry, pc);
        } while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 
-static void perf_callchain_user_32(struct pt_regs *regs,
-                                  struct perf_callchain_entry *entry)
+static void perf_callchain_user_32(struct perf_callchain_entry *entry,
+                                  struct pt_regs *regs)
 {
        unsigned long ufp;
 
-       callchain_store(entry, PERF_CONTEXT_USER);
-       callchain_store(entry, regs->tpc);
+       perf_callchain_store(entry, regs->tpc);
 
        ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
        do {
@@ -1386,34 +1410,16 @@ static void perf_callchain_user_32(struct pt_regs *regs,
 
                pc = sf.callers_pc;
                ufp = (unsigned long)sf.fp;
-               callchain_store(entry, pc);
+               perf_callchain_store(entry, pc);
        } while (entry->nr < PERF_MAX_STACK_DEPTH);
 }
 
-/* Like powerpc we can't get PMU interrupts within the PMU handler,
- * so no need for separate NMI and IRQ chains as on x86.
- */
-static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
-       struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
-
-       entry->nr = 0;
-       if (!user_mode(regs)) {
-               stack_trace_flush();
-               perf_callchain_kernel(regs, entry);
-               if (current->mm)
-                       regs = task_pt_regs(current);
-               else
-                       regs = NULL;
-       }
-       if (regs) {
-               flushw_user();
-               if (test_thread_flag(TIF_32BIT))
-                       perf_callchain_user_32(regs, entry);
-               else
-                       perf_callchain_user_64(regs, entry);
-       }
-       return entry;
+       flushw_user();
+       if (test_thread_flag(TIF_32BIT))
+               perf_callchain_user_32(entry, regs);
+       else
+               perf_callchain_user_64(entry, regs);
 }
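
[ Note on the callchain rework: the per-arch callchain_store() helper
  deleted above is replaced by a generic perf_callchain_store() in the
  core. Judging by the call sites, it keeps the same bounds-checked
  append semantics, and the core now stores the PERF_CONTEXT_KERNEL /
  PERF_CONTEXT_USER markers itself, which is why the arch code no
  longer does. A minimal sketch of the assumed contract, mirroring the
  removed sparc helper:

        static inline void
        perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
        {
                /* append one address, silently dropping on overflow */
                if (entry->nr < PERF_MAX_STACK_DEPTH)
                        entry->ip[entry->nr++] = ip;
        }
]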
index cea0cd9..fd227d6 100644
@@ -25,6 +25,7 @@ config X86
        select HAVE_IDE
        select HAVE_OPROFILE
        select HAVE_PERF_EVENTS if (!M386 && !M486)
+       select HAVE_IRQ_WORK
        select HAVE_IOREMAP_PROT
        select HAVE_KPROBES
        select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -33,6 +34,7 @@ config X86
        select HAVE_KRETPROBES
        select HAVE_OPTPROBES
        select HAVE_FTRACE_MCOUNT_RECORD
+       select HAVE_C_RECORDMCOUNT
        select HAVE_DYNAMIC_FTRACE
        select HAVE_FUNCTION_TRACER
        select HAVE_FUNCTION_GRAPH_TRACER
@@ -59,6 +61,8 @@ config X86
        select ANON_INODES
        select HAVE_ARCH_KMEMCHECK
        select HAVE_USER_RETURN_NOTIFIER
+       select HAVE_ARCH_JUMP_LABEL
+       select HAVE_TEXT_POKE_SMP
 
 config INSTRUCTION_DECODER
        def_bool (KPROBES || PERF_EVENTS)
@@ -2125,6 +2129,10 @@ config HAVE_ATOMIC_IOMAP
        def_bool y
        depends on X86_32
 
+config HAVE_TEXT_POKE_SMP
+       bool
+       select STOP_MACHINE if SMP
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"
index bc6abb7..76561d2 100644
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
+#include <linux/jump_label.h>
 #include <asm/asm.h>
 
 /*
@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
 #define __parainstructions_end NULL
 #endif
 
+extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+
 /*
  * Clear and restore the kernel write-protection flag on the local CPU.
  * Allows the kernel to edit read-only pages.
@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
+#define IDEAL_NOP_SIZE_5 5
+extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+extern void arch_init_ideal_nop5(void);
+#else
+static inline void arch_init_ideal_nop5(void) {}
+#endif
+
 #endif /* _ASM_X86_ALTERNATIVE_H */
index 8e8ec66..b8e96a1 100644
@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 
-#ifdef CONFIG_PERF_EVENTS
-BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
+#ifdef CONFIG_IRQ_WORK
+BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
 #endif
 
 #ifdef CONFIG_X86_THERMAL_VECTOR
index aeab29a..55e4de6 100644
@@ -14,7 +14,7 @@ typedef struct {
 #endif
        unsigned int x86_platform_ipis; /* arch dependent */
        unsigned int apic_perf_irqs;
-       unsigned int apic_pending_irqs;
+       unsigned int apic_irq_work_irqs;
 #ifdef CONFIG_SMP
        unsigned int irq_resched_count;
        unsigned int irq_call_count;
index 46c0fe0..3a54a1c 100644
@@ -29,7 +29,7 @@
 extern void apic_timer_interrupt(void);
 extern void x86_platform_ipi(void);
 extern void error_interrupt(void);
-extern void perf_pending_interrupt(void);
+extern void irq_work_interrupt(void);
 
 extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
index e2ca300..6af0894 100644
 #define X86_PLATFORM_IPI_VECTOR                0xed
 
 /*
- * Performance monitoring pending work vector:
+ * IRQ work vector:
  */
-#define LOCAL_PENDING_VECTOR           0xec
+#define IRQ_WORK_VECTOR                        0xec
 
 #define UV_BAU_MESSAGE                 0xea
 
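[ Note: with this rename the perf-only self-IPI becomes a generic
  irq_work facility: any subsystem may queue a callback from NMI
  context and have it run in hardirq context when this vector fires.
  A usage sketch, assuming the interface introduced by the irq_work
  patch in this series (caller-side names hypothetical):

        #include <linux/irq_work.h>

        static struct irq_work my_work;

        static void my_work_func(struct irq_work *work)
        {
                /* runs in hardirq context shortly after queueing */
        }

        static void example(void)
        {
                init_irq_work(&my_work, my_work_func);
                irq_work_queue(&my_work);       /* OK even from NMI */
        }
]
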
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
new file mode 100644
index 0000000..f52d42e
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef _ASM_X86_JUMP_LABEL_H
+#define _ASM_X86_JUMP_LABEL_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <asm/nops.h>
+
+#define JUMP_LABEL_NOP_SIZE 5
+
+# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+
+# define JUMP_LABEL(key, label)                                        \
+       do {                                                    \
+               asm goto("1:"                                   \
+                       JUMP_LABEL_INITIAL_NOP                  \
+                       ".pushsection __jump_table,  \"a\" \n\t"\
+                       _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
+                       ".popsection \n\t"                      \
+                       : :  "i" (key) :  : label);             \
+       } while (0)
+
+#endif /* __KERNEL__ */
+
+#ifdef CONFIG_X86_64
+typedef u64 jump_label_t;
+#else
+typedef u32 jump_label_t;
+#endif
+
+struct jump_entry {
+       jump_label_t code;
+       jump_label_t target;
+       jump_label_t key;
+};
+
+#endif
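
[ Usage sketch for the macro above: a JUMP_LABEL() site compiles to
  the 5-byte nop and falls straight through on the (disabled) fast
  path; enabling the key patches the nop into a jmp to the label. The
  caller-side names below are hypothetical, and the enable/disable
  entry points are assumed from the generic jump-label code in this
  series:

        static int my_key;

        static inline void my_hook(void)
        {
                JUMP_LABEL(&my_key, do_slow);   /* nop when disabled */
                return;                         /* fast path */
        do_slow:
                my_slow_path();                 /* hypothetical */
        }

        static void my_feature_set(bool on)
        {
                if (on)
                        jump_label_enable(&my_key);
                else
                        jump_label_disable(&my_key);
        }
]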
index def5007..a70cd21 100644
 #define P4_ESCR_EMASK(v)       ((v) << P4_ESCR_EVENTMASK_SHIFT)
 #define P4_ESCR_TAG(v)         ((v) << P4_ESCR_TAG_SHIFT)
 
-/* Non HT mask */
-#define P4_ESCR_MASK                   \
-       (P4_ESCR_EVENT_MASK     |       \
-       P4_ESCR_EVENTMASK_MASK  |       \
-       P4_ESCR_TAG_MASK        |       \
-       P4_ESCR_TAG_ENABLE      |       \
-       P4_ESCR_T0_OS           |       \
-       P4_ESCR_T0_USR)
-
-/* HT mask */
-#define P4_ESCR_MASK_HT                        \
-       (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
-
 #define P4_CCCR_OVF                    0x80000000U
 #define P4_CCCR_CASCADE                        0x40000000U
 #define P4_CCCR_OVF_PMI_T0             0x04000000U
 #define P4_CCCR_THRESHOLD(v)           ((v) << P4_CCCR_THRESHOLD_SHIFT)
 #define P4_CCCR_ESEL(v)                        ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
 
-/* Non HT mask */
-#define P4_CCCR_MASK                           \
-       (P4_CCCR_OVF                    |       \
-       P4_CCCR_CASCADE                 |       \
-       P4_CCCR_OVF_PMI_T0              |       \
-       P4_CCCR_FORCE_OVF               |       \
-       P4_CCCR_EDGE                    |       \
-       P4_CCCR_THRESHOLD_MASK          |       \
-       P4_CCCR_COMPLEMENT              |       \
-       P4_CCCR_COMPARE                 |       \
-       P4_CCCR_ESCR_SELECT_MASK        |       \
-       P4_CCCR_ENABLE)
-
-/* HT mask */
-#define P4_CCCR_MASK_HT                                \
-       (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
-
 #define P4_GEN_ESCR_EMASK(class, name, bit)    \
        class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
 #define P4_ESCR_EMASK_BIT(class, name)         class##__##name
 #define P4_CONFIG_HT_SHIFT             63
 #define P4_CONFIG_HT                   (1ULL << P4_CONFIG_HT_SHIFT)
 
+/*
+ * The bits we allow through for RAW events
+ */
+#define P4_CONFIG_MASK_ESCR            \
+       P4_ESCR_EVENT_MASK      |       \
+       P4_ESCR_EVENTMASK_MASK  |       \
+       P4_ESCR_TAG_MASK        |       \
+       P4_ESCR_TAG_ENABLE
+
+#define P4_CONFIG_MASK_CCCR            \
+       P4_CCCR_EDGE            |       \
+       P4_CCCR_THRESHOLD_MASK  |       \
+       P4_CCCR_COMPLEMENT      |       \
+       P4_CCCR_COMPARE         |       \
+       P4_CCCR_THREAD_ANY      |       \
+       P4_CCCR_RESERVED
+
+/* some dangerous bits are reserved for kernel internals */
+#define P4_CONFIG_MASK                                   \
+       (p4_config_pack_escr(P4_CONFIG_MASK_ESCR))      | \
+       (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
+
 static inline bool p4_is_event_cascaded(u64 config)
 {
        u32 cccr = p4_config_unpack_cccr(config);
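
[ Note: the blanket P4_ESCR_MASK/P4_CCCR_MASK pairs removed above give
  way to an explicit whitelist: per-event valid EventMask bits in
  .escr_emask (see perf_event_p4.c below) plus P4_CONFIG_MASK for the
  bits a raw event may carry at all. The event-init path presumably
  filters user-supplied raw configs along these lines (helper name
  hypothetical):

        static u64 p4_sanitize_raw_config(u64 raw)
        {
                /*
                 * Keep only the whitelisted ESCR/CCCR bits; the
                 * kernel-owned bits (enable, overflow/PMI routing,
                 * ESCR select) must not be user-controllable.
                 */
                return raw & P4_CONFIG_MASK;
        }
]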
index fedf32a..7490bf8 100644
@@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o               := n
 obj-y                  := process_$(BITS).o signal.o entry_$(BITS).o
 obj-y                  += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y                  += time.o ioport.o ldt.o dumpstack.o
-obj-y                  += setup.o x86_init.o i8259.o irqinit.o
+obj-y                  += setup.o x86_init.o i8259.o irqinit.o jump_label.o
+obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-$(CONFIG_X86_VISWS)        += visws_quirks.o
 obj-$(CONFIG_X86_32)   += probe_roms_32.o
 obj-$(CONFIG_X86_32)   += sys_i386_32.o i386_ksyms_32.o
index f65ab8b..a36bb90 100644
@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
-static void *text_poke_early(void *addr, const void *opcode, size_t len);
+void *text_poke_early(void *addr, const void *opcode, size_t len);
 
 /* Replace instructions with better alternatives for this CPU type.
    This runs before SMP is initialized to avoid SMP problems with
@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
  * instructions. And on the local CPU you need to be protected against NMI or MCE
  * handlers seeing an inconsistent instruction while you patch.
  */
-static void *__init_or_module text_poke_early(void *addr, const void *opcode,
+void *__init_or_module text_poke_early(void *addr, const void *opcode,
                                              size_t len)
 {
        unsigned long flags;
@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
        tpp.len = len;
        atomic_set(&stop_machine_first, 1);
        wrote_text = 0;
-       stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+       /* Use __stop_machine() because the caller has already called get_online_cpus(). */
+       __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
        return addr;
 }
 
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
+
+unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+
+void __init arch_init_ideal_nop5(void)
+{
+       extern const unsigned char ftrace_test_p6nop[];
+       extern const unsigned char ftrace_test_nop5[];
+       extern const unsigned char ftrace_test_jmp[];
+       int faulted = 0;
+
+       /*
+        * There is no single nop that works best on all x86 archs.
+        * We default to the P6_NOP5, but first test that this nop
+        * actually works on the current CPU. If it faults, we fall
+        * back to a less efficient 5-byte nop. If that fails too,
+        * we just use a jmp as our nop. A jmp isn't the most
+        * efficient nop, but we cannot use a multi-part nop, since
+        * we would then risk being preempted in the middle of it,
+        * and enabling tracing at that point might crash the system.
+        *
+        * TODO: check the cpuid to determine the best nop.
+        */
+       asm volatile (
+               "ftrace_test_jmp:"
+               "jmp ftrace_test_p6nop\n"
+               "nop\n"
+               "nop\n"
+               "nop\n"  /* 2 byte jmp + 3 bytes */
+               "ftrace_test_p6nop:"
+               P6_NOP5
+               "jmp 1f\n"
+               "ftrace_test_nop5:"
+               ".byte 0x66,0x66,0x66,0x66,0x90\n"
+               "1:"
+               ".section .fixup, \"ax\"\n"
+               "2:     movl $1, %0\n"
+               "       jmp ftrace_test_nop5\n"
+               "3:     movl $2, %0\n"
+               "       jmp 1b\n"
+               ".previous\n"
+               _ASM_EXTABLE(ftrace_test_p6nop, 2b)
+               _ASM_EXTABLE(ftrace_test_nop5, 3b)
+               : "=r"(faulted) : "0" (faulted));
+
+       switch (faulted) {
+       case 0:
+               pr_info("converting mcount calls to 0f 1f 44 00 00\n");
+               memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
+               break;
+       case 1:
+               pr_info("converting mcount calls to 66 66 66 66 90\n");
+               memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
+               break;
+       case 2:
+               pr_info("converting mcount calls to jmp . + 5\n");
+               memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
+               break;
+       }
+
+}
+#endif
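
[ For context on how ideal_nop5 is consumed: the new
  arch/x86/kernel/jump_label.c added by this merge presumably toggles
  each jump-label site between this nop and a rel32 jmp via the
  text_poke_smp() machinery exported above. A sketch under that
  assumption (function name hypothetical):

        static void patch_jump_site(struct jump_entry *entry, bool enable)
        {
                union {
                        char code[JUMP_LABEL_NOP_SIZE];
                        struct {
                                char jump;      /* 0xe9 = jmp rel32 */
                                int offset;
                        } __attribute__((packed));
                } code;

                if (enable) {
                        code.jump = 0xe9;
                        code.offset = entry->target -
                                      (entry->code + JUMP_LABEL_NOP_SIZE);
                } else {
                        /* restore the boot-time selected ideal nop */
                        memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
                }
                text_poke_smp((void *)(unsigned long)entry->code,
                              &code, JUMP_LABEL_NOP_SIZE);
        }
]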
index 03a5b03..fe73c18 100644
@@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
 /*
  * Setup the hardware configuration for a given attr_type
  */
-static int __hw_perf_event_init(struct perf_event *event)
+static int __x86_pmu_event_init(struct perf_event *event)
 {
        int err;
 
@@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void)
        }
 }
 
-void hw_perf_disable(void)
+static void x86_pmu_disable(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added)
        }
 }
 
-static const struct pmu pmu;
+static struct pmu pmu;
 
 static inline int is_x86_event(struct perf_event *event)
 {
@@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
                hwc->last_tag == cpuc->tags[i];
 }
 
-static int x86_pmu_start(struct perf_event *event);
-static void x86_pmu_stop(struct perf_event *event);
+static void x86_pmu_start(struct perf_event *event, int flags);
+static void x86_pmu_stop(struct perf_event *event, int flags);
 
-void hw_perf_enable(void)
+static void x86_pmu_enable(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct perf_event *event;
@@ -840,7 +840,14 @@ void hw_perf_enable(void)
                            match_prev_assignment(hwc, cpuc, i))
                                continue;
 
-                       x86_pmu_stop(event);
+                       /*
+                        * Ensure we don't accidentally enable a stopped
+                        * counter simply because we rescheduled.
+                        */
+                       if (hwc->state & PERF_HES_STOPPED)
+                               hwc->state |= PERF_HES_ARCH;
+
+                       x86_pmu_stop(event, PERF_EF_UPDATE);
                }
 
                for (i = 0; i < cpuc->n_events; i++) {
@@ -852,7 +859,10 @@ void hw_perf_enable(void)
                        else if (i < n_running)
                                continue;
 
-                       x86_pmu_start(event);
+                       if (hwc->state & PERF_HES_ARCH)
+                               continue;
+
+                       x86_pmu_start(event, PERF_EF_RELOAD);
                }
                cpuc->n_added = 0;
                perf_events_lapic_init();
@@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event)
 }
 
 /*
- * activate a single event
+ * Add a single event to the PMU.
  *
  * The event is added to the group of enabled events
  * but only if it can be scheduled with existing events.
- *
- * Called with PMU disabled. If successful and return value 1,
- * then guaranteed to call perf_enable() and hw_perf_enable()
  */
-static int x86_pmu_enable(struct perf_event *event)
+static int x86_pmu_add(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc;
@@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event)
 
        hwc = &event->hw;
 
+       perf_pmu_disable(event->pmu);
        n0 = cpuc->n_events;
-       n = collect_events(cpuc, event, false);
-       if (n < 0)
-               return n;
+       ret = n = collect_events(cpuc, event, false);
+       if (ret < 0)
+               goto out;
+
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+       if (!(flags & PERF_EF_START))
+               hwc->state |= PERF_HES_ARCH;
 
        /*
         * If group events scheduling transaction was started,
         * skip the schedulability test here, it will be performed
-        * at commit time(->commit_txn) as a whole
+        * at commit time (->commit_txn) as a whole
         */
        if (cpuc->group_flag & PERF_EVENT_TXN)
-               goto out;
+               goto done_collect;
 
        ret = x86_pmu.schedule_events(cpuc, n, assign);
        if (ret)
-               return ret;
+               goto out;
        /*
         * copy the new assignment; now that we know it is possible,
         * it will be used by hw_perf_enable()
         */
        memcpy(cpuc->assign, assign, n*sizeof(int));
 
-out:
+done_collect:
        cpuc->n_events = n;
        cpuc->n_added += n - n0;
        cpuc->n_txn += n - n0;
 
-       return 0;
+       ret = 0;
+out:
+       perf_pmu_enable(event->pmu);
+       return ret;
 }
 
-static int x86_pmu_start(struct perf_event *event)
+static void x86_pmu_start(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int idx = event->hw.idx;
 
-       if (idx == -1)
-               return -EAGAIN;
+       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+               return;
+
+       if (WARN_ON_ONCE(idx == -1))
+               return;
+
+       if (flags & PERF_EF_RELOAD) {
+               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+               x86_perf_event_set_period(event);
+       }
+
+       event->hw.state = 0;
 
-       x86_perf_event_set_period(event);
        cpuc->events[idx] = event;
        __set_bit(idx, cpuc->active_mask);
        __set_bit(idx, cpuc->running);
        x86_pmu.enable(event);
        perf_event_update_userpage(event);
-
-       return 0;
-}
-
-static void x86_pmu_unthrottle(struct perf_event *event)
-{
-       int ret = x86_pmu_start(event);
-       WARN_ON_ONCE(ret);
 }
 
 void perf_event_print_debug(void)
@@ -1078,27 +1094,29 @@ void perf_event_print_debug(void)
        local_irq_restore(flags);
 }
 
-static void x86_pmu_stop(struct perf_event *event)
+static void x86_pmu_stop(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
-       int idx = hwc->idx;
 
-       if (!__test_and_clear_bit(idx, cpuc->active_mask))
-               return;
-
-       x86_pmu.disable(event);
-
-       /*
-        * Drain the remaining delta count out of a event
-        * that we are disabling:
-        */
-       x86_perf_event_update(event);
+       if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
+               x86_pmu.disable(event);
+               cpuc->events[hwc->idx] = NULL;
+               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+               hwc->state |= PERF_HES_STOPPED;
+       }
 
-       cpuc->events[idx] = NULL;
+       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               /*
+                * Drain the remaining delta count out of an event
+                * that we are disabling:
+                */
+               x86_perf_event_update(event);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
 }
 
-static void x86_pmu_disable(struct perf_event *event)
+static void x86_pmu_del(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int i;
@@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event)
        if (cpuc->group_flag & PERF_EVENT_TXN)
                return;
 
-       x86_pmu_stop(event);
+       x86_pmu_stop(event, PERF_EF_UPDATE);
 
        for (i = 0; i < cpuc->n_events; i++) {
                if (event == cpuc->event_list[i]) {
@@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
        struct perf_sample_data data;
        struct cpu_hw_events *cpuc;
        struct perf_event *event;
-       struct hw_perf_event *hwc;
        int idx, handled = 0;
        u64 val;
 
@@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
                }
 
                event = cpuc->events[idx];
-               hwc = &event->hw;
 
                val = x86_perf_event_update(event);
                if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
                        continue;
 
                if (perf_event_overflow(event, 1, &data, regs))
-                       x86_pmu_stop(event);
+                       x86_pmu_stop(event, 0);
        }
 
        if (handled)
@@ -1180,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
        return handled;
 }
 
-void smp_perf_pending_interrupt(struct pt_regs *regs)
-{
-       irq_enter();
-       ack_APIC_irq();
-       inc_irq_stat(apic_pending_irqs);
-       perf_event_do_pending();
-       irq_exit();
-}
-
-void set_perf_event_pending(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
-       if (!x86_pmu.apic || !x86_pmu_initialized())
-               return;
-
-       apic->send_IPI_self(LOCAL_PENDING_VECTOR);
-#endif
-}
-
 void perf_events_lapic_init(void)
 {
        if (!x86_pmu.apic || !x86_pmu_initialized())
@@ -1388,7 +1385,6 @@ void __init init_hw_perf_events(void)
                x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
        }
        x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
-       perf_max_events = x86_pmu.num_counters;
 
        if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
                WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
@@ -1424,6 +1420,7 @@ void __init init_hw_perf_events(void)
        pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
        pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
+       perf_pmu_register(&pmu);
        perf_cpu_notifier(x86_pmu_notifier);
 }
 
@@ -1437,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event)
  * Set the flag to make pmu::enable() not perform the
  * schedulability test; it will be performed at commit time
  */
-static void x86_pmu_start_txn(const struct pmu *pmu)
+static void x86_pmu_start_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
+       perf_pmu_disable(pmu);
        cpuc->group_flag |= PERF_EVENT_TXN;
        cpuc->n_txn = 0;
 }
@@ -1450,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
  * Clear the flag and pmu::enable() will perform the
  * schedulability test.
  */
-static void x86_pmu_cancel_txn(const struct pmu *pmu)
+static void x86_pmu_cancel_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -1460,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
         */
        cpuc->n_added -= cpuc->n_txn;
        cpuc->n_events -= cpuc->n_txn;
+       perf_pmu_enable(pmu);
 }
 
 /*
@@ -1467,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
  * Perform the group schedulability test as a whole
  * Return 0 on success
  */
-static int x86_pmu_commit_txn(const struct pmu *pmu)
+static int x86_pmu_commit_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int assign[X86_PMC_IDX_MAX];
@@ -1489,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
        memcpy(cpuc->assign, assign, n*sizeof(int));
 
        cpuc->group_flag &= ~PERF_EVENT_TXN;
-
+       perf_pmu_enable(pmu);
        return 0;
 }
 
-static const struct pmu pmu = {
-       .enable         = x86_pmu_enable,
-       .disable        = x86_pmu_disable,
-       .start          = x86_pmu_start,
-       .stop           = x86_pmu_stop,
-       .read           = x86_pmu_read,
-       .unthrottle     = x86_pmu_unthrottle,
-       .start_txn      = x86_pmu_start_txn,
-       .cancel_txn     = x86_pmu_cancel_txn,
-       .commit_txn     = x86_pmu_commit_txn,
-};
-
 /*
  * validate that we can schedule this event
  */
@@ -1579,12 +1566,22 @@ out:
        return ret;
 }
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+int x86_pmu_event_init(struct perf_event *event)
 {
-       const struct pmu *tmp;
+       struct pmu *tmp;
        int err;
 
-       err = __hw_perf_event_init(event);
+       switch (event->attr.type) {
+       case PERF_TYPE_RAW:
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               break;
+
+       default:
+               return -ENOENT;
+       }
+
+       err = __x86_pmu_event_init(event);
        if (!err) {
                /*
                 * we temporarily connect event to its pmu
@@ -1604,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
        if (err) {
                if (event->destroy)
                        event->destroy(event);
-               return ERR_PTR(err);
        }
 
-       return &pmu;
+       return err;
 }
 
-/*
- * callchain support
- */
+static struct pmu pmu = {
+       .pmu_enable     = x86_pmu_enable,
+       .pmu_disable    = x86_pmu_disable,
 
-static inline
-void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-       if (entry->nr < PERF_MAX_STACK_DEPTH)
-               entry->ip[entry->nr++] = ip;
-}
+       .event_init     = x86_pmu_event_init,
 
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
+       .add            = x86_pmu_add,
+       .del            = x86_pmu_del,
+       .start          = x86_pmu_start,
+       .stop           = x86_pmu_stop,
+       .read           = x86_pmu_read,
 
+       .start_txn      = x86_pmu_start_txn,
+       .cancel_txn     = x86_pmu_cancel_txn,
+       .commit_txn     = x86_pmu_commit_txn,
+};
+
+/*
+ * callchain support
+ */
 
 static void
 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
@@ -1645,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
        struct perf_callchain_entry *entry = data;
 
-       callchain_store(entry, addr);
+       perf_callchain_store(entry, addr);
 }
 
 static const struct stacktrace_ops backtrace_ops = {
@@ -1656,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = {
        .walk_stack             = print_context_stack_bp,
 };
 
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
-       callchain_store(entry, PERF_CONTEXT_KERNEL);
-       callchain_store(entry, regs->ip);
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* TODO: We don't support guest OS callchains yet */
+               return;
+       }
+
+       perf_callchain_store(entry, regs->ip);
 
        dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
@@ -1689,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
                if (fp < compat_ptr(regs->sp))
                        break;
 
-               callchain_store(entry, frame.return_address);
+               perf_callchain_store(entry, frame.return_address);
                fp = compat_ptr(frame.next_frame);
        }
        return 1;
@@ -1702,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 }
 #endif
 
-static void
-perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
        struct stack_frame frame;
        const void __user *fp;
 
-       if (!user_mode(regs))
-               regs = task_pt_regs(current);
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* TODO: We don't support guest OS callchains yet */
+               return;
+       }
 
        fp = (void __user *)regs->bp;
 
-       callchain_store(entry, PERF_CONTEXT_USER);
-       callchain_store(entry, regs->ip);
+       perf_callchain_store(entry, regs->ip);
 
        if (perf_callchain_user32(regs, entry))
                return;
@@ -1731,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
                if ((unsigned long)fp < regs->sp)
                        break;
 
-               callchain_store(entry, frame.return_address);
+               perf_callchain_store(entry, frame.return_address);
                fp = frame.next_frame;
        }
 }
 
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-       int is_user;
-
-       if (!regs)
-               return;
-
-       is_user = user_mode(regs);
-
-       if (is_user && current->state != TASK_RUNNING)
-               return;
-
-       if (!is_user)
-               perf_callchain_kernel(regs, entry);
-
-       if (current->mm)
-               perf_callchain_user(regs, entry);
-}
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-       struct perf_callchain_entry *entry;
-
-       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-               /* TODO: We don't support guest os callchain now */
-               return NULL;
-       }
-
-       if (in_nmi())
-               entry = &__get_cpu_var(pmc_nmi_entry);
-       else
-               entry = &__get_cpu_var(pmc_irq_entry);
-
-       entry->nr = 0;
-
-       perf_do_callchain(regs, entry);
-
-       return entry;
-}
-
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
        unsigned long ip;
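
[ Summary of the API conversion above: the old ->enable/->disable/
  ->unthrottle callbacks become ->add/->del/->start/->stop with
  PERF_EF_* flags and PERF_HES_* state bits, and perf_pmu_disable()/
  perf_pmu_enable() are folded into the transaction hooks. An
  illustrative caller-side sketch of the expected sequencing (this is
  not the actual core code; the iteration is simplified):

        static int schedule_group(struct pmu *pmu, struct perf_event *leader)
        {
                struct perf_event *event;

                pmu->start_txn(pmu);            /* also disables the PMU */

                if (pmu->add(leader, PERF_EF_START))
                        goto fail;
                list_for_each_entry(event, &leader->sibling_list, group_entry) {
                        if (pmu->add(event, PERF_EF_START))
                                goto fail;
                }

                if (!pmu->commit_txn(pmu))      /* group test, re-enables */
                        return 0;
        fail:
                pmu->cancel_txn(pmu);           /* unwind, re-enables */
                return -EAGAIN;
        }

  Throttling then becomes ->stop(event, PERF_EF_UPDATE) followed later
  by ->start(event, PERF_EF_RELOAD), which is exactly what the
  PERF_HES_STOPPED/PERF_HES_UPTODATE bookkeeping above guards. ]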
index c2897b7..46d5844 100644
@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids
  [ C(DTLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-               [ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
+               [ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0,
@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids
  [ C(ITLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
-               [ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
+               [ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
index ee05c90..c8f5c08 100644
@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
        struct cpu_hw_events *cpuc;
        int bit, loops;
        u64 status;
-       int handled = 0;
+       int handled;
 
        perf_sample_data_init(&data, 0);
 
        cpuc = &__get_cpu_var(cpu_hw_events);
 
        intel_pmu_disable_all();
-       intel_pmu_drain_bts_buffer();
+       handled = intel_pmu_drain_bts_buffer();
        status = intel_pmu_get_status();
        if (!status) {
                intel_pmu_enable_all(0);
-               return 0;
+               return handled;
        }
 
        loops = 0;
@@ -763,7 +763,7 @@ again:
                data.period = event->hw.last_period;
 
                if (perf_event_overflow(event, 1, &data, regs))
-                       x86_pmu_stop(event);
+                       x86_pmu_stop(event, 0);
        }
 
        /*
index 18018d1..4977f9c 100644
@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
        update_debugctlmsr(debugctlmsr);
 }
 
-static void intel_pmu_drain_bts_buffer(void)
+static int intel_pmu_drain_bts_buffer(void)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
        struct pt_regs regs;
 
        if (!event)
-               return;
+               return 0;
 
        if (!ds)
-               return;
+               return 0;
 
        at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
        top = (struct bts_record *)(unsigned long)ds->bts_index;
 
        if (top <= at)
-               return;
+               return 0;
 
        ds->bts_index = ds->bts_buffer_base;
 
@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
        perf_prepare_sample(&header, &data, event, &regs);
 
        if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
-               return;
+               return 1;
 
        for (; at < top; at++) {
                data.ip         = at->from;
@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
        /* There's new data available. */
        event->hw.interrupts++;
        event->pending_kill = POLL_IN;
+       return 1;
 }
 
 /*
@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
                regs.flags &= ~PERF_EFLAGS_EXACT;
 
        if (perf_event_overflow(event, 1, &data, &regs))
-               x86_pmu_stop(event);
+               x86_pmu_stop(event, 0);
 }
 
 static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
index 2490151..81400b9 100644
@@ -18,6 +18,8 @@
 struct p4_event_bind {
        unsigned int opcode;                    /* Event code and ESCR selector */
        unsigned int escr_msr[2];               /* ESCR MSR for this event */
+       unsigned int escr_emask;                /* valid ESCR EventMask bits */
+       unsigned int shared;                    /* event is shared across threads */
        char cntr[2][P4_CNTR_LIMIT];            /* counter index (offset), -1 on absence */
 };
 
@@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = {
        [P4_EVENT_TC_DELIVER_MODE] = {
                .opcode         = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
                .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
+               .shared         = 1,
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_BPU_FETCH_REQUEST] = {
                .opcode         = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
                .escr_msr       = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_ITLB_REFERENCE] = {
                .opcode         = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
                .escr_msr       = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_MEMORY_CANCEL] = {
                .opcode         = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
                .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_MEMORY_COMPLETE] = {
                .opcode         = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
                .escr_msr       = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_LOAD_PORT_REPLAY] = {
                .opcode         = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
                .escr_msr       = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_STORE_PORT_REPLAY] = {
                .opcode         = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
                .escr_msr       = { MSR_P4_SAAT_ESCR0 ,  MSR_P4_SAAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_MOB_LOAD_REPLAY] = {
                .opcode         = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
                .escr_msr       = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_PAGE_WALK_TYPE] = {
                .opcode         = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
                .escr_msr       = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
+               .shared         = 1,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_BSQ_CACHE_REFERENCE] = {
                .opcode         = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
                .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_IOQ_ALLOCATION] = {
                .opcode         = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER)               |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_IOQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
                .opcode         = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
                .escr_msr       = { MSR_P4_FSB_ESCR1,  MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
                .cntr           = { {2, -1, -1}, {3, -1, -1} },
        },
        [P4_EVENT_FSB_DATA_ACTIVITY] = {
                .opcode         = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
+               .shared         = 1,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_BSQ_ALLOCATION] = {           /* shared ESCR, broken CCCR1 */
                .opcode         = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
                .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE)      |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE)      |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
                .cntr           = { {0, -1, -1}, {1, -1, -1} },
        },
        [P4_EVENT_BSQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
                .opcode         = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
                .escr_msr       = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE)     |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE)  |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE)  |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
                .cntr           = { {2, -1, -1}, {3, -1, -1} },
        },
        [P4_EVENT_SSE_INPUT_ASSIST] = {
                .opcode         = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
+               .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_PACKED_SP_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
+               .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_PACKED_DP_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
+               .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_SCALAR_SP_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
+               .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_SCALAR_DP_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
+               .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_64BIT_MMX_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
+               .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_128BIT_MMX_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
+               .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_X87_FP_UOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_X87_FP_UOP),
                .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
+               .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_TC_MISC] = {
                .opcode         = P4_OPCODE(P4_EVENT_TC_MISC),
                .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_GLOBAL_POWER_EVENTS] = {
                .opcode         = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_TC_MS_XFER] = {
                .opcode         = P4_OPCODE(P4_EVENT_TC_MS_XFER),
                .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_UOP_QUEUE_WRITES] = {
                .opcode         = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
                .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD)     |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
                .opcode         = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
                .escr_msr       = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_RETIRED_BRANCH_TYPE] = {
                .opcode         = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
                .escr_msr       = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
                .cntr           = { {4, 5, -1}, {6, 7, -1} },
        },
        [P4_EVENT_RESOURCE_STALL] = {
                .opcode         = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
                .escr_msr       = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_WC_BUFFER] = {
                .opcode         = P4_OPCODE(P4_EVENT_WC_BUFFER),
                .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS)               |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
+               .shared         = 1,
                .cntr           = { {8, 9, -1}, {10, 11, -1} },
        },
        [P4_EVENT_B2B_CYCLES] = {
                .opcode         = P4_OPCODE(P4_EVENT_B2B_CYCLES),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_BNR] = {
                .opcode         = P4_OPCODE(P4_EVENT_BNR),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_SNOOP] = {
                .opcode         = P4_OPCODE(P4_EVENT_SNOOP),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_RESPONSE] = {
                .opcode         = P4_OPCODE(P4_EVENT_RESPONSE),
                .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
                .cntr           = { {0, -1, -1}, {2, -1, -1} },
        },
        [P4_EVENT_FRONT_END_EVENT] = {
                .opcode         = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_EXECUTION_EVENT] = {
                .opcode         = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_REPLAY_EVENT] = {
                .opcode         = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_INSTR_RETIRED] = {
                .opcode         = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
                .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_UOPS_RETIRED] = {
                .opcode         = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
                .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_UOP_TYPE] = {
                .opcode         = P4_OPCODE(P4_EVENT_UOP_TYPE),
                .escr_msr       = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS)                  |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_BRANCH_RETIRED] = {
                .opcode         = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
                .opcode         = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
                .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+               P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_X87_ASSIST] = {
                .opcode         = P4_OPCODE(P4_EVENT_X87_ASSIST),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_MACHINE_CLEAR] = {
                .opcode         = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
                .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
        [P4_EVENT_INSTR_COMPLETED] = {
                .opcode         = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
                .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
                .cntr           = { {12, 13, 16}, {14, 15, 17} },
        },
 };
@@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event)
        return config;
 }
 
+/* Check cpu model specifics */
+static bool p4_event_match_cpu_model(unsigned int event_idx)
+{
+       /* The INSTR_COMPLETED event only exists on models 3, 4 and 6 (Prescott) */
+       if (event_idx == P4_EVENT_INSTR_COMPLETED) {
+               if (boot_cpu_data.x86_model != 3 &&
+                       boot_cpu_data.x86_model != 4 &&
+                       boot_cpu_data.x86_model != 6)
+                       return false;
+       }
+
+       /*
+        * For information:
+        * - IQ_ESCR0 and IQ_ESCR1 are available only on models 1 and 2
+        */
+
+       return true;
+}
+
 static int p4_validate_raw_event(struct perf_event *event)
 {
-       unsigned int v;
+       unsigned int v, emask;
 
-       /* user data may have out-of-bound event index */
+       /* User data may have an out-of-bounds event index */
        v = p4_config_unpack_event(event->attr.config);
-       if (v >= ARRAY_SIZE(p4_event_bind_map)) {
-               pr_warning("P4 PMU: Unknown event code: %d\n", v);
+       if (v >= ARRAY_SIZE(p4_event_bind_map))
+               return -EINVAL;
+
+       /* It may be unsupported: */
+       if (!p4_event_match_cpu_model(v))
                return -EINVAL;
+
+       /*
+        * NOTE: P4_CCCR_THREAD_ANY does not mean the same thing as it
+        * does in Architectural Performance Monitoring: it selects not
+        * _which_ logical cpu to count on but rather _when_, i.e. it
+        * depends on the logical cpu state -- count the event if one
+        * cpu is active, none, both or any -- so we simply allow the
+        * user to pass any desired value.
+        *
+        * In turn we always set the Tx_OS/Tx_USR bits bound to the
+        * logical cpu without propagating them to the other cpu.
+        */
+
+       /*
+        * If an event is shared across the logical threads,
+        * the user needs special permissions to be able to use it.
+        */
+       if (p4_event_bind_map[v].shared) {
+               if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+                       return -EACCES;
        }
 
+       /* ESCR EventMask bits may be invalid */
+       emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
+       if (emask & ~p4_event_bind_map[v].escr_emask)
+               return -EINVAL;
+
        /*
-        * it may have some screwed PEBS bits
+        * it may have some invalid PEBS bits
         */
-       if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
-               pr_warning("P4 PMU: PEBS are not supported yet\n");
+       if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
                return -EINVAL;
-       }
+
        v = p4_config_unpack_metric(event->attr.config);
-       if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
-               pr_warning("P4 PMU: Unknown metric code: %d\n", v);
+       if (v >= ARRAY_SIZE(p4_pebs_bind_map))
                return -EINVAL;
-       }
 
        return 0;
 }
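
The escr_emask fields added above act as a per-event whitelist: any ESCR
event-mask bit in a raw config that is not listed for that event now
invalidates the whole config. A minimal, self-contained illustration of
that check in plain C (the whitelist value is hypothetical, standing in
for a single .escr_emask entry):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical whitelist: only event-mask bits 2 and 3 are valid,
 * mirroring what one .escr_emask entry encodes. */
#define VALID_EMASK     ((1u << 2) | (1u << 3))

static int validate_emask(uint32_t emask)
{
        /* Any bit outside the whitelist rejects the config. */
        return (emask & ~VALID_EMASK) ? -1 : 0;
}

int main(void)
{
        printf("%d\n", validate_emask(1u << 2));                /* 0: accepted */
        printf("%d\n", validate_emask((1u << 2) | (1u << 5)));  /* -1: rejected */
        return 0;
}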
@@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event)
 
        if (event->attr.type == PERF_TYPE_RAW) {
 
+               /*
+                * Clear the bits we reserve to be managed by the kernel
+                * itself and that are never allowed from user space.
+                */
+               event->attr.config &= P4_CONFIG_MASK;
+
                rc = p4_validate_raw_event(event);
                if (rc)
                        goto out;
 
                /*
-                * We don't control raw events so it's up to the caller
-                * to pass sane values (and we don't count the thread number
-                * on HT machine but allow HT-compatible specifics to be
-                * passed on)
-                *
                 * Note that for RAW events we allow the user to use P4_CCCR_RESERVED
                 * bits since we keep additional info here (for cache events etc.)
-                *
-                * XXX: HT wide things should check perf_paranoid_cpu() &&
-                *      CAP_SYS_ADMIN
                 */
-               event->hw.config |= event->attr.config &
-                       (p4_config_pack_escr(P4_ESCR_MASK_HT) |
-                        p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
-
-               event->hw.config &= ~P4_CCCR_FORCE_OVF;
+               event->hw.config |= event->attr.config;
        }
 
        rc = x86_setup_perfctr(event);
index 17be5ec..c375c79 100644 (file)
@@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \
 apicinterrupt SPURIOUS_APIC_VECTOR \
        spurious_interrupt smp_spurious_interrupt
 
-#ifdef CONFIG_PERF_EVENTS
-apicinterrupt LOCAL_PENDING_VECTOR \
-       perf_pending_interrupt smp_perf_pending_interrupt
+#ifdef CONFIG_IRQ_WORK
+apicinterrupt IRQ_WORK_VECTOR \
+       irq_work_interrupt smp_irq_work_interrupt
 #endif
 
 /*
index cd37469..3afb33f 100644 (file)
@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
        return mod_code_status;
 }
 
-
-
-
-static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
-
 static unsigned char *ftrace_nop_replace(void)
 {
-       return ftrace_nop;
+       return ideal_nop5;
 }
 
 static int
@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 
 int __init ftrace_dyn_arch_init(void *data)
 {
-       extern const unsigned char ftrace_test_p6nop[];
-       extern const unsigned char ftrace_test_nop5[];
-       extern const unsigned char ftrace_test_jmp[];
-       int faulted = 0;
-
-       /*
-        * There is no good nop for all x86 archs.
-        * We will default to using the P6_NOP5, but first we
-        * will test to make sure that the nop will actually
-        * work on this CPU. If it faults, we will then
-        * go to a lesser efficient 5 byte nop. If that fails
-        * we then just use a jmp as our nop. This isn't the most
-        * efficient nop, but we can not use a multi part nop
-        * since we would then risk being preempted in the middle
-        * of that nop, and if we enabled tracing then, it might
-        * cause a system crash.
-        *
-        * TODO: check the cpuid to determine the best nop.
-        */
-       asm volatile (
-               "ftrace_test_jmp:"
-               "jmp ftrace_test_p6nop\n"
-               "nop\n"
-               "nop\n"
-               "nop\n"  /* 2 byte jmp + 3 bytes */
-               "ftrace_test_p6nop:"
-               P6_NOP5
-               "jmp 1f\n"
-               "ftrace_test_nop5:"
-               ".byte 0x66,0x66,0x66,0x66,0x90\n"
-               "1:"
-               ".section .fixup, \"ax\"\n"
-               "2:     movl $1, %0\n"
-               "       jmp ftrace_test_nop5\n"
-               "3:     movl $2, %0\n"
-               "       jmp 1b\n"
-               ".previous\n"
-               _ASM_EXTABLE(ftrace_test_p6nop, 2b)
-               _ASM_EXTABLE(ftrace_test_nop5, 3b)
-               : "=r"(faulted) : "0" (faulted));
-
-       switch (faulted) {
-       case 0:
-               pr_info("converting mcount calls to 0f 1f 44 00 00\n");
-               memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
-               break;
-       case 1:
-               pr_info("converting mcount calls to 66 66 66 66 90\n");
-               memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
-               break;
-       case 2:
-               pr_info("converting mcount calls to jmp . + 5\n");
-               memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
-               break;
-       }
-
        /* The return code is returned via data */
        *(unsigned long *)data = 0;
 
index 91fd0c7..44edb03 100644 (file)
@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
        for_each_online_cpu(j)
                seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
        seq_printf(p, "  Performance monitoring interrupts\n");
-       seq_printf(p, "%*s: ", prec, "PND");
+       seq_printf(p, "%*s: ", prec, "IWI");
        for_each_online_cpu(j)
-               seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
-       seq_printf(p, "  Performance pending work\n");
+               seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
+       seq_printf(p, "  IRQ work interrupts\n");
 #endif
        if (x86_platform_ipi_callback) {
                seq_printf(p, "%*s: ", prec, "PLT");
@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
        sum += irq_stats(cpu)->apic_timer_irqs;
        sum += irq_stats(cpu)->irq_spurious_count;
        sum += irq_stats(cpu)->apic_perf_irqs;
-       sum += irq_stats(cpu)->apic_pending_irqs;
+       sum += irq_stats(cpu)->apic_irq_work_irqs;
 #endif
        if (x86_platform_ipi_callback)
                sum += irq_stats(cpu)->x86_platform_ipis;
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
new file mode 100644 (file)
index 0000000..ca8f703
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * x86 specific code for irq_work
+ *
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/irq_work.h>
+#include <linux/hardirq.h>
+#include <asm/apic.h>
+
+void smp_irq_work_interrupt(struct pt_regs *regs)
+{
+       irq_enter();
+       ack_APIC_irq();
+       inc_irq_stat(apic_irq_work_irqs);
+       irq_work_run();
+       irq_exit();
+}
+
+void arch_irq_work_raise(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+       if (!cpu_has_apic)
+               return;
+
+       apic->send_IPI_self(IRQ_WORK_VECTOR);
+       apic_wait_icr_idle();
+#endif
+}
index 990ae7c..713969b 100644 (file)
@@ -224,9 +224,9 @@ static void __init apic_intr_init(void)
        alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
        alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 
-       /* Performance monitoring interrupts: */
-# ifdef CONFIG_PERF_EVENTS
-       alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
+       /* IRQ work interrupts: */
+# ifdef CONFIG_IRQ_WORK
+       alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
 # endif
 
 #endif
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
new file mode 100644 (file)
index 0000000..961b6b3
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * jump label x86 support
+ *
+ * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
+ *
+ */
+#include <linux/jump_label.h>
+#include <linux/memory.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/cpu.h>
+#include <asm/kprobes.h>
+#include <asm/alternative.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+union jump_code_union {
+       char code[JUMP_LABEL_NOP_SIZE];
+       struct {
+               char jump;
+               int offset;
+       } __attribute__((packed));
+};
+
+void arch_jump_label_transform(struct jump_entry *entry,
+                              enum jump_label_type type)
+{
+       union jump_code_union code;
+
+       if (type == JUMP_LABEL_ENABLE) {
+               code.jump = 0xe9;
+               code.offset = entry->target -
+                               (entry->code + JUMP_LABEL_NOP_SIZE);
+       } else
+               memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+       get_online_cpus();
+       mutex_lock(&text_mutex);
+       text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+       mutex_unlock(&text_mutex);
+       put_online_cpus();
+}
+
+void arch_jump_label_text_poke_early(jump_label_t addr)
+{
+       text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+}
+
+#endif
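
arch_jump_label_transform() above patches each jump-label site between a
5-byte relative jump (opcode 0xe9) and the 5-byte ideal_nop5. A small
stand-alone sketch of the jump encoding, using hypothetical addresses:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define JUMP_LABEL_NOP_SIZE 5

int main(void)
{
        /* Hypothetical site and target addresses. */
        uint64_t code = 0x1000, target = 0x1040;
        unsigned char insn[JUMP_LABEL_NOP_SIZE];
        /* rel32 is relative to the end of the 5-byte instruction,
         * as computed in arch_jump_label_transform() above. */
        int32_t rel = (int32_t)(target - (code + JUMP_LABEL_NOP_SIZE));

        insn[0] = 0xe9;                         /* jmp rel32 */
        memcpy(&insn[1], &rel, sizeof(rel));
        printf("e9 rel32=%d\n", rel);           /* 59 for these addresses */
        return 0;
}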
index 770ebfb..1cbd54c 100644 (file)
@@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
        return 0;
 }
 
-/* Dummy buffers for kallsyms_lookup */
-static char __dummy_buf[KSYM_NAME_LEN];
-
 /* Check if paddr is at an instruction boundary */
 static int __kprobes can_probe(unsigned long paddr)
 {
@@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr)
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];
 
-       if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf))
+       if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
                return 0;
 
        /* Decode instructions */
@@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
        *(unsigned long *)addr = val;
 }
 
-void __kprobes kprobes_optinsn_template_holder(void)
+static void __used __kprobes kprobes_optinsn_template_holder(void)
 {
        asm volatile (
                        ".global optprobe_template_entry\n"
@@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
        }
        /* Check whether the address range is reserved */
        if (ftrace_text_reserved(src, src + len - 1) ||
-           alternatives_text_reserved(src, src + len - 1))
+           alternatives_text_reserved(src, src + len - 1) ||
+           jump_label_text_reserved(src, src + len - 1))
                return -EBUSY;
 
        return len;
@@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr)
        unsigned long addr, size = 0, offset = 0;
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];
-       /* Dummy buffers for lookup_symbol_attrs */
-       static char __dummy_buf[KSYM_NAME_LEN];
 
        /* Lookup symbol including addr */
-       if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
+       if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
                return 0;
 
        /* Check there is enough space for a relative jump. */
index 1c355c5..8f29560 100644 (file)
@@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr,
                apply_paravirt(pseg, pseg + para->sh_size);
        }
 
+       /* make jump label nops */
+       jump_label_apply_nops(me);
+
        return 0;
 }
 
index c3a4fbb..00e1678 100644 (file)
 #include <asm/numa_64.h>
 #endif
 #include <asm/mce.h>
+#include <asm/alternative.h>
 
 /*
  * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p)
 {
        int acpi = 0;
        int k8 = 0;
+       unsigned long flags;
 
 #ifdef CONFIG_X86_32
        memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p)
        x86_init.oem.banner();
 
        mcheck_init();
+
+       local_irq_save(flags);
+       arch_init_ideal_nop5();
+       local_irq_restore(flags);
 }
 
 #ifdef CONFIG_X86_32
index 4c4508e..a24c6cf 100644 (file)
@@ -251,6 +251,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
        if (!(address >= VMALLOC_START && address < VMALLOC_END))
                return -1;
 
+       WARN_ON_ONCE(in_nmi());
+
        /*
         * Synchronize this task's top level page-table
         * with the 'reference' page table.
@@ -369,6 +371,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
        if (!(address >= VMALLOC_START && address < VMALLOC_END))
                return -1;
 
+       WARN_ON_ONCE(in_nmi());
+
        /*
         * Copy kernel mappings over when needed. This can also
         * happen within a race in page table update. In the later
index b3b531a..d87dd6d 100644 (file)
@@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
        if (!pte)
                return false;
 
+       WARN_ON_ONCE(in_nmi());
+
        if (error_code & 2)
                kmemcheck_access(regs, address, KMEMCHECK_WRITE);
        else
index 3855096..2d49d4e 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/ptrace.h>
 #include <asm/uaccess.h>
 #include <asm/stacktrace.h>
+#include <linux/compat.h>
 
 static void backtrace_warning_symbol(void *data, char *msg,
                                     unsigned long symbol)
@@ -48,14 +49,12 @@ static struct stacktrace_ops backtrace_ops = {
        .walk_stack     = print_context_stack,
 };
 
-struct frame_head {
-       struct frame_head *bp;
-       unsigned long ret;
-} __attribute__((packed));
-
-static struct frame_head *dump_user_backtrace(struct frame_head *head)
+#ifdef CONFIG_COMPAT
+static struct stack_frame_ia32 *
+dump_user_backtrace_32(struct stack_frame_ia32 *head)
 {
-       struct frame_head bufhead[2];
+       struct stack_frame_ia32 bufhead[2];
+       struct stack_frame_ia32 *fp;
 
        /* Also check accessibility of one struct stack_frame_ia32 beyond */
        if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
@@ -63,20 +62,66 @@ static struct frame_head *dump_user_backtrace(struct frame_head *head)
        if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
                return NULL;
 
-       oprofile_add_trace(bufhead[0].ret);
+       fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
+
+       oprofile_add_trace(bufhead[0].return_address);
+
+       /* frame pointers should strictly progress back up the stack
+        * (towards higher addresses) */
+       if (head >= fp)
+               return NULL;
+
+       return fp;
+}
+
+static inline int
+x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
+{
+       struct stack_frame_ia32 *head;
+
+       /* User process is 32-bit */
+       if (!current || !test_thread_flag(TIF_IA32))
+               return 0;
+
+       head = (struct stack_frame_ia32 *) regs->bp;
+       while (depth-- && head)
+               head = dump_user_backtrace_32(head);
+
+       return 1;
+}
+
+#else
+static inline int
+x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
+{
+       return 0;
+}
+#endif /* CONFIG_COMPAT */
+
+static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
+{
+       struct stack_frame bufhead[2];
+
+       /* Also check accessibility of one struct stack_frame beyond */
+       if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
+               return NULL;
+       if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
+               return NULL;
+
+       oprofile_add_trace(bufhead[0].return_address);
 
        /* frame pointers should strictly progress back up the stack
         * (towards higher addresses) */
-       if (head >= bufhead[0].bp)
+       if (head >= bufhead[0].next_frame)
                return NULL;
 
-       return bufhead[0].bp;
+       return bufhead[0].next_frame;
 }
 
 void
 x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
-       struct frame_head *head = (struct frame_head *)frame_pointer(regs);
+       struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
 
        if (!user_mode_vm(regs)) {
                unsigned long stack = kernel_stack_pointer(regs);
@@ -86,6 +131,9 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
                return;
        }
 
+       if (x86_backtrace_32(regs, depth))
+               return;
+
        while (depth-- && head)
                head = dump_user_backtrace(head);
 }
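
For reference, the frame layouts the two walkers above assume look like
this (a sketch matching the asm/stacktrace.h definitions this code is
built against; the compat variant stores 32-bit values, which is why its
next_frame goes through compat_ptr() before being dereferenced):

#include <linux/types.h>

/* Native user frames: next_frame is a real pointer. */
struct stack_frame {
        struct stack_frame      *next_frame;
        unsigned long           return_address;
};

/* Compat (ia32) user frames: both fields are 32 bits wide. */
struct stack_frame_ia32 {
        u32     next_frame;
        u32     return_address;
};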
index f1575c9..bd1489c 100644 (file)
@@ -695,9 +695,6 @@ static int __init ppro_init(char **cpu_type)
        return 1;
 }
 
-/* in order to get sysfs right */
-static int using_nmi;
-
 int __init op_nmi_init(struct oprofile_operations *ops)
 {
        __u8 vendor = boot_cpu_data.x86_vendor;
@@ -705,8 +702,6 @@ int __init op_nmi_init(struct oprofile_operations *ops)
        char *cpu_type = NULL;
        int ret = 0;
 
-       using_nmi = 0;
-
        if (!cpu_has_apic)
                return -ENODEV;
 
@@ -790,13 +785,11 @@ int __init op_nmi_init(struct oprofile_operations *ops)
        if (ret)
                return ret;
 
-       using_nmi = 1;
        printk(KERN_INFO "oprofile: using NMI interrupt.\n");
        return 0;
 }
 
 void op_nmi_exit(void)
 {
-       if (using_nmi)
-               exit_sysfs();
+       exit_sysfs();
 }
index b336cd9..f9bda64 100644 (file)
@@ -225,26 +225,17 @@ post_sync:
        mutex_unlock(&start_mutex);
 }
 
-int oprofile_set_backtrace(unsigned long val)
+int oprofile_set_ulong(unsigned long *addr, unsigned long val)
 {
-       int err = 0;
+       int err = -EBUSY;
 
        mutex_lock(&start_mutex);
-
-       if (oprofile_started) {
-               err = -EBUSY;
-               goto out;
-       }
-
-       if (!oprofile_ops.backtrace) {
-               err = -EINVAL;
-               goto out;
+       if (!oprofile_started) {
+               *addr = val;
+               err = 0;
        }
-
-       oprofile_backtrace_depth = val;
-
-out:
        mutex_unlock(&start_mutex);
+
        return err;
 }
 
@@ -257,16 +248,9 @@ static int __init oprofile_init(void)
                printk(KERN_INFO "oprofile: using timer interrupt.\n");
                err = oprofile_timer_init(&oprofile_ops);
                if (err)
-                       goto out_arch;
+                       return err;
        }
-       err = oprofilefs_register();
-       if (err)
-               goto out_arch;
-       return 0;
-
-out_arch:
-       oprofile_arch_exit();
-       return err;
+       return oprofilefs_register();
 }
 
 
index 47e12cb..177b73d 100644 (file)
@@ -37,7 +37,7 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root);
 int oprofile_timer_init(struct oprofile_operations *ops);
 void oprofile_timer_exit(void);
 
-int oprofile_set_backtrace(unsigned long depth);
+int oprofile_set_ulong(unsigned long *addr, unsigned long val);
 int oprofile_set_timeout(unsigned long time);
 
 #endif /* OPROF_H */
index bbd7516..ccf099e 100644 (file)
@@ -79,14 +79,17 @@ static ssize_t depth_write(struct file *file, char const __user *buf, size_t cou
        if (*offset)
                return -EINVAL;
 
+       if (!oprofile_ops.backtrace)
+               return -EINVAL;
+
        retval = oprofilefs_ulong_from_user(&val, buf, count);
        if (retval)
                return retval;
 
-       retval = oprofile_set_backtrace(val);
-
+       retval = oprofile_set_ulong(&oprofile_backtrace_depth, val);
        if (retval)
                return retval;
+
        return count;
 }
 
diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c
new file mode 100644 (file)
index 0000000..9046f7b
--- /dev/null
@@ -0,0 +1,328 @@
+/*
+ * Copyright 2010 ARM Ltd.
+ *
+ * Perf-events backend for OProfile.
+ */
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/oprofile.h>
+#include <linux/slab.h>
+
+/*
+ * Per performance monitor configuration as set via oprofilefs.
+ */
+struct op_counter_config {
+       unsigned long count;
+       unsigned long enabled;
+       unsigned long event;
+       unsigned long unit_mask;
+       unsigned long kernel;
+       unsigned long user;
+       struct perf_event_attr attr;
+};
+
+static int oprofile_perf_enabled;
+static DEFINE_MUTEX(oprofile_perf_mutex);
+
+static struct op_counter_config *counter_config;
+static struct perf_event **perf_events[nr_cpumask_bits];
+static int num_counters;
+
+/*
+ * Overflow callback for oprofile.
+ */
+static void op_overflow_handler(struct perf_event *event, int unused,
+                       struct perf_sample_data *data, struct pt_regs *regs)
+{
+       int id;
+       u32 cpu = smp_processor_id();
+
+       for (id = 0; id < num_counters; ++id)
+               if (perf_events[cpu][id] == event)
+                       break;
+
+       if (id != num_counters)
+               oprofile_add_sample(regs, id);
+       else
+               pr_warning("oprofile: ignoring spurious overflow "
+                               "on cpu %u\n", cpu);
+}
+
+/*
+ * Called by oprofile_perf_setup to create perf attributes to mirror the oprofile
+ * settings in counter_config. Attributes are created as `pinned' events and
+ * so are permanently scheduled on the PMU.
+ */
+static void op_perf_setup(void)
+{
+       int i;
+       u32 size = sizeof(struct perf_event_attr);
+       struct perf_event_attr *attr;
+
+       for (i = 0; i < num_counters; ++i) {
+               attr = &counter_config[i].attr;
+               memset(attr, 0, size);
+               attr->type              = PERF_TYPE_RAW;
+               attr->size              = size;
+               attr->config            = counter_config[i].event;
+               attr->sample_period     = counter_config[i].count;
+               attr->pinned            = 1;
+       }
+}
+
+static int op_create_counter(int cpu, int event)
+{
+       struct perf_event *pevent;
+
+       if (!counter_config[event].enabled || perf_events[cpu][event])
+               return 0;
+
+       pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
+                                                 cpu, NULL,
+                                                 op_overflow_handler);
+
+       if (IS_ERR(pevent))
+               return PTR_ERR(pevent);
+
+       if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+               perf_event_release_kernel(pevent);
+               pr_warning("oprofile: failed to enable event %d "
+                               "on CPU %d\n", event, cpu);
+               return -EBUSY;
+       }
+
+       perf_events[cpu][event] = pevent;
+
+       return 0;
+}
+
+static void op_destroy_counter(int cpu, int event)
+{
+       struct perf_event *pevent = perf_events[cpu][event];
+
+       if (pevent) {
+               perf_event_release_kernel(pevent);
+               perf_events[cpu][event] = NULL;
+       }
+}
+
+/*
+ * Called by oprofile_perf_start to create active perf events based on the
+ * previously configured attributes.
+ */
+static int op_perf_start(void)
+{
+       int cpu, event, ret = 0;
+
+       for_each_online_cpu(cpu) {
+               for (event = 0; event < num_counters; ++event) {
+                       ret = op_create_counter(cpu, event);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * Called by oprofile_perf_stop at the end of a profiling run.
+ */
+static void op_perf_stop(void)
+{
+       int cpu, event;
+
+       for_each_online_cpu(cpu)
+               for (event = 0; event < num_counters; ++event)
+                       op_destroy_counter(cpu, event);
+}
+
+static int oprofile_perf_create_files(struct super_block *sb, struct dentry *root)
+{
+       unsigned int i;
+
+       for (i = 0; i < num_counters; i++) {
+               struct dentry *dir;
+               char buf[4];
+
+               snprintf(buf, sizeof buf, "%d", i);
+               dir = oprofilefs_mkdir(sb, root, buf);
+               oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
+               oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
+               oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
+               oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
+               oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
+               oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
+       }
+
+       return 0;
+}
+
+static int oprofile_perf_setup(void)
+{
+       spin_lock(&oprofilefs_lock);
+       op_perf_setup();
+       spin_unlock(&oprofilefs_lock);
+       return 0;
+}
+
+static int oprofile_perf_start(void)
+{
+       int ret = -EBUSY;
+
+       mutex_lock(&oprofile_perf_mutex);
+       if (!oprofile_perf_enabled) {
+               ret = 0;
+               op_perf_start();
+               oprofile_perf_enabled = 1;
+       }
+       mutex_unlock(&oprofile_perf_mutex);
+       return ret;
+}
+
+static void oprofile_perf_stop(void)
+{
+       mutex_lock(&oprofile_perf_mutex);
+       if (oprofile_perf_enabled)
+               op_perf_stop();
+       oprofile_perf_enabled = 0;
+       mutex_unlock(&oprofile_perf_mutex);
+}
+
+#ifdef CONFIG_PM
+
+static int oprofile_perf_suspend(struct platform_device *dev, pm_message_t state)
+{
+       mutex_lock(&oprofile_perf_mutex);
+       if (oprofile_perf_enabled)
+               op_perf_stop();
+       mutex_unlock(&oprofile_perf_mutex);
+       return 0;
+}
+
+static int oprofile_perf_resume(struct platform_device *dev)
+{
+       mutex_lock(&oprofile_perf_mutex);
+       if (oprofile_perf_enabled && op_perf_start())
+               oprofile_perf_enabled = 0;
+       mutex_unlock(&oprofile_perf_mutex);
+       return 0;
+}
+
+static struct platform_driver oprofile_driver = {
+       .driver         = {
+               .name           = "oprofile-perf",
+       },
+       .resume         = oprofile_perf_resume,
+       .suspend        = oprofile_perf_suspend,
+};
+
+static struct platform_device *oprofile_pdev;
+
+static int __init init_driverfs(void)
+{
+       int ret;
+
+       ret = platform_driver_register(&oprofile_driver);
+       if (ret)
+               return ret;
+
+       oprofile_pdev = platform_device_register_simple(
+                               oprofile_driver.driver.name, 0, NULL, 0);
+       if (IS_ERR(oprofile_pdev)) {
+               ret = PTR_ERR(oprofile_pdev);
+               platform_driver_unregister(&oprofile_driver);
+       }
+
+       return ret;
+}
+
+static void exit_driverfs(void)
+{
+       platform_device_unregister(oprofile_pdev);
+       platform_driver_unregister(&oprofile_driver);
+}
+
+#else
+
+static inline int  init_driverfs(void) { return 0; }
+static inline void exit_driverfs(void) { }
+
+#endif /* CONFIG_PM */
+
+void oprofile_perf_exit(void)
+{
+       int cpu, id;
+       struct perf_event *event;
+
+       for_each_possible_cpu(cpu) {
+               for (id = 0; id < num_counters; ++id) {
+                       event = perf_events[cpu][id];
+                       if (event)
+                               perf_event_release_kernel(event);
+               }
+
+               kfree(perf_events[cpu]);
+       }
+
+       kfree(counter_config);
+       exit_driverfs();
+}
+
+int __init oprofile_perf_init(struct oprofile_operations *ops)
+{
+       int cpu, ret = 0;
+
+       ret = init_driverfs();
+       if (ret)
+               return ret;
+
+       memset(&perf_events, 0, sizeof(perf_events));
+
+       num_counters = perf_num_counters();
+       if (num_counters <= 0) {
+               pr_info("oprofile: no performance counters\n");
+               ret = -ENODEV;
+               goto out;
+       }
+
+       counter_config = kcalloc(num_counters,
+                       sizeof(struct op_counter_config), GFP_KERNEL);
+
+       if (!counter_config) {
+               pr_info("oprofile: failed to allocate %d "
+                               "counters\n", num_counters);
+               ret = -ENOMEM;
+               num_counters = 0;
+               goto out;
+       }
+
+       for_each_possible_cpu(cpu) {
+               perf_events[cpu] = kcalloc(num_counters,
+                               sizeof(struct perf_event *), GFP_KERNEL);
+               if (!perf_events[cpu]) {
+                       pr_info("oprofile: failed to allocate %d perf events "
+                                       "for cpu %d\n", num_counters, cpu);
+                       ret = -ENOMEM;
+                       goto out;
+               }
+       }
+
+       ops->create_files       = oprofile_perf_create_files;
+       ops->setup              = oprofile_perf_setup;
+       ops->start              = oprofile_perf_start;
+       ops->stop               = oprofile_perf_stop;
+       ops->shutdown           = oprofile_perf_stop;
+       ops->cpu_type           = op_name_from_perf_id();
+
+       if (!ops->cpu_type)
+               ret = -ENODEV;
+       else
+               pr_info("oprofile: using %s\n", ops->cpu_type);
+
+out:
+       if (ret)
+               oprofile_perf_exit();
+
+       return ret;
+}
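
Architectures adopt this backend by delegating their oprofile hooks to
it. A hedged sketch of such glue, loosely following the ARM/SH
conversions in this merge (the backtrace helper is a hypothetical name;
oprofile_perf_init() fills in create_files/setup/start/stop as above):

#include <linux/oprofile.h>
#include <asm/ptrace.h>

static void my_arch_backtrace(struct pt_regs * const regs,
                              unsigned int depth)
{
        /* arch-specific frame walk, omitted in this sketch */
}

int __init oprofile_arch_init(struct oprofile_operations *ops)
{
        ops->backtrace = my_arch_backtrace;
        return oprofile_perf_init(ops);
}

void oprofile_arch_exit(void)
{
        oprofile_perf_exit();
}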
index 2766a6d..1944621 100644 (file)
@@ -91,16 +91,20 @@ static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count
 
 static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset)
 {
-       unsigned long *value = file->private_data;
+       unsigned long value;
        int retval;
 
        if (*offset)
                return -EINVAL;
 
-       retval = oprofilefs_ulong_from_user(value, buf, count);
+       retval = oprofilefs_ulong_from_user(&value, buf, count);
+       if (retval)
+               return retval;
 
+       retval = oprofile_set_ulong(file->private_data, value);
        if (retval)
                return retval;
+
        return count;
 }
 
@@ -126,50 +130,41 @@ static const struct file_operations ulong_ro_fops = {
 };
 
 
-static struct dentry *__oprofilefs_create_file(struct super_block *sb,
+static int __oprofilefs_create_file(struct super_block *sb,
        struct dentry *root, char const *name, const struct file_operations *fops,
-       int perm)
+       int perm, void *priv)
 {
        struct dentry *dentry;
        struct inode *inode;
 
        dentry = d_alloc_name(root, name);
        if (!dentry)
-               return NULL;
+               return -ENOMEM;
        inode = oprofilefs_get_inode(sb, S_IFREG | perm);
        if (!inode) {
                dput(dentry);
-               return NULL;
+               return -ENOMEM;
        }
        inode->i_fop = fops;
        d_add(dentry, inode);
-       return dentry;
+       dentry->d_inode->i_private = priv;
+       return 0;
 }
 
 
 int oprofilefs_create_ulong(struct super_block *sb, struct dentry *root,
        char const *name, unsigned long *val)
 {
-       struct dentry *d = __oprofilefs_create_file(sb, root, name,
-                                                    &ulong_fops, 0644);
-       if (!d)
-               return -EFAULT;
-
-       d->d_inode->i_private = val;
-       return 0;
+       return __oprofilefs_create_file(sb, root, name,
+                                       &ulong_fops, 0644, val);
 }
 
 
 int oprofilefs_create_ro_ulong(struct super_block *sb, struct dentry *root,
        char const *name, unsigned long *val)
 {
-       struct dentry *d = __oprofilefs_create_file(sb, root, name,
-                                                    &ulong_ro_fops, 0444);
-       if (!d)
-               return -EFAULT;
-
-       d->d_inode->i_private = val;
-       return 0;
+       return __oprofilefs_create_file(sb, root, name,
+                                       &ulong_ro_fops, 0444, val);
 }
 
 
@@ -189,31 +184,22 @@ static const struct file_operations atomic_ro_fops = {
 int oprofilefs_create_ro_atomic(struct super_block *sb, struct dentry *root,
        char const *name, atomic_t *val)
 {
-       struct dentry *d = __oprofilefs_create_file(sb, root, name,
-                                                    &atomic_ro_fops, 0444);
-       if (!d)
-               return -EFAULT;
-
-       d->d_inode->i_private = val;
-       return 0;
+       return __oprofilefs_create_file(sb, root, name,
+                                       &atomic_ro_fops, 0444, val);
 }
 
 
 int oprofilefs_create_file(struct super_block *sb, struct dentry *root,
        char const *name, const struct file_operations *fops)
 {
-       if (!__oprofilefs_create_file(sb, root, name, fops, 0644))
-               return -EFAULT;
-       return 0;
+       return __oprofilefs_create_file(sb, root, name, fops, 0644, NULL);
 }
 
 
 int oprofilefs_create_file_perm(struct super_block *sb, struct dentry *root,
        char const *name, const struct file_operations *fops, int perm)
 {
-       if (!__oprofilefs_create_file(sb, root, name, fops, perm))
-               return -EFAULT;
-       return 0;
+       return __oprofilefs_create_file(sb, root, name, fops, perm, NULL);
 }
 
 
index 62f5908..04d0a97 100644 (file)
@@ -3,13 +3,13 @@
 
 #include <linux/cache.h>
 #include <linux/threads.h>
-#include <linux/irq.h>
 
 typedef struct {
        unsigned int __softirq_pending;
 } ____cacheline_aligned irq_cpustat_t;
 
 #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
+#include <linux/irq.h>
 
 #ifndef ack_bad_irq
 static inline void ack_bad_irq(unsigned int irq)
index 8a92a17..ef2af99 100644 (file)
                                                                        \
        BUG_TABLE                                                       \
                                                                        \
+       JUMP_TABLE                                                      \
+                                                                       \
        /* PCI quirks */                                                \
        .pci_fixup        : AT(ADDR(.pci_fixup) - LOAD_OFFSET) {        \
                VMLINUX_SYMBOL(__start_pci_fixups_early) = .;           \
 #define BUG_TABLE
 #endif
 
+#define JUMP_TABLE                                                     \
+       . = ALIGN(8);                                                   \
+       __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) {           \
+               VMLINUX_SYMBOL(__start___jump_table) = .;               \
+               *(__jump_table)                                         \
+               VMLINUX_SYMBOL(__stop___jump_table) = .;                \
+       }
+
 #ifdef CONFIG_PM_TRACE
 #define TRACEDATA                                                      \
        . = ALIGN(4);                                                   \
index 52c0da4..bef3cda 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _DYNAMIC_DEBUG_H
 #define _DYNAMIC_DEBUG_H
 
+#include <linux/jump_label.h>
+
 /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which
  * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
  * use independent hash functions, to reduce the chance of false positives.
@@ -22,8 +24,6 @@ struct _ddebug {
        const char *function;
        const char *filename;
        const char *format;
-       char primary_hash;
-       char secondary_hash;
        unsigned int lineno:24;
        /*
         * The flags field controls the behaviour at the callsite.
@@ -33,6 +33,7 @@ struct _ddebug {
 #define _DPRINTK_FLAGS_PRINT   (1<<0)  /* printk() a message using the format */
 #define _DPRINTK_FLAGS_DEFAULT 0
        unsigned int flags:8;
+       char enabled;
 } __attribute__((aligned(8)));
 
 
@@ -42,33 +43,35 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n,
 #if defined(CONFIG_DYNAMIC_DEBUG)
 extern int ddebug_remove_module(const char *mod_name);
 
-#define __dynamic_dbg_enabled(dd)  ({       \
-       int __ret = 0;                                                       \
-       if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) &&        \
-                       (dynamic_debug_enabled2 & (1LL << DEBUG_HASH2))))   \
-                               if (unlikely(dd.flags))                      \
-                                       __ret = 1;                           \
-       __ret; })
-
 #define dynamic_pr_debug(fmt, ...) do {                                        \
+       __label__ do_printk;                                            \
+       __label__ out;                                                  \
        static struct _ddebug descriptor                                \
        __used                                                          \
        __attribute__((section("__verbose"), aligned(8))) =             \
-       { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH,  \
-               DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT };        \
-       if (__dynamic_dbg_enabled(descriptor))                          \
-               printk(KERN_DEBUG pr_fmt(fmt),  ##__VA_ARGS__);         \
+       { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__,            \
+               _DPRINTK_FLAGS_DEFAULT };                               \
+       JUMP_LABEL(&descriptor.enabled, do_printk);                     \
+       goto out;                                                       \
+do_printk:                                                             \
+       printk(KERN_DEBUG pr_fmt(fmt),  ##__VA_ARGS__);                 \
+out:   ;                                                               \
        } while (0)
 
 
 #define dynamic_dev_dbg(dev, fmt, ...) do {                            \
+       __label__ do_printk;                                            \
+       __label__ out;                                                  \
        static struct _ddebug descriptor                                \
        __used                                                          \
        __attribute__((section("__verbose"), aligned(8))) =             \
-       { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH,  \
-               DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT };        \
-       if (__dynamic_dbg_enabled(descriptor))                          \
-               dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);        \
+       { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__,            \
+               _DPRINTK_FLAGS_DEFAULT };                               \
+       JUMP_LABEL(&descriptor.enabled, do_printk);                     \
+       goto out;                                                       \
+do_printk:                                                             \
+       dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);                \
+out:   ;                                                               \
        } while (0)
 
 #else
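
The rewritten macros hinge on GCC local labels: JUMP_LABEL() either
falls through (debug off) or branches to do_printk (debug on), so a
disabled call site costs a single patched nop. A user-space
approximation of the same control flow, with a plain flag standing in
for the patched branch (names hypothetical):

#include <stdio.h>

static int enabled;     /* stands in for descriptor.enabled */

#define MY_DYNAMIC_DBG(fmt, ...) do {                   \
        __label__ do_printk;                            \
        __label__ out;                                  \
        if (enabled)    /* JUMP_LABEL(&enabled, ...) */ \
                goto do_printk;                         \
        goto out;                                       \
do_printk:                                              \
        printf(fmt, ##__VA_ARGS__);                     \
out:    ;                                               \
        } while (0)

int main(void)
{
        MY_DYNAMIC_DBG("never printed\n");
        enabled = 1;
        MY_DYNAMIC_DBG("printed once enabled\n");
        return 0;
}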
index 02b8b24..8beabb9 100644 (file)
@@ -191,8 +191,8 @@ struct ftrace_event_call {
        unsigned int            flags;
 
 #ifdef CONFIG_PERF_EVENTS
-       int                     perf_refcount;
-       struct hlist_head       *perf_events;
+       int                             perf_refcount;
+       struct hlist_head __percpu      *perf_events;
 #endif
 };
 
@@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
 
 extern int  perf_trace_init(struct perf_event *event);
 extern void perf_trace_destroy(struct perf_event *event);
-extern int  perf_trace_enable(struct perf_event *event);
-extern void perf_trace_disable(struct perf_event *event);
+extern int  perf_trace_add(struct perf_event *event, int flags);
+extern void perf_trace_del(struct perf_event *event, int flags);
 extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
                                     char *filter_str);
 extern void ftrace_profile_free_filter(struct perf_event *event);
index a0384a4..531495d 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
+#include <trace/events/irq.h>
 
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
@@ -407,7 +408,12 @@ asmlinkage void do_softirq(void);
 asmlinkage void __do_softirq(void);
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
-#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)
+static inline void __raise_softirq_irqoff(unsigned int nr)
+{
+       trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL);
+       or_softirq_pending(1UL << nr);
+}
+
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 extern void wakeup_softirqd(void);
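
A minimal caller sketch; interrupts must already be disabled, which is
what raise_softirq() arranges around this helper (the function name is
hypothetical and TASKLET_SOFTIRQ is chosen only for illustration):

#include <linux/interrupt.h>

static void my_kick_tasklets(void)
{
        unsigned long flags;

        local_irq_save(flags);
        /* Emits the softirq_raise tracepoint, then sets the pending bit. */
        __raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}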
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
new file mode 100644 (file)
index 0000000..4fa09d4
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef _LINUX_IRQ_WORK_H
+#define _LINUX_IRQ_WORK_H
+
+struct irq_work {
+       struct irq_work *next;
+       void (*func)(struct irq_work *);
+};
+
+static inline
+void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *))
+{
+       entry->next = NULL;
+       entry->func = func;
+}
+
+bool irq_work_queue(struct irq_work *entry);
+void irq_work_run(void);
+void irq_work_sync(struct irq_work *entry);
+
+#endif /* _LINUX_IRQ_WORK_H */
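
A minimal usage sketch of the new API (all names beginning with my_ are
hypothetical). The callback runs in hard-interrupt context shortly after
irq_work_queue(), which itself is safe to call from NMI or IRQ context;
on x86 the work is driven by the self-IPI added above:

#include <linux/irq_work.h>

static void my_func(struct irq_work *work)
{
        /* Runs in hardirq context, soon after being queued. */
}

static struct irq_work my_work;

static void my_setup(void)
{
        init_irq_work(&my_work, my_func);
}

static void my_nmi_path(void)
{
        /* Returns false if this entry was already pending. */
        irq_work_queue(&my_work);
}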
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
new file mode 100644 (file)
index 0000000..b67cb18
--- /dev/null
@@ -0,0 +1,74 @@
+#ifndef _LINUX_JUMP_LABEL_H
+#define _LINUX_JUMP_LABEL_H
+
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL)
+# include <asm/jump_label.h>
+# define HAVE_JUMP_LABEL
+#endif
+
+enum jump_label_type {
+       JUMP_LABEL_ENABLE,
+       JUMP_LABEL_DISABLE
+};
+
+struct module;
+
+#ifdef HAVE_JUMP_LABEL
+
+extern struct jump_entry __start___jump_table[];
+extern struct jump_entry __stop___jump_table[];
+
+extern void arch_jump_label_transform(struct jump_entry *entry,
+                                enum jump_label_type type);
+extern void arch_jump_label_text_poke_early(jump_label_t addr);
+extern void jump_label_update(unsigned long key, enum jump_label_type type);
+extern void jump_label_apply_nops(struct module *mod);
+extern int jump_label_text_reserved(void *start, void *end);
+
+#define jump_label_enable(key) \
+       jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE);
+
+#define jump_label_disable(key) \
+       jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE);
+
+#else
+
+#define JUMP_LABEL(key, label)                 \
+do {                                           \
+       if (unlikely(*key))                     \
+               goto label;                     \
+} while (0)
+
+#define jump_label_enable(cond_var)    \
+do {                                   \
+       *(cond_var) = 1;                        \
+} while (0)
+
+#define jump_label_disable(cond_var)   \
+do {                                   \
+       *(cond_var) = 0;                        \
+} while (0)
+
+static inline int jump_label_apply_nops(struct module *mod)
+{
+       return 0;
+}
+
+static inline int jump_label_text_reserved(void *start, void *end)
+{
+       return 0;
+}
+
+#endif
+
+#define COND_STMT(key, stmt)                                   \
+do {                                                           \
+       __label__ jl_enabled;                                   \
+       JUMP_LABEL(key, jl_enabled);                            \
+       if (0) {                                                \
+jl_enabled:                                                    \
+               stmt;                                           \
+       }                                                       \
+} while (0)
+
+#endif
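
A hedged usage sketch of the API (key and function names hypothetical).
With HAVE_JUMP_LABEL the branch is patched at run time; without it the
fallback simply tests the integer key:

static int my_tracing_key;

static void my_slow_path(void)
{
        /* tracing work, omitted in this sketch */
}

static void my_hot_path(void)
{
        /* Skipped (a single nop on x86) until the key is enabled. */
        COND_STMT(&my_tracing_key, my_slow_path());
}

static void my_enable_tracing(void)
{
        jump_label_enable(&my_tracing_key);
}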
diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h
new file mode 100644 (file)
index 0000000..e5d012a
--- /dev/null
@@ -0,0 +1,44 @@
+#ifndef _LINUX_JUMP_LABEL_REF_H
+#define _LINUX_JUMP_LABEL_REF_H
+
+#include <linux/jump_label.h>
+#include <asm/atomic.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+static inline void jump_label_inc(atomic_t *key)
+{
+       if (atomic_add_return(1, key) == 1)
+               jump_label_enable(key);
+}
+
+static inline void jump_label_dec(atomic_t *key)
+{
+       if (atomic_dec_and_test(key))
+               jump_label_disable(key);
+}
+
+#else /* !HAVE_JUMP_LABEL */
+
+static inline void jump_label_inc(atomic_t *key)
+{
+       atomic_inc(key);
+}
+
+static inline void jump_label_dec(atomic_t *key)
+{
+       atomic_dec(key);
+}
+
+#undef JUMP_LABEL
+#define JUMP_LABEL(key, label)                                         \
+do {                                                                   \
+       if (unlikely(__builtin_choose_expr(                             \
+             __builtin_types_compatible_p(typeof(key), atomic_t *),    \
+             atomic_read((atomic_t *)(key)), *(key))))                 \
+               goto label;                                             \
+} while (0)
+
+#endif /* HAVE_JUMP_LABEL */
+
+#endif /* _LINUX_JUMP_LABEL_REF_H */
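
A short sketch of the refcounted pattern this header enables (names
hypothetical): the first user patches the branch in, the last user
patches it out, and without jump labels the same atomic_t is simply read
at the call site via the redefined JUMP_LABEL():

static atomic_t my_nr_events = ATOMIC_INIT(0);

static void my_event_init(void)
{
        jump_label_inc(&my_nr_events);  /* 0 -> 1 enables the branch */
}

static void my_event_destroy(void)
{
        jump_label_dec(&my_nr_events);  /* 1 -> 0 disables it again */
}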
index aace066..b29e745 100644 (file)
@@ -350,7 +350,10 @@ struct module
        struct tracepoint *tracepoints;
        unsigned int num_tracepoints;
 #endif
-
+#ifdef HAVE_JUMP_LABEL
+       struct jump_entry *jump_entries;
+       unsigned int num_jump_entries;
+#endif
 #ifdef CONFIG_TRACING
        const char **trace_bprintk_fmt_start;
        unsigned int num_trace_bprintk_fmt;
index 5171639..32fb812 100644 (file)
@@ -15,6 +15,7 @@
 
 #include <linux/types.h>
 #include <linux/spinlock.h>
+#include <linux/init.h>
 #include <asm/atomic.h>
  
 /* Each escaped entry is prefixed by ESCAPE_CODE
@@ -185,4 +186,10 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val);
 int oprofile_add_data64(struct op_entry *entry, u64 val);
 int oprofile_write_commit(struct op_entry *entry);
 
+#ifdef CONFIG_PERF_EVENTS
+int __init oprofile_perf_init(struct oprofile_operations *ops);
+void oprofile_perf_exit(void);
+char *op_name_from_perf_id(void);
+#endif /* CONFIG_PERF_EVENTS */
+
 #endif /* OPROFILE_H */
index 49466b1..0eb5083 100644 (file)
        preempt_enable();                               \
 } while (0)
 
+#define get_cpu_ptr(var) ({                            \
+       preempt_disable();                              \
+       this_cpu_ptr(var); })
+
+#define put_cpu_ptr(var) do {                          \
+       (void)(var);                                    \
+       preempt_enable();                               \
+} while (0)
+
 #ifdef CONFIG_SMP
 
 /* minimum unit size, also is the maximum supported allocation size */
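
A minimal usage sketch of the new pair (struct and function names
hypothetical): get_cpu_ptr() disables preemption so the per-cpu pointer
stays valid until the matching put_cpu_ptr():

#include <linux/percpu.h>

struct my_stats {
        unsigned long events;
};

static DEFINE_PER_CPU(struct my_stats, my_stats);

static void my_count_event(void)
{
        struct my_stats *s = get_cpu_ptr(&my_stats);    /* preempt off */

        s->events++;
        put_cpu_ptr(s);                                 /* preempt on */
}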
index 716f99b..057bf22 100644 (file)
@@ -486,6 +486,8 @@ struct perf_guest_info_callbacks {
 #include <linux/workqueue.h>
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
+#include <linux/irq_work.h>
+#include <linux/jump_label_ref.h>
 #include <asm/atomic.h>
 #include <asm/local.h>
 
@@ -529,16 +531,22 @@ struct hw_perf_event {
                        int             last_cpu;
                };
                struct { /* software */
-                       s64             remaining;
                        struct hrtimer  hrtimer;
                };
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
                struct { /* breakpoint */
                        struct arch_hw_breakpoint       info;
                        struct list_head                bp_list;
+                       /*
+                        * Crufty hack to avoid the chicken and egg
+                        * problem hw_breakpoint has with context
+                        * creation and event initialization.
+                        */
+                       struct task_struct              *bp_target;
                };
 #endif
        };
+       int                             state;
        local64_t                       prev_count;
        u64                             sample_period;
        u64                             last_period;
@@ -550,6 +558,13 @@ struct hw_perf_event {
 #endif
 };
 
+/*