Merge branch 'perf/urgent' into perf/core
Ingo Molnar [Fri, 29 Jan 2010 08:24:57 +0000 (09:24 +0100)]
Merge reason: We want to queue up a dependent patch. Also update to
              later -rc's.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

1  2 
arch/x86/include/asm/perf_event.h
arch/x86/kernel/cpu/perf_event.c
include/linux/perf_event.h
kernel/perf_event.c
kernel/sched.c
kernel/trace/trace_events_filter.c
tools/perf/builtin-kmem.c
tools/perf/builtin-report.c
tools/perf/builtin-timechart.c

@@@ -19,6 -19,7 +19,7 @@@
  #define MSR_ARCH_PERFMON_EVENTSEL1                         0x187
  
  #define ARCH_PERFMON_EVENTSEL0_ENABLE                   (1 << 22)
+ #define ARCH_PERFMON_EVENTSEL_ANY                       (1 << 21)
  #define ARCH_PERFMON_EVENTSEL_INT                       (1 << 20)
  #define ARCH_PERFMON_EVENTSEL_OS                        (1 << 17)
  #define ARCH_PERFMON_EVENTSEL_USR                       (1 << 16)
  /*
   * Includes eventsel and unit mask as well:
   */
 -#define ARCH_PERFMON_EVENT_MASK                                   0xffff
 +
 +
 +#define INTEL_ARCH_EVTSEL_MASK                0x000000FFULL
 +#define INTEL_ARCH_UNIT_MASK          0x0000FF00ULL
 +#define INTEL_ARCH_EDGE_MASK          0x00040000ULL
 +#define INTEL_ARCH_INV_MASK           0x00800000ULL
 +#define INTEL_ARCH_CNT_MASK           0xFF000000ULL
 +#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
  
  /*
   * filter mask to validate fixed counter events.
   *  The other filters are supported by fixed counters.
   *  The any-thread option is supported starting with v3.
   */
 -#define ARCH_PERFMON_EVENT_FILTER_MASK                        0xff840000
 +#define INTEL_ARCH_FIXED_MASK \
 +      (INTEL_ARCH_CNT_MASK| \
 +       INTEL_ARCH_INV_MASK| \
 +       INTEL_ARCH_EDGE_MASK|\
 +       INTEL_ARCH_UNIT_MASK|\
 +       INTEL_ARCH_EVTSEL_MASK)
  
  #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL               0x3c
  #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK               (0x00 << 8)
@@@ -7,7 -7,6 +7,7 @@@
   *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
   *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
   *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 + *  Copyright (C) 2009 Google, Inc., Stephane Eranian
   *
   *  For licencing details see kernel-base/COPYING
   */
@@@ -23,7 -22,6 +23,7 @@@
  #include <linux/uaccess.h>
  #include <linux/highmem.h>
  #include <linux/cpu.h>
 +#include <linux/bitops.h>
  
  #include <asm/apic.h>
  #include <asm/stacktrace.h>
@@@ -70,47 -68,26 +70,47 @@@ struct debug_store 
        u64     pebs_event_reset[MAX_PEBS_EVENTS];
  };
  
 +struct event_constraint {
 +      union {
 +              unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 +              u64             idxmsk64[1];
 +      };
 +      int     code;
 +      int     cmask;
 +      int     weight;
 +};
 +
  struct cpu_hw_events {
 -      struct perf_event       *events[X86_PMC_IDX_MAX];
 -      unsigned long           used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 +      struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
        unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        unsigned long           interrupts;
        int                     enabled;
        struct debug_store      *ds;
 -};
  
 -struct event_constraint {
 -      unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 -      int             code;
 +      int                     n_events;
 +      int                     n_added;
 +      int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 +      struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
  };
  
 -#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
 -#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }
 +#define EVENT_CONSTRAINT(c, n, m) {   \
 +      { .idxmsk64[0] = (n) },         \
 +      .code = (c),                    \
 +      .cmask = (m),                   \
 +      .weight = HWEIGHT64((u64)(n)),  \
 +}
 +
 +#define INTEL_EVENT_CONSTRAINT(c, n)  \
 +      EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 +
 +#define FIXED_EVENT_CONSTRAINT(c, n)  \
 +      EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
  
 -#define for_each_event_constraint(e, c) \
 -      for ((e) = (c); (e)->idxmsk[0]; (e)++)
 +#define EVENT_CONSTRAINT_END          \
 +      EVENT_CONSTRAINT(0, 0, 0)
  
 +#define for_each_event_constraint(e, c)       \
 +      for ((e) = (c); (e)->cmask; (e)++)
  
  /*
   * struct x86_pmu - generic x86 pmu
@@@ -137,14 -114,8 +137,14 @@@ struct x86_pmu 
        u64             intel_ctrl;
        void            (*enable_bts)(u64 config);
        void            (*disable_bts)(void);
 -      int             (*get_event_idx)(struct cpu_hw_events *cpuc,
 -                                       struct hw_perf_event *hwc);
 +
 +      struct event_constraint *
 +                      (*get_event_constraints)(struct cpu_hw_events *cpuc,
 +                                               struct perf_event *event);
 +
 +      void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
 +                                               struct perf_event *event);
 +      struct event_constraint *event_constraints;
  };
  
  static struct x86_pmu x86_pmu __read_mostly;
@@@ -153,8 -124,7 +153,8 @@@ static DEFINE_PER_CPU(struct cpu_hw_eve
        .enabled = 1,
  };
  
 -static const struct event_constraint *event_constraints;
 +static int x86_perf_event_set_period(struct perf_event *event,
 +                           struct hw_perf_event *hwc, int idx);
  
  /*
   * Not sure about some of these
@@@ -201,14 -171,14 +201,14 @@@ static u64 p6_pmu_raw_event(u64 hw_even
        return hw_event & P6_EVNTSEL_MASK;
  }
  
 -static const struct event_constraint intel_p6_event_constraints[] =
 +static struct event_constraint intel_p6_event_constraints[] =
  {
 -      EVENT_CONSTRAINT(0xc1, 0x1),    /* FLOPS */
 -      EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
 -      EVENT_CONSTRAINT(0x11, 0x1),    /* FP_ASSIST */
 -      EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
 -      EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
 -      EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
 +      INTEL_EVENT_CONSTRAINT(0xc1, 0x1),      /* FLOPS */
 +      INTEL_EVENT_CONSTRAINT(0x10, 0x1),      /* FP_COMP_OPS_EXE */
 +      INTEL_EVENT_CONSTRAINT(0x11, 0x1),      /* FP_ASSIST */
 +      INTEL_EVENT_CONSTRAINT(0x12, 0x2),      /* MUL */
 +      INTEL_EVENT_CONSTRAINT(0x13, 0x2),      /* DIV */
 +      INTEL_EVENT_CONSTRAINT(0x14, 0x1),      /* CYCLES_DIV_BUSY */
        EVENT_CONSTRAINT_END
  };
  
@@@ -226,51 -196,32 +226,51 @@@ static const u64 intel_perfmon_event_ma
    [PERF_COUNT_HW_BUS_CYCLES]          = 0x013c,
  };
  
 -static const struct event_constraint intel_core_event_constraints[] =
 -{
 -      EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
 -      EVENT_CONSTRAINT(0x11, 0x2),    /* FP_ASSIST */
 -      EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
 -      EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
 -      EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
 -      EVENT_CONSTRAINT(0x18, 0x1),    /* IDLE_DURING_DIV */
 -      EVENT_CONSTRAINT(0x19, 0x2),    /* DELAYED_BYPASS */
 -      EVENT_CONSTRAINT(0xa1, 0x1),    /* RS_UOPS_DISPATCH_CYCLES */
 -      EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED */
 +static struct event_constraint intel_core_event_constraints[] =
 +{
 +      FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
 +      FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
 +      INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
 +      INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
 +      INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
 +      INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
 +      INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
 +      INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
 +      INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
 +      INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
 +      INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
 +      EVENT_CONSTRAINT_END
 +};
 +
 +static struct event_constraint intel_nehalem_event_constraints[] =
 +{
 +      FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
 +      FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
 +      INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
 +      INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
 +      INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
 +      INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
 +      INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
 +      INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
 +      INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
 +      INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
        EVENT_CONSTRAINT_END
  };
  
 -static const struct event_constraint intel_nehalem_event_constraints[] =
 -{
 -      EVENT_CONSTRAINT(0x40, 0x3),    /* L1D_CACHE_LD */
 -      EVENT_CONSTRAINT(0x41, 0x3),    /* L1D_CACHE_ST */
 -      EVENT_CONSTRAINT(0x42, 0x3),    /* L1D_CACHE_LOCK */
 -      EVENT_CONSTRAINT(0x43, 0x3),    /* L1D_ALL_REF */
 -      EVENT_CONSTRAINT(0x4e, 0x3),    /* L1D_PREFETCH */
 -      EVENT_CONSTRAINT(0x4c, 0x3),    /* LOAD_HIT_PRE */
 -      EVENT_CONSTRAINT(0x51, 0x3),    /* L1D */
 -      EVENT_CONSTRAINT(0x52, 0x3),    /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
 -      EVENT_CONSTRAINT(0x53, 0x3),    /* L1D_CACHE_LOCK_FB_HIT */
 -      EVENT_CONSTRAINT(0xc5, 0x3),    /* CACHE_LOCK_CYCLES */
 +static struct event_constraint intel_westmere_event_constraints[] =
 +{
 +      FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
 +      FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
 +      INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
 +      INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
 +      INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
 +      EVENT_CONSTRAINT_END
 +};
 +
 +static struct event_constraint intel_gen_event_constraints[] =
 +{
 +      FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
 +      FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
        EVENT_CONSTRAINT_END
  };
  
@@@ -294,97 -245,6 +294,97 @@@ static u64 __read_mostly hw_cache_event
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];
  
 +static __initconst u64 westmere_hw_cache_event_ids
 +                              [PERF_COUNT_HW_CACHE_MAX]
 +                              [PERF_COUNT_HW_CACHE_OP_MAX]
 +                              [PERF_COUNT_HW_CACHE_RESULT_MAX] =
 +{
 + [ C(L1D) ] = {
 +      [ C(OP_READ) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
 +              [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
 +      },
 +      [ C(OP_WRITE) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
 +              [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
 +      },
 +      [ C(OP_PREFETCH) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
 +              [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
 +      },
 + },
 + [ C(L1I ) ] = {
 +      [ C(OP_READ) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
 +              [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
 +      },
 +      [ C(OP_WRITE) ] = {
 +              [ C(RESULT_ACCESS) ] = -1,
 +              [ C(RESULT_MISS)   ] = -1,
 +      },
 +      [ C(OP_PREFETCH) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x0,
 +              [ C(RESULT_MISS)   ] = 0x0,
 +      },
 + },
 + [ C(LL  ) ] = {
 +      [ C(OP_READ) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
 +              [ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
 +      },
 +      [ C(OP_WRITE) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
 +              [ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
 +      },
 +      [ C(OP_PREFETCH) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
 +              [ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
 +      },
 + },
 + [ C(DTLB) ] = {
 +      [ C(OP_READ) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
 +              [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
 +      },
 +      [ C(OP_WRITE) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
 +              [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
 +      },
 +      [ C(OP_PREFETCH) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x0,
 +              [ C(RESULT_MISS)   ] = 0x0,
 +      },
 + },
 + [ C(ITLB) ] = {
 +      [ C(OP_READ) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
 +              [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
 +      },
 +      [ C(OP_WRITE) ] = {
 +              [ C(RESULT_ACCESS) ] = -1,
 +              [ C(RESULT_MISS)   ] = -1,
 +      },
 +      [ C(OP_PREFETCH) ] = {
 +              [ C(RESULT_ACCESS) ] = -1,
 +              [ C(RESULT_MISS)   ] = -1,
 +      },
 + },
 + [ C(BPU ) ] = {
 +      [ C(OP_READ) ] = {
 +              [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
 +              [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
 +      },
 +      [ C(OP_WRITE) ] = {
 +              [ C(RESULT_ACCESS) ] = -1,
 +              [ C(RESULT_MISS)   ] = -1,
 +      },
 +      [ C(OP_PREFETCH) ] = {
 +              [ C(RESULT_ACCESS) ] = -1,
 +              [ C(RESULT_MISS)   ] = -1,
 +      },
 + },
 +};
 +
  static __initconst u64 nehalem_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
@@@ -667,11 -527,11 +667,11 @@@ static u64 intel_pmu_raw_event(u64 hw_e
  #define CORE_EVNTSEL_REG_MASK         0xFF000000ULL
  
  #define CORE_EVNTSEL_MASK             \
 -      (CORE_EVNTSEL_EVENT_MASK |      \
 -       CORE_EVNTSEL_UNIT_MASK  |      \
 -       CORE_EVNTSEL_EDGE_MASK  |      \
 -       CORE_EVNTSEL_INV_MASK  |       \
 -       CORE_EVNTSEL_REG_MASK)
 +      (INTEL_ARCH_EVTSEL_MASK |       \
 +       INTEL_ARCH_UNIT_MASK   |       \
 +       INTEL_ARCH_EDGE_MASK   |       \
 +       INTEL_ARCH_INV_MASK    |       \
 +       INTEL_ARCH_CNT_MASK)
  
        return hw_event & CORE_EVNTSEL_MASK;
  }
@@@ -1198,8 -1058,15 +1198,8 @@@ static int __hw_perf_event_init(struct 
  
  static void p6_pmu_disable_all(void)
  {
 -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        u64 val;
  
 -      if (!cpuc->enabled)
 -              return;
 -
 -      cpuc->enabled = 0;
 -      barrier();
 -
        /* p6 only has one enable register */
        rdmsrl(MSR_P6_EVNTSEL0, val);
        val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
@@@ -1210,6 -1077,12 +1210,6 @@@ static void intel_pmu_disable_all(void
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  
 -      if (!cpuc->enabled)
 -              return;
 -
 -      cpuc->enabled = 0;
 -      barrier();
 -
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
  
        if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
@@@ -1221,6 -1094,17 +1221,6 @@@ static void amd_pmu_disable_all(void
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int idx;
  
 -      if (!cpuc->enabled)
 -              return;
 -
 -      cpuc->enabled = 0;
 -      /*
 -       * ensure we write the disable before we start disabling the
 -       * events proper, so that amd_pmu_enable_event() does the
 -       * right thing.
 -       */
 -      barrier();
 -
        for (idx = 0; idx < x86_pmu.num_events; idx++) {
                u64 val;
  
  
  void hw_perf_disable(void)
  {
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +
        if (!x86_pmu_initialized())
                return;
 -      return x86_pmu.disable_all();
 +
 +      if (!cpuc->enabled)
 +              return;
 +
 +      cpuc->n_added = 0;
 +      cpuc->enabled = 0;
 +      barrier();
 +
 +      x86_pmu.disable_all();
  }
  
  static void p6_pmu_enable_all(void)
  {
 -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        unsigned long val;
  
 -      if (cpuc->enabled)
 -              return;
 -
 -      cpuc->enabled = 1;
 -      barrier();
 -
        /* p6 only has one enable register */
        rdmsrl(MSR_P6_EVNTSEL0, val);
        val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
@@@ -1265,6 -1146,12 +1265,6 @@@ static void intel_pmu_enable_all(void
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  
 -      if (cpuc->enabled)
 -              return;
 -
 -      cpuc->enabled = 1;
 -      barrier();
 -
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
  
        if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@@ -1283,6 -1170,12 +1283,6 @@@ static void amd_pmu_enable_all(void
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int idx;
  
 -      if (cpuc->enabled)
 -              return;
 -
 -      cpuc->enabled = 1;
 -      barrier();
 -
        for (idx = 0; idx < x86_pmu.num_events; idx++) {
                struct perf_event *event = cpuc->events[idx];
                u64 val;
        }
  }
  
 +static const struct pmu pmu;
 +
 +static inline int is_x86_event(struct perf_event *event)
 +{
 +      return event->pmu == &pmu;
 +}
 +
 +static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 +{
 +      struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 +      unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 +      int i, j, w, wmax, num = 0;
 +      struct hw_perf_event *hwc;
 +
 +      bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 +
 +      for (i = 0; i < n; i++) {
 +              constraints[i] =
 +                x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
 +      }
 +
 +      /*
 +       * fastpath, try to reuse previous register
 +       */
 +      for (i = 0; i < n; i++) {
 +              hwc = &cpuc->event_list[i]->hw;
 +              c = constraints[i];
 +
 +              /* never assigned */
 +              if (hwc->idx == -1)
 +                      break;
 +
 +              /* constraint still honored */
 +              if (!test_bit(hwc->idx, c->idxmsk))
 +                      break;
 +
 +              /* not already used */
 +              if (test_bit(hwc->idx, used_mask))
 +                      break;
 +
 +              set_bit(hwc->idx, used_mask);
 +              if (assign)
 +                      assign[i] = hwc->idx;
 +      }
 +      if (i == n)
 +              goto done;
 +
 +      /*
 +       * begin slow path
 +       */
 +
 +      bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 +
 +      /*
 +       * weight = number of possible counters
 +       *
 +       * 1    = most constrained, only works on one counter
 +       * wmax = least constrained, works on any counter
 +       *
 +       * assign events to counters starting with most
 +       * constrained events.
 +       */
 +      wmax = x86_pmu.num_events;
 +
 +      /*
 +       * when fixed event counters are present,
 +       * wmax is incremented by 1 to account
 +       * for one more choice
 +       */
 +      if (x86_pmu.num_events_fixed)
 +              wmax++;
 +
 +      for (w = 1, num = n; num && w <= wmax; w++) {
 +              /* for each event */
 +              for (i = 0; num && i < n; i++) {
 +                      c = constraints[i];
 +                      hwc = &cpuc->event_list[i]->hw;
 +
 +                      if (c->weight != w)
 +                              continue;
 +
 +                      for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
 +                              if (!test_bit(j, used_mask))
 +                                      break;
 +                      }
 +
 +                      if (j == X86_PMC_IDX_MAX)
 +                              break;
 +
 +                      set_bit(j, used_mask);
 +
 +                      if (assign)
 +                              assign[i] = j;
 +                      num--;
 +              }
 +      }
 +done:
 +      /*
 +       * scheduling failed or is just a simulation,
 +       * free resources if necessary
 +       */
 +      if (!assign || num) {
 +              for (i = 0; i < n; i++) {
 +                      if (x86_pmu.put_event_constraints)
 +                              x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
 +              }
 +      }
 +      return num ? -ENOSPC : 0;
 +}
 +
 +/*
 + * dogrp: true if must collect siblings events (group)
 + * returns total number of events and error code
 + */
 +static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
 +{
 +      struct perf_event *event;
 +      int n, max_count;
 +
 +      max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
 +
 +      /* current number of events already accepted */
 +      n = cpuc->n_events;
 +
 +      if (is_x86_event(leader)) {
 +              if (n >= max_count)
 +                      return -ENOSPC;
 +              cpuc->event_list[n] = leader;
 +              n++;
 +      }
 +      if (!dogrp)
 +              return n;
 +
 +      list_for_each_entry(event, &leader->sibling_list, group_entry) {
 +              if (!is_x86_event(event) ||
 +                  event->state <= PERF_EVENT_STATE_OFF)
 +                      continue;
 +
 +              if (n >= max_count)
 +                      return -ENOSPC;
 +
 +              cpuc->event_list[n] = event;
 +              n++;
 +      }
 +      return n;
 +}
 +
 +
 +static inline void x86_assign_hw_event(struct perf_event *event,
 +                              struct hw_perf_event *hwc, int idx)
 +{
 +      hwc->idx = idx;
 +
 +      if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
 +              hwc->config_base = 0;
 +              hwc->event_base = 0;
 +      } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
 +              hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 +              /*
 +               * We set it so that event_base + idx in wrmsr/rdmsr maps to
 +               * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
 +               */
 +              hwc->event_base =
 +                      MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
 +      } else {
 +              hwc->config_base = x86_pmu.eventsel;
 +              hwc->event_base  = x86_pmu.perfctr;
 +      }
 +}
 +
 +static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
 +
  void hw_perf_enable(void)
  {
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +      struct perf_event *event;
 +      struct hw_perf_event *hwc;
 +      int i;
 +
        if (!x86_pmu_initialized())
                return;
 +
 +      if (cpuc->enabled)
 +              return;
 +
 +      if (cpuc->n_added) {
 +              /*
 +               * apply assignment obtained either from
 +               * hw_perf_group_sched_in() or x86_pmu_enable()
 +               *
 +               * step1: save events moving to new counters
 +               * step2: reprogram moved events into new counters
 +               */
 +              for (i = 0; i < cpuc->n_events; i++) {
 +
 +                      event = cpuc->event_list[i];
 +                      hwc = &event->hw;
 +
 +                      if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
 +                              continue;
 +
 +                      __x86_pmu_disable(event, cpuc);
 +
 +                      hwc->idx = -1;
 +              }
 +
 +              for (i = 0; i < cpuc->n_events; i++) {
 +
 +                      event = cpuc->event_list[i];
 +                      hwc = &event->hw;
 +
 +                      if (hwc->idx == -1) {
 +                              x86_assign_hw_event(event, hwc, cpuc->assign[i]);
 +                              x86_perf_event_set_period(event, hwc, hwc->idx);
 +                      }
 +                      /*
 +                       * need to mark as active because x86_pmu_disable()
 +                       * clear active_mask and eventsp[] yet it preserves
 +                       * idx
 +                       */
 +                      set_bit(hwc->idx, cpuc->active_mask);
 +                      cpuc->events[hwc->idx] = event;
 +
 +                      x86_pmu.enable(hwc, hwc->idx);
 +                      perf_event_update_userpage(event);
 +              }
 +              cpuc->n_added = 0;
 +              perf_events_lapic_init();
 +      }
 +
 +      cpuc->enabled = 1;
 +      barrier();
 +
        x86_pmu.enable_all();
  }
  
@@@ -1679,6 -1343,13 +1679,13 @@@ intel_pmu_enable_fixed(struct hw_perf_e
                bits |= 0x2;
        if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
                bits |= 0x1;
+       /*
+        * ANY bit is supported in v3 and up
+        */
+       if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
+               bits |= 0x4;
        bits <<= (idx * 4);
        mask = 0xfULL << (idx * 4);
  
@@@ -1727,40 -1398,148 +1734,40 @@@ static void amd_pmu_enable_event(struc
                x86_pmu_enable_event(hwc, idx);
  }
  
 -static int fixed_mode_idx(struct hw_perf_event *hwc)
 -{
 -      unsigned int hw_event;
 -
 -      hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 -
 -      if (unlikely((hw_event ==
 -                    x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
 -                   (hwc->sample_period == 1)))
 -              return X86_PMC_IDX_FIXED_BTS;
 -
 -      if (!x86_pmu.num_events_fixed)
 -              return -1;
 -
 -      /*
 -       * fixed counters do not take all possible filters
 -       */
 -      if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
 -              return -1;
 -
 -      if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
 -              return X86_PMC_IDX_FIXED_INSTRUCTIONS;
 -      if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
 -              return X86_PMC_IDX_FIXED_CPU_CYCLES;
 -      if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
 -              return X86_PMC_IDX_FIXED_BUS_CYCLES;
 -
 -      return -1;
 -}
 -
 -/*
 - * generic counter allocator: get next free counter
 - */
 -static int
 -gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
 -{
 -      int idx;
 -
 -      idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
 -      return idx == x86_pmu.num_events ? -1 : idx;
 -}
 -
  /*
 - * intel-specific counter allocator: check event constraints
 - */
 -static int
 -intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
 -{
 -      const struct event_constraint *event_constraint;
 -      int i, code;
 -
 -      if (!event_constraints)
 -              goto skip;
 -
 -      code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
 -
 -      for_each_event_constraint(event_constraint, event_constraints) {
 -              if (code == event_constraint->code) {
 -                      for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
 -                              if (!test_and_set_bit(i, cpuc->used_mask))
 -                                      return i;
 -                      }
 -                      return -1;
 -              }
 -      }
 -skip:
 -      return gen_get_event_idx(cpuc, hwc);
 -}
 -
 -static int
 -x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
 -{
 -      int idx;
 -
 -      idx = fixed_mode_idx(hwc);
 -      if (idx == X86_PMC_IDX_FIXED_BTS) {
 -              /* BTS is already occupied. */
 -              if (test_and_set_bit(idx, cpuc->used_mask))
 -                      return -EAGAIN;
 -
 -              hwc->config_base        = 0;
 -              hwc->event_base         = 0;
 -              hwc->idx                = idx;
 -      } else if (idx >= 0) {
 -              /*
 -               * Try to get the fixed event, if that is already taken
 -               * then try to get a generic event:
 -               */
 -              if (test_and_set_bit(idx, cpuc->used_mask))
 -                      goto try_generic;
 -
 -              hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 -              /*
 -               * We set it so that event_base + idx in wrmsr/rdmsr maps to
 -               * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
 -               */
 -              hwc->event_base =
 -                      MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
 -              hwc->idx = idx;
 -      } else {
 -              idx = hwc->idx;
 -              /* Try to get the previous generic event again */
 -              if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
 -try_generic:
 -                      idx = x86_pmu.get_event_idx(cpuc, hwc);
 -                      if (idx == -1)
 -                              return -EAGAIN;
 -
 -                      set_bit(idx, cpuc->used_mask);
 -                      hwc->idx = idx;
 -              }
 -              hwc->config_base = x86_pmu.eventsel;
 -              hwc->event_base  = x86_pmu.perfctr;
 -      }
 -
 -      return idx;
 -}
 -
 -/*
 - * Find a PMC slot for the freshly enabled / scheduled in event:
 + * activate a single event
 + *
 + * The event is added to the group of enabled events
 + * but only if it can be scehduled with existing events.
 + *
 + * Called with PMU disabled. If successful and return value 1,
 + * then guaranteed to call perf_enable() and hw_perf_enable()
   */
  static int x86_pmu_enable(struct perf_event *event)
  {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 -      struct hw_perf_event *hwc = &event->hw;
 -      int idx;
 -
 -      idx = x86_schedule_event(cpuc, hwc);
 -      if (idx < 0)
 -              return idx;
 -
 -      perf_events_lapic_init();
 +      struct hw_perf_event *hwc;
 +      int assign[X86_PMC_IDX_MAX];
 +      int n, n0, ret;
  
 -      x86_pmu.disable(hwc, idx);
 +      hwc = &event->hw;
  
 -      cpuc->events[idx] = event;
 -      set_bit(idx, cpuc->active_mask);
 +      n0 = cpuc->n_events;
 +      n = collect_events(cpuc, event, false);
 +      if (n < 0)
 +              return n;
  
 -      x86_perf_event_set_period(event, hwc, idx);
 -      x86_pmu.enable(hwc, idx);
 +      ret = x86_schedule_events(cpuc, n, assign);
 +      if (ret)
 +              return ret;
 +      /*
 +       * copy new assignment, now we know it is possible
 +       * will be used by hw_perf_enable()
 +       */
 +      memcpy(cpuc->assign, assign, n*sizeof(int));
  
 -      perf_event_update_userpage(event);
 +      cpuc->n_events = n;
 +      cpuc->n_added  = n - n0;
  
        return 0;
  }
@@@ -1804,7 -1583,7 +1811,7 @@@ void perf_event_print_debug(void
                pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
                pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
        }
 -      pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
 +      pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);
  
        for (idx = 0; idx < x86_pmu.num_events; idx++) {
                rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@@ -1888,8 -1667,9 +1895,8 @@@ static void intel_pmu_drain_bts_buffer(
        event->pending_kill = POLL_IN;
  }
  
 -static void x86_pmu_disable(struct perf_event *event)
 +static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc)
  {
 -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;
  
        x86_pmu.disable(hwc, idx);
  
        /*
 -       * Make sure the cleared pointer becomes visible before we
 -       * (potentially) free the event:
 -       */
 -      barrier();
 -
 -      /*
         * Drain the remaining delta count out of a event
         * that we are disabling:
         */
                intel_pmu_drain_bts_buffer(cpuc);
  
        cpuc->events[idx] = NULL;
 -      clear_bit(idx, cpuc->used_mask);
 +}
 +
 +static void x86_pmu_disable(struct perf_event *event)
 +{
 +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 +      int i;
 +
 +      __x86_pmu_disable(event, cpuc);
 +
 +      for (i = 0; i < cpuc->n_events; i++) {
 +              if (event == cpuc->event_list[i]) {
  
 +                      if (x86_pmu.put_event_constraints)
 +                              x86_pmu.put_event_constraints(cpuc, event);
 +
 +                      while (++i < cpuc->n_events)
 +                              cpuc->event_list[i-1] = cpuc->event_list[i];
 +
 +                      --cpuc->n_events;
 +                      break;
 +              }
 +      }
        perf_event_update_userpage(event);
  }
  
@@@ -2203,162 -1969,6 +2210,162 @@@ perf_event_nmi_handler(struct notifier_
        return NOTIFY_STOP;
  }
  
 +static struct event_constraint unconstrained;
 +
 +static struct event_constraint bts_constraint =
 +      EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
 +
 +static struct event_constraint *
 +intel_special_constraints(struct perf_event *event)
 +{
 +      unsigned int hw_event;
 +
 +      hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
 +
 +      if (unlikely((hw_event ==
 +                    x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
 +                   (event->hw.sample_period == 1))) {
 +
 +              return &bts_constraint;
 +      }
 +      return NULL;
 +}
 +
 +static struct event_constraint *
 +intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 +{
 +      struct event_constraint *c;
 +
 +      c = intel_special_constraints(event);
 +      if (c)
 +              return c;
 +
 +      if (x86_pmu.event_constraints) {
 +              for_each_event_constraint(c, x86_pmu.event_constraints) {
 +                      if ((event->hw.config & c->cmask) == c->code)
 +                              return c;
 +              }
 +      }
 +
 +      return &unconstrained;
 +}
 +
 +static struct event_constraint *
 +amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 +{
 +      return &unconstrained;
 +}
 +
 +static int x86_event_sched_in(struct perf_event *event,
 +                        struct perf_cpu_context *cpuctx, int cpu)
 +{
 +      int ret = 0;
 +
 +      event->state = PERF_EVENT_STATE_ACTIVE;
 +      event->oncpu = cpu;
 +      event->tstamp_running += event->ctx->time - event->tstamp_stopped;
 +
 +      if (!is_x86_event(event))
 +              ret = event->pmu->enable(event);
 +
 +      if (!ret && !is_software_event(event))
 +              cpuctx->active_oncpu++;
 +
 +      if (!ret && event->attr.exclusive)
 +              cpuctx->exclusive = 1;
 +
 +      return ret;
 +}
 +
 +static void x86_event_sched_out(struct perf_event *event,
 +                          struct perf_cpu_context *cpuctx, int cpu)
 +{
 +      event->state = PERF_EVENT_STATE_INACTIVE;
 +      event->oncpu = -1;
 +
 +      if (!is_x86_event(event))
 +              event->pmu->disable(event);
 +
 +      event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
 +
 +      if (!is_software_event(event))
 +              cpuctx->active_oncpu--;
 +
 +      if (event->attr.exclusive || !cpuctx->active_oncpu)
 +              cpuctx->exclusive = 0;
 +}
 +
 +/*
 + * Called to enable a whole group of events.
 + * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
 + * Assumes the caller has disabled interrupts and has
 + * frozen the PMU with hw_perf_save_disable.
 + *
 + * called with PMU disabled. If successful and return value 1,
 + * then guaranteed to call perf_enable() and hw_perf_enable()
 + */
 +int hw_perf_group_sched_in(struct perf_event *leader,
 +             struct perf_cpu_context *cpuctx,
 +             struct perf_event_context *ctx, int cpu)
 +{
 +      struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 +      struct perf_event *sub;
 +      int assign[X86_PMC_IDX_MAX];
 +      int n0, n1, ret;
 +
 +      /* n0 = total number of events */
 +      n0 = collect_events(cpuc, leader, true);
 +      if (n0 < 0)
 +              return n0;
 +
 +      ret = x86_schedule_events(cpuc, n0, assign);
 +      if (ret)
 +              return ret;
 +
 +      ret = x86_event_sched_in(leader, cpuctx, cpu);
 +      if (ret)
 +              return ret;
 +
 +      n1 = 1;
 +      list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 +              if (sub->state > PERF_EVENT_STATE_OFF) {
 +                      ret = x86_event_sched_in(sub, cpuctx, cpu);
 +                      if (ret)
 +                              goto undo;
 +                      ++n1;
 +              }
 +      }
 +      /*
 +       * copy new assignment, now we know it is possible
 +       * will be used by hw_perf_enable()
 +       */
 +      memcpy(cpuc->assign, assign, n0*sizeof(int));
 +
 +      cpuc->n_events  = n0;
 +      cpuc->n_added   = n1;
 +      ctx->nr_active += n1;
 +
 +      /*
 +       * 1 means successful and events are active
 +       * This is not quite true because we defer
 +       * actual activation until hw_perf_enable() but
 +       * this way we ensure caller won't try to enable
 +       * individual events
 +       */
 +      return 1;
 +undo:
 +      x86_event_sched_out(leader, cpuctx, cpu);
 +      n0  = 1;
 +      list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 +              if (sub->state == PERF_EVENT_STATE_ACTIVE) {
 +                      x86_event_sched_out(sub, cpuctx, cpu);
 +                      if (++n0 == n1)
 +                              break;
 +              }
 +      }
 +      return ret;
 +}
 +
  static __read_mostly struct notifier_block perf_event_nmi_notifier = {
        .notifier_call          = perf_event_nmi_handler,
        .next                   = NULL,
@@@ -2390,8 -2000,7 +2397,8 @@@ static __initconst struct x86_pmu p6_pm
         */
        .event_bits             = 32,
        .event_mask             = (1ULL << 32) - 1,
 -      .get_event_idx          = intel_get_event_idx,
 +      .get_event_constraints  = intel_get_event_constraints,
 +      .event_constraints      = intel_p6_event_constraints
  };
  
  static __initconst struct x86_pmu intel_pmu = {
        .max_period             = (1ULL << 31) - 1,
        .enable_bts             = intel_pmu_enable_bts,
        .disable_bts            = intel_pmu_disable_bts,
 -      .get_event_idx          = intel_get_event_idx,
 +      .get_event_constraints  = intel_get_event_constraints
  };
  
  static __initconst struct x86_pmu amd_pmu = {
        .apic                   = 1,
        /* use highest bit to detect overflow */
        .max_period             = (1ULL << 47) - 1,
 -      .get_event_idx          = gen_get_event_idx,
 +      .get_event_constraints  = amd_get_event_constraints
  };
  
  static __init int p6_pmu_init(void)
        case 7:
        case 8:
        case 11: /* Pentium III */
        case 9:
        case 13:
                /* Pentium M */
 -              event_constraints = intel_p6_event_constraints;
                break;
        default:
                pr_cont("unsupported p6 CPU model %d ",
@@@ -2516,40 -2128,23 +2523,40 @@@ static __init int intel_pmu_init(void
                memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
  
 +              x86_pmu.event_constraints = intel_core_event_constraints;
                pr_cont("Core2 events, ");
 -              event_constraints = intel_core_event_constraints;
                break;
 -      default:
 -      case 26:
 +
 +      case 26: /* 45 nm nehalem, "Bloomfield" */
 +      case 30: /* 45 nm nehalem, "Lynnfield" */
                memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
  
 -              event_constraints = intel_nehalem_event_constraints;
 +              x86_pmu.event_constraints = intel_nehalem_event_constraints;
                pr_cont("Nehalem/Corei7 events, ");
                break;
        case 28:
                memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
  
 +              x86_pmu.event_constraints = intel_gen_event_constraints;
                pr_cont("Atom events, ");
                break;
 +
 +      case 37: /* 32 nm nehalem, "Clarkdale" */
 +      case 44: /* 32 nm nehalem, "Gulftown" */
 +              memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 +                     sizeof(hw_cache_event_ids));
 +
 +              x86_pmu.event_constraints = intel_westmere_event_constraints;
 +              pr_cont("Westmere events, ");
 +              break;
 +      default:
 +              /*
 +               * default constraints for v2 and up
 +               */
 +              x86_pmu.event_constraints = intel_gen_event_constraints;
 +              pr_cont("generic architected perfmon, ");
        }
        return 0;
  }
@@@ -2625,9 -2220,6 +2632,9 @@@ void __init init_hw_perf_events(void
        perf_events_lapic_init();
        register_die_notifier(&perf_event_nmi_notifier);
  
 +      unconstrained = (struct event_constraint)
 +              EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0);
 +
        pr_info("... version:                %d\n",     x86_pmu.version);
        pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
        pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
@@@ -2649,73 -2241,46 +2656,73 @@@ static const struct pmu pmu = 
        .unthrottle     = x86_pmu_unthrottle,
  };
  
 -static int
 -validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
 -{
 -      struct hw_perf_event fake_event = event->hw;
 -
 -      if (event->pmu && event->pmu != &pmu)
 -              return 0;
 -
 -      return x86_schedule_event(cpuc, &fake_event) >= 0;
 -}
 -
 +/*
 + * validate a single event group
 + *
 + * validation includes:
 + *    - check events are compatible with each other
 + *    - events do not compete for the same counter
 + *    - number of events <= number of counters
 + *
 + * validation ensures the group can be loaded onto the
 + * PMU if it was the only group available.
 + */
  static int validate_group(struct perf_event *event)
  {
 -      struct perf_event *sibling, *leader = event->group_leader;
 -      struct cpu_hw_events fake_pmu;
 +      struct perf_event *leader = event->group_leader;
 +      struct cpu_hw_events *fake_cpuc;
 +      int ret, n;
  
 -      memset(&fake_pmu, 0, sizeof(fake_pmu));
 +      ret = -ENOMEM;
 +      fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
 +      if (!fake_cpuc)
 +              goto out;
  
 -      if (!validate_event(&fake_pmu, leader))
 -              return -ENOSPC;
 +      /*
 +       * the event is not yet connected with its
 +       * siblings therefore we must first collect
 +       * existing siblings, then add the new event
 +       * before we can simulate the scheduling
 +       */
 +      ret = -ENOSPC;
 +      n = collect_events(fake_cpuc, leader, true);
 +      if (n < 0)
 +              goto out_free;
  
 -      list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
 -              if (!validate_event(&fake_pmu, sibling))
 -                      return -ENOSPC;
 -      }
 +      fake_cpuc->n_events = n;
 +      n = collect_events(fake_cpuc, event, false);
 +      if (n < 0)
 +              goto out_free;
  
 -      if (!validate_event(&fake_pmu, event))
 -              return -ENOSPC;
 +      fake_cpuc->n_events = n;
  
 -      return 0;
 +      ret = x86_schedule_events(fake_cpuc, n, NULL);
 +
 +out_free:
 +      kfree(fake_cpuc);
 +out:
 +      return ret;
  }
  
  const struct pmu *hw_perf_event_init(struct perf_event *event)
  {
 +      const struct pmu *tmp;
        int err;
  
        err = __hw_perf_event_init(event);
        if (!err) {
 +              /*
 +               * we temporarily connect event to its pmu
 +               * such that validate_group() can classify
 +               * it as an x86 event using is_x86_event()
 +               */
 +              tmp = event->pmu;
 +              event->pmu = &pmu;
 +
                if (event->group_leader != event)
                        err = validate_group(event);
 +
 +              event->pmu = tmp;
        }
        if (err) {
                if (event->destroy)
@@@ -2739,6 -2304,7 +2746,6 @@@ void callchain_store(struct perf_callch
  
  static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
  static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
 -static DEFINE_PER_CPU(int, in_ignored_frame);
  
  
  static void
@@@ -2754,6 -2320,10 +2761,6 @@@ static void backtrace_warning(void *dat
  
  static int backtrace_stack(void *data, char *name)
  {
 -      per_cpu(in_ignored_frame, smp_processor_id()) =
 -                      x86_is_stack_id(NMI_STACK, name) ||
 -                      x86_is_stack_id(DEBUG_STACK, name);
 -
        return 0;
  }
  
@@@ -2761,6 -2331,9 +2768,6 @@@ static void backtrace_address(void *dat
  {
        struct perf_callchain_entry *entry = data;
  
 -      if (per_cpu(in_ignored_frame, smp_processor_id()))
 -              return;
 -
        if (reliable)
                callchain_store(entry, addr);
  }
@@@ -2867,6 -2440,9 +2874,6 @@@ perf_do_callchain(struct pt_regs *regs
  
        is_user = user_mode(regs);
  
 -      if (!current || current->pid == 0)
 -              return;
 -
        if (is_user && current->state != TASK_RUNNING)
                return;
  
@@@ -290,7 -290,7 +290,7 @@@ struct perf_event_mmap_page 
  };
  
  #define PERF_RECORD_MISC_CPUMODE_MASK         (3 << 0)
 -#define PERF_RECORD_MISC_CPUMODE_UNKNOWN              (0 << 0)
 +#define PERF_RECORD_MISC_CPUMODE_UNKNOWN      (0 << 0)
  #define PERF_RECORD_MISC_KERNEL                       (1 << 0)
  #define PERF_RECORD_MISC_USER                 (2 << 0)
  #define PERF_RECORD_MISC_HYPERVISOR           (3 << 0)
@@@ -356,8 -356,8 +356,8 @@@ enum perf_event_type 
         *      u64                             stream_id;
         * };
         */
 -      PERF_RECORD_THROTTLE            = 5,
 -      PERF_RECORD_UNTHROTTLE          = 6,
 +      PERF_RECORD_THROTTLE                    = 5,
 +      PERF_RECORD_UNTHROTTLE                  = 6,
  
        /*
         * struct {
  
        /*
         * struct {
 -       *      struct perf_event_header        header;
 -       *      u32                             pid, tid;
 +       *      struct perf_event_header        header;
 +       *      u32                             pid, tid;
         *
 -       *      struct read_format              values;
 +       *      struct read_format              values;
         * };
         */
        PERF_RECORD_READ                        = 8,
         *        char                  data[size];}&& PERF_SAMPLE_RAW
         * };
         */
 -      PERF_RECORD_SAMPLE              = 9,
 +      PERF_RECORD_SAMPLE                      = 9,
  
        PERF_RECORD_MAX,                        /* non-ABI */
  };
@@@ -498,8 -498,9 +498,8 @@@ struct hw_perf_event 
        atomic64_t                      period_left;
        u64                             interrupts;
  
 -      u64                             freq_count;
 -      u64                             freq_interrupts;
 -      u64                             freq_stamp;
 +      u64                             freq_time_stamp;
 +      u64                             freq_count_stamp;
  #endif
  };
  
@@@ -564,10 -565,6 +564,10 @@@ typedef void (*perf_overflow_handler_t)
                                        struct perf_sample_data *,
                                        struct pt_regs *regs);
  
 +enum perf_group_flag {
 +      PERF_GROUP_SOFTWARE = 0x1,
 +};
 +
  /**
   * struct perf_event - performance event kernel representation:
   */
@@@ -577,7 -574,6 +577,7 @@@ struct perf_event 
        struct list_head                event_entry;
        struct list_head                sibling_list;
        int                             nr_siblings;
 +      int                             group_flags;
        struct perf_event               *group_leader;
        struct perf_event               *output;
        const struct pmu                *pmu;
  
        perf_overflow_handler_t         overflow_handler;
  
 -#ifdef CONFIG_EVENT_PROFILE
 +#ifdef CONFIG_EVENT_TRACING
        struct event_filter             *filter;
  #endif
  
@@@ -687,8 -683,7 +687,8 @@@ struct perf_event_context 
         */
        struct mutex                    mutex;
  
 -      struct list_head                group_list;
 +      struct list_head                pinned_groups;
 +      struct list_head                flexible_groups;
        struct list_head                event_list;
        int                             nr_events;
        int                             nr_active;
@@@ -751,9 -746,10 +751,9 @@@ extern int perf_max_events
  
  extern const struct pmu *hw_perf_event_init(struct perf_event *event);
  
 -extern void perf_event_task_sched_in(struct task_struct *task, int cpu);
 -extern void perf_event_task_sched_out(struct task_struct *task,
 -                                      struct task_struct *next, int cpu);
 -extern void perf_event_task_tick(struct task_struct *task, int cpu);
 +extern void perf_event_task_sched_in(struct task_struct *task);
 +extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
 +extern void perf_event_task_tick(struct task_struct *task);
  extern int perf_event_init_task(struct task_struct *child);
  extern void perf_event_exit_task(struct task_struct *child);
  extern void perf_event_free_task(struct task_struct *task);
@@@ -818,9 -814,14 +818,14 @@@ extern int perf_event_overflow(struct p
   */
  static inline int is_software_event(struct perf_event *event)
  {
-       return (event->attr.type != PERF_TYPE_RAW) &&
-               (event->attr.type != PERF_TYPE_HARDWARE) &&
-               (event->attr.type != PERF_TYPE_HW_CACHE);
+       switch (event->attr.type) {
+       case PERF_TYPE_SOFTWARE:
+       case PERF_TYPE_TRACEPOINT:
+       /* for now the breakpoint stuff also works as software event */
+       case PERF_TYPE_BREAKPOINT:
+               return 1;
+       }
+       return 0;
  }
  
  extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@@ -852,7 -853,8 +857,7 @@@ extern int sysctl_perf_event_mlock
  extern int sysctl_perf_event_sample_rate;
  
  extern void perf_event_init(void);
 -extern void perf_tp_event(int event_id, u64 addr, u64 count,
 -                               void *record, int entry_size);
 +extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size);
  extern void perf_bp_event(struct perf_event *event, void *data);
  
  #ifndef perf_misc_flags
@@@ -873,12 -875,12 +878,12 @@@ extern void perf_event_enable(struct pe
  extern void perf_event_disable(struct perf_event *event);
  #else
  static inline void
 -perf_event_task_sched_in(struct task_struct *task, int cpu)           { }
 +perf_event_task_sched_in(struct task_struct *task)                    { }
  static inline void
  perf_event_task_sched_out(struct task_struct *task,
 -                          struct task_struct *next, int cpu)          { }
 +                          struct task_struct *next)                   { }
  static inline void
 -perf_event_task_tick(struct task_struct *task, int cpu)                       { }
 +perf_event_task_tick(struct task_struct *task)                                { }
  static inline int perf_event_init_task(struct task_struct *child)     { return 0; }
  static inline void perf_event_exit_task(struct task_struct *child)    { }
  static inline void perf_event_free_task(struct task_struct *task)     { }
@@@ -893,13 -895,13 +898,13 @@@ static inline voi
  perf_sw_event(u32 event_id, u64 nr, int nmi,
                     struct pt_regs *regs, u64 addr)                    { }
  static inline void
 -perf_bp_event(struct perf_event *event, void *data)           { }
 +perf_bp_event(struct perf_event *event, void *data)                   { }
  
  static inline void perf_event_mmap(struct vm_area_struct *vma)                { }
  static inline void perf_event_comm(struct task_struct *tsk)           { }
  static inline void perf_event_fork(struct task_struct *tsk)           { }
  static inline void perf_event_init(void)                              { }
 -static inline int  perf_swevent_get_recursion_context(void)  { return -1; }
 +static inline int  perf_swevent_get_recursion_context(void)           { return -1; }
  static inline void perf_swevent_put_recursion_context(int rctx)               { }
  static inline void perf_event_enable(struct perf_event *event)                { }
  static inline void perf_event_disable(struct perf_event *event)               { }
diff --combined kernel/perf_event.c
@@@ -289,15 -289,6 +289,15 @@@ static void update_event_times(struct p
        event->total_time_running = run_end - event->tstamp_running;
  }
  
 +static struct list_head *
 +ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 +{
 +      if (event->attr.pinned)
 +              return &ctx->pinned_groups;
 +      else
 +              return &ctx->flexible_groups;
 +}
 +
  /*
   * Add a event from the lists for its context.
   * Must be called with ctx->mutex and ctx->lock held.
@@@ -312,19 -303,9 +312,19 @@@ list_add_event(struct perf_event *event
         * add it straight to the context's event list, or to the group
         * leader's sibling list:
         */
 -      if (group_leader == event)
 -              list_add_tail(&event->group_entry, &ctx->group_list);
 -      else {
 +      if (group_leader == event) {
 +              struct list_head *list;
 +
 +              if (is_software_event(event))
 +                      event->group_flags |= PERF_GROUP_SOFTWARE;
 +
 +              list = ctx_group_list(event, ctx);
 +              list_add_tail(&event->group_entry, list);
 +      } else {
 +              if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
 +                  !is_software_event(event))
 +                      group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
 +
                list_add_tail(&event->group_entry, &group_leader->sibling_list);
                group_leader->nr_siblings++;
        }
@@@ -374,14 -355,9 +374,14 @@@ list_del_event(struct perf_event *event
         * to the context list directly:
         */
        list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
 +              struct list_head *list;
  
 -              list_move_tail(&sibling->group_entry, &ctx->group_list);
 +              list = ctx_group_list(event, ctx);
 +              list_move_tail(&sibling->group_entry, list);
                sibling->group_leader = sibling;
 +
 +              /* Inherit group flags from the previous leader */
 +              sibling->group_flags = event->group_flags;
        }
  }
  
@@@ -710,6 -686,24 +710,6 @@@ group_error
  }
  
  /*
 - * Return 1 for a group consisting entirely of software events,
 - * 0 if the group contains any hardware events.
 - */
 -static int is_software_only_group(struct perf_event *leader)
 -{
 -      struct perf_event *event;
 -
 -      if (!is_software_event(leader))
 -              return 0;
 -
 -      list_for_each_entry(event, &leader->sibling_list, group_entry)
 -              if (!is_software_event(event))
 -                      return 0;
 -
 -      return 1;
 -}
 -
 -/*
   * Work out whether we can put this event group on the CPU now.
   */
  static int group_can_go_on(struct perf_event *event,
        /*
         * Groups consisting entirely of software events can always go on.
         */
 -      if (is_software_only_group(event))
 +      if (event->group_flags & PERF_GROUP_SOFTWARE)
                return 1;
        /*
         * If an exclusive group is already on, no other hardware
@@@ -1049,15 -1043,8 +1049,15 @@@ static int perf_event_refresh(struct pe
        return 0;
  }
  
 -void __perf_event_sched_out(struct perf_event_context *ctx,
 -                            struct perf_cpu_context *cpuctx)
 +enum event_type_t {
 +      EVENT_FLEXIBLE = 0x1,
 +      EVENT_PINNED = 0x2,
 +      EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
 +};
 +
 +static void ctx_sched_out(struct perf_event_context *ctx,
 +                        struct perf_cpu_context *cpuctx,
 +                        enum event_type_t event_type)
  {
        struct perf_event *event;
  
        update_context_time(ctx);
  
        perf_disable();
 -      if (ctx->nr_active) {
 -              list_for_each_entry(event, &ctx->group_list, group_entry)
 +      if (!ctx->nr_active)
 +              goto out_enable;
 +
 +      if (event_type & EVENT_PINNED)
 +              list_for_each_entry(event, &ctx->pinned_groups, group_entry)
                        group_sched_out(event, cpuctx, ctx);
 -      }
 +
 +      if (event_type & EVENT_FLEXIBLE)
 +              list_for_each_entry(event, &ctx->flexible_groups, group_entry)
 +                      group_sched_out(event, cpuctx, ctx);
 +
 + out_enable:
        perf_enable();
   out:
        raw_spin_unlock(&ctx->lock);
@@@ -1191,9 -1170,9 +1191,9 @@@ static void perf_event_sync_stat(struc
   * not restart the event.
   */
  void perf_event_task_sched_out(struct task_struct *task,
 -                               struct task_struct *next, int cpu)
 +                               struct task_struct *next)
  {
 -      struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 +      struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
        struct perf_event_context *ctx = task->perf_event_ctxp;
        struct perf_event_context *next_ctx;
        struct perf_event_context *parent;
        rcu_read_unlock();
  
        if (do_switch) {
 -              __perf_event_sched_out(ctx, cpuctx);
 +              ctx_sched_out(ctx, cpuctx, EVENT_ALL);
                cpuctx->task_ctx = NULL;
        }
  }
  
 -/*
 - * Called with IRQs disabled
 - */
 -static void __perf_event_task_sched_out(struct perf_event_context *ctx)
 +static void task_ctx_sched_out(struct perf_event_context *ctx,
 +                             enum event_type_t event_type)
  {
        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
  
        if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
                return;
  
 -      __perf_event_sched_out(ctx, cpuctx);
 +      ctx_sched_out(ctx, cpuctx, event_type);
        cpuctx->task_ctx = NULL;
  }
  
  /*
   * Called with IRQs disabled
   */
 -static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
 +static void __perf_event_task_sched_out(struct perf_event_context *ctx)
  {
 -      __perf_event_sched_out(&cpuctx->ctx, cpuctx);
 +      task_ctx_sched_out(ctx, EVENT_ALL);
 +}
 +
 +/*
 + * Called with IRQs disabled
 + */
 +static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
 +                            enum event_type_t event_type)
 +{
 +      ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
  }
  
  static void
 -__perf_event_sched_in(struct perf_event_context *ctx,
 -                      struct perf_cpu_context *cpuctx, int cpu)
 +ctx_pinned_sched_in(struct perf_event_context *ctx,
 +                  struct perf_cpu_context *cpuctx,
 +                  int cpu)
  {
        struct perf_event *event;
 -      int can_add_hw = 1;
 -
 -      raw_spin_lock(&ctx->lock);
 -      ctx->is_active = 1;
 -      if (likely(!ctx->nr_events))
 -              goto out;
 -
 -      ctx->timestamp = perf_clock();
  
 -      perf_disable();
 -
 -      /*
 -       * First go through the list and put on any pinned groups
 -       * in order to give them the best chance of going on.
 -       */
 -      list_for_each_entry(event, &ctx->group_list, group_entry) {
 -              if (event->state <= PERF_EVENT_STATE_OFF ||
 -                  !event->attr.pinned)
 +      list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
 +              if (event->state <= PERF_EVENT_STATE_OFF)
                        continue;
                if (event->cpu != -1 && event->cpu != cpu)
                        continue;
                        event->state = PERF_EVENT_STATE_ERROR;
                }
        }
 +}
  
 -      list_for_each_entry(event, &ctx->group_list, group_entry) {
 -              /*
 -               * Ignore events in OFF or ERROR state, and
 -               * ignore pinned events since we did them already.
 -               */
 -              if (event->state <= PERF_EVENT_STATE_OFF ||
 -                  event->attr.pinned)
 -                      continue;
 +static void
 +ctx_flexible_sched_in(struct perf_event_context *ctx,
 +                    struct perf_cpu_context *cpuctx,
 +                    int cpu)
 +{
 +      struct perf_event *event;
 +      int can_add_hw = 1;
  
 +      list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
 +              /* Ignore events in OFF or ERROR state */
 +              if (event->state <= PERF_EVENT_STATE_OFF)
 +                      continue;
                /*
                 * Listen to the 'cpu' scheduling filter constraint
                 * of events:
                        if (group_sched_in(event, cpuctx, ctx, cpu))
                                can_add_hw = 0;
        }
 +}
 +
 +static void
 +ctx_sched_in(struct perf_event_context *ctx,
 +           struct perf_cpu_context *cpuctx,
 +           enum event_type_t event_type)
 +{
 +      int cpu = smp_processor_id();
 +
 +      raw_spin_lock(&ctx->lock);
 +      ctx->is_active = 1;
 +      if (likely(!ctx->nr_events))
 +              goto out;
 +
 +      ctx->timestamp = perf_clock();
 +
 +      perf_disable();
 +
 +      /*
 +       * First go through the list and put on any pinned groups
 +       * in order to give them the best chance of going on.
 +       */
 +      if (event_type & EVENT_PINNED)
 +              ctx_pinned_sched_in(ctx, cpuctx, cpu);
 +
 +      /* Then walk through the lower prio flexible groups */
 +      if (event_type & EVENT_FLEXIBLE)
 +              ctx_flexible_sched_in(ctx, cpuctx, cpu);
 +
        perf_enable();
   out:
        raw_spin_unlock(&ctx->lock);
  }
  
 +static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 +                           enum event_type_t event_type)
 +{
 +      struct perf_event_context *ctx = &cpuctx->ctx;
 +
 +      ctx_sched_in(ctx, cpuctx, event_type);
 +}
 +
 +static void task_ctx_sched_in(struct task_struct *task,
 +                            enum event_type_t event_type)
 +{
 +      struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
 +      struct perf_event_context *ctx = task->perf_event_ctxp;
 +
 +      if (likely(!ctx))
 +              return;
 +      if (cpuctx->task_ctx == ctx)
 +              return;
 +      ctx_sched_in(ctx, cpuctx, event_type);
 +      cpuctx->task_ctx = ctx;
 +}
  /*
   * Called from scheduler to add the events of the current task
   * with interrupts disabled.
   * accessing the event control register. If a NMI hits, then it will
   * keep the event running.
   */
 -void perf_event_task_sched_in(struct task_struct *task, int cpu)
 +void perf_event_task_sched_in(struct task_struct *task)
  {
 -      struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 +      struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
        struct perf_event_context *ctx = task->perf_event_ctxp;
  
        if (likely(!ctx))
                return;
 +
        if (cpuctx->task_ctx == ctx)
                return;
 -      __perf_event_sched_in(ctx, cpuctx, cpu);
 -      cpuctx->task_ctx = ctx;
 -}
  
 -static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 -{
 -      struct perf_event_context *ctx = &cpuctx->ctx;
 +      /*
 +       * We want to keep the following priority order:
 +       * cpu pinned (that don't need to move), task pinned,
 +       * cpu flexible, task flexible.
 +       */
 +      cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
  
 -      __perf_event_sched_in(ctx, cpuctx, cpu);
 +      ctx_sched_in(ctx, cpuctx, EVENT_PINNED);
 +      cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
 +      ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
 +
 +      cpuctx->task_ctx = ctx;
  }
  
  #define MAX_INTERRUPTS (~0ULL)
  
  static void perf_log_throttle(struct perf_event *event, int enable);
  
 -static void perf_adjust_period(struct perf_event *event, u64 events)
 +static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
 +{
 +      u64 frequency = event->attr.sample_freq;
 +      u64 sec = NSEC_PER_SEC;
 +      u64 divisor, dividend;
 +
 +      int count_fls, nsec_fls, frequency_fls, sec_fls;
 +
 +      count_fls = fls64(count);
 +      nsec_fls = fls64(nsec);
 +      frequency_fls = fls64(frequency);
 +      sec_fls = 30;
 +
 +      /*
 +       * We got @count in @nsec, with a target of sample_freq HZ
 +       * the target period becomes:
 +       *
 +       *             @count * 10^9
 +       * period = -------------------
 +       *          @nsec * sample_freq
 +       *
 +       */
 +
 +      /*
 +       * Reduce accuracy by one bit such that @a and @b converge
 +       * to a similar magnitude.
 +       */
 +#define REDUCE_FLS(a, b)              \
 +do {                                  \
 +      if (a##_fls > b##_fls) {        \
 +              a >>= 1;                \
 +              a##_fls--;              \
 +      } else {                        \
 +              b >>= 1;                \
 +              b##_fls--;              \
 +      }                               \
 +} while (0)
 +
 +      /*
 +       * Reduce accuracy until either term fits in a u64, then proceed with
 +       * the other, so that finally we can do a u64/u64 division.
 +       */
 +      while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
 +              REDUCE_FLS(nsec, frequency);
 +              REDUCE_FLS(sec, count);
 +      }
 +
 +      if (count_fls + sec_fls > 64) {
 +              divisor = nsec * frequency;
 +
 +              while (count_fls + sec_fls > 64) {
 +                      REDUCE_FLS(count, sec);
 +                      divisor >>= 1;
 +              }
 +
 +              dividend = count * sec;
 +      } else {
 +              dividend = count * sec;
 +
 +              while (nsec_fls + frequency_fls > 64) {
 +                      REDUCE_FLS(nsec, frequency);
 +                      dividend >>= 1;
 +              }
 +
 +              divisor = nsec * frequency;
 +      }
 +
 +      return div64_u64(dividend, divisor);
 +}
 +
 +static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
  {
        struct hw_perf_event *hwc = &event->hw;
        u64 period, sample_period;
        s64 delta;
  
 -      events *= hwc->sample_period;
 -      period = div64_u64(events, event->attr.sample_freq);
 +      period = perf_calculate_period(event, nsec, count);
  
        delta = (s64)(period - hwc->sample_period);
        delta = (delta + 7) / 8; /* low pass filter */
                sample_period = 1;
  
        hwc->sample_period = sample_period;
 +
 +      if (atomic64_read(&hwc->period_left) > 8*sample_period) {
 +              perf_disable();
 +              event->pmu->disable(event);
 +              atomic64_set(&hwc->period_left, 0);
 +              event->pmu->enable(event);
 +              perf_enable();
 +      }
  }
  
  static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
  {
        struct perf_event *event;
        struct hw_perf_event *hwc;
 -      u64 interrupts, freq;
 +      u64 interrupts, now;
 +      s64 delta;
  
        raw_spin_lock(&ctx->lock);
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
                if (interrupts == MAX_INTERRUPTS) {
                        perf_log_throttle(event, 1);
                        event->pmu->unthrottle(event);
 -                      interrupts = 2*sysctl_perf_event_sample_rate/HZ;
                }
  
                if (!event->attr.freq || !event->attr.sample_freq)
                        continue;
  
 -              /*
 -               * if the specified freq < HZ then we need to skip ticks
 -               */
 -              if (event->attr.sample_freq < HZ) {
 -                      freq = event->attr.sample_freq;
 -
 -                      hwc->freq_count += freq;
 -                      hwc->freq_interrupts += interrupts;
 -
 -                      if (hwc->freq_count < HZ)
 -                              continue;
 -
 -                      interrupts = hwc->freq_interrupts;
 -                      hwc->freq_interrupts = 0;
 -                      hwc->freq_count -= HZ;
 -              } else
 -                      freq = HZ;
 -
 -              perf_adjust_period(event, freq * interrupts);
 +              event->pmu->read(event);
 +              now = atomic64_read(&event->count);
 +              delta = now - hwc->freq_count_stamp;
 +              hwc->freq_count_stamp = now;
  
 -              /*
 -               * In order to avoid being stalled by an (accidental) huge
 -               * sample period, force reset the sample period if we didn't
 -               * get any events in this freq period.
 -               */
 -              if (!interrupts) {
 -                      perf_disable();
 -                      event->pmu->disable(event);
 -                      atomic64_set(&hwc->period_left, 0);
 -                      event->pmu->enable(event);
 -                      perf_enable();
 -              }
 +              if (delta > 0)
 +                      perf_adjust_period(event, TICK_NSEC, delta);
        }
        raw_spin_unlock(&ctx->lock);
  }
   */
  static void rotate_ctx(struct perf_event_context *ctx)
  {
 -      struct perf_event *event;
 -
        if (!ctx->nr_events)
                return;
  
        raw_spin_lock(&ctx->lock);
 -      /*
 -       * Rotate the first entry last (works just fine for group events too):
 -       */
 +
 +      /* Rotate the first entry last of non-pinned groups */
        perf_disable();
 -      list_for_each_entry(event, &ctx->group_list, group_entry) {
 -              list_move_tail(&event->group_entry, &ctx->group_list);
 -              break;
 -      }
 +
 +      list_rotate_left(&ctx->flexible_groups);
 +
        perf_enable();
  
        raw_spin_unlock(&ctx->lock);
  }
  
 -void perf_event_task_tick(struct task_struct *curr, int cpu)
 +void perf_event_task_tick(struct task_struct *curr)
  {
        struct perf_cpu_context *cpuctx;
        struct perf_event_context *ctx;
        if (!atomic_read(&nr_events))
                return;
  
 -      cpuctx = &per_cpu(perf_cpu_context, cpu);
 +      cpuctx = &__get_cpu_var(perf_cpu_context);
        ctx = curr->perf_event_ctxp;
  
        perf_ctx_adjust_freq(&cpuctx->ctx);
        if (ctx)
                perf_ctx_adjust_freq(ctx);
  
 -      perf_event_cpu_sched_out(cpuctx);
 +      cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
        if (ctx)
 -              __perf_event_task_sched_out(ctx);
 +              task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
  
        rotate_ctx(&cpuctx->ctx);
        if (ctx)
                rotate_ctx(ctx);
  
 -      perf_event_cpu_sched_in(cpuctx, cpu);
 +      cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
        if (ctx)
 -              perf_event_task_sched_in(curr, cpu);
 +              task_ctx_sched_in(curr, EVENT_FLEXIBLE);
 +}
 +
 +static int event_enable_on_exec(struct perf_event *event,
 +                              struct perf_event_context *ctx)
 +{
 +      if (!event->attr.enable_on_exec)
 +              return 0;
 +
 +      event->attr.enable_on_exec = 0;
 +      if (event->state >= PERF_EVENT_STATE_INACTIVE)
 +              return 0;
 +
 +      __perf_event_mark_enabled(event, ctx);
 +
 +      return 1;
  }
  
  /*
@@@ -1635,7 -1499,6 +1635,7 @@@ static void perf_event_enable_on_exec(s
        struct perf_event *event;
        unsigned long flags;
        int enabled = 0;
 +      int ret;
  
        local_irq_save(flags);
        ctx = task->perf_event_ctxp;
  
        raw_spin_lock(&ctx->lock);
  
 -      list_for_each_entry(event, &ctx->group_list, group_entry) {
 -              if (!event->attr.enable_on_exec)
 -                      continue;
 -              event->attr.enable_on_exec = 0;
 -              if (event->state >= PERF_EVENT_STATE_INACTIVE)
 -                      continue;
 -              __perf_event_mark_enabled(event, ctx);
 -              enabled = 1;
 +      list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
 +              ret = event_enable_on_exec(event, ctx);
 +              if (ret)
 +                      enabled = 1;
 +      }
 +
 +      list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
 +              ret = event_enable_on_exec(event, ctx);
 +              if (ret)
 +                      enabled = 1;
        }
  
        /*
  
        raw_spin_unlock(&ctx->lock);
  
 -      perf_event_task_sched_in(task, smp_processor_id());
 +      perf_event_task_sched_in(task);
   out:
        local_irq_restore(flags);
  }
@@@ -1729,8 -1590,7 +1729,8 @@@ __perf_event_init_context(struct perf_e
  {
        raw_spin_lock_init(&ctx->lock);
        mutex_init(&ctx->mutex);
 -      INIT_LIST_HEAD(&ctx->group_list);
 +      INIT_LIST_HEAD(&ctx->pinned_groups);
 +      INIT_LIST_HEAD(&ctx->flexible_groups);
        INIT_LIST_HEAD(&ctx->event_list);
        atomic_set(&ctx->refcount, 1);
        ctx->task = task;
@@@ -3408,6 -3268,9 +3408,9 @@@ static void perf_event_task_output(stru
  
  static int perf_event_task_match(struct perf_event *event)
  {
+       if (event->state != PERF_EVENT_STATE_ACTIVE)
+               return 0;
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
  
@@@ -3517,6 -3380,9 +3520,9 @@@ static void perf_event_comm_output(stru
  
  static int perf_event_comm_match(struct perf_event *event)
  {
+       if (event->state != PERF_EVENT_STATE_ACTIVE)
+               return 0;
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
  
@@@ -3634,6 -3500,9 +3640,9 @@@ static void perf_event_mmap_output(stru
  static int perf_event_mmap_match(struct perf_event *event,
                                   struct perf_mmap_event *mmap_event)
  {
+       if (event->state != PERF_EVENT_STATE_ACTIVE)
+               return 0;
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
  
@@@ -3820,12 -3689,12 +3829,12 @@@ static int __perf_event_overflow(struc
  
        if (event->attr.freq) {
                u64 now = perf_clock();
 -              s64 delta = now - hwc->freq_stamp;
 +              s64 delta = now - hwc->freq_time_stamp;
  
 -              hwc->freq_stamp = now;
 +              hwc->freq_time_stamp = now;
  
 -              if (delta > 0 && delta < TICK_NSEC)
 -                      perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
 +              if (delta > 0 && delta < 2*TICK_NSEC)
 +                      perf_adjust_period(event, delta, hwc->last_period);
        }
  
        /*
@@@ -4316,7 -4185,7 +4325,7 @@@ static const struct pmu perf_ops_task_c
        .read           = task_clock_perf_event_read,
  };
  
 -#ifdef CONFIG_EVENT_PROFILE
 +#ifdef CONFIG_EVENT_TRACING
  
  void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
                          int entry_size)
@@@ -4421,7 -4290,7 +4430,7 @@@ static void perf_event_free_filter(stru
  {
  }
  
 -#endif /* CONFIG_EVENT_PROFILE */
 +#endif /* CONFIG_EVENT_TRACING */
  
  #ifdef CONFIG_HAVE_HW_BREAKPOINT
  static void bp_perf_event_destroy(struct perf_event *event)
@@@ -5002,15 -4871,8 +5011,15 @@@ inherit_event(struct perf_event *parent
        else
                child_event->state = PERF_EVENT_STATE_OFF;
  
 -      if (parent_event->attr.freq)
 -              child_event->hw.sample_period = parent_event->hw.sample_period;
 +      if (parent_event->attr.freq) {
 +              u64 sample_period = parent_event->hw.sample_period;
 +              struct hw_perf_event *hwc = &child_event->hw;
 +
 +              hwc->sample_period = sample_period;
 +              hwc->last_period   = sample_period;
 +
 +              atomic64_set(&hwc->period_left, sample_period);
 +      }
  
        child_event->overflow_handler = parent_event->overflow_handler;
  
@@@ -5178,11 -5040,7 +5187,11 @@@ void perf_event_exit_task(struct task_s
        mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
  
  again:
 -      list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
 +      list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
 +                               group_entry)
 +              __perf_event_exit_task(child_event, child_ctx, child);
 +
 +      list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
                                 group_entry)
                __perf_event_exit_task(child_event, child_ctx, child);
  
         * its siblings to the list, but we obtained 'tmp' before that which
         * will still point to the list head terminating the iteration.
         */
 -      if (!list_empty(&child_ctx->group_list))
 +      if (!list_empty(&child_ctx->pinned_groups) ||
 +          !list_empty(&child_ctx->flexible_groups))
                goto again;
  
        mutex_unlock(&child_ctx->mutex);
        put_ctx(child_ctx);
  }
  
 +static void perf_free_event(struct perf_event *event,
 +                          struct perf_event_context *ctx)
 +{
 +      struct perf_event *parent = event->parent;
 +
 +      if (WARN_ON_ONCE(!parent))
 +              return;
 +
 +      mutex_lock(&parent->child_mutex);
 +      list_del_init(&event->child_list);
 +      mutex_unlock(&parent->child_mutex);
 +
 +      fput(parent->filp);
 +
 +      list_del_event(event, ctx);
 +      free_event(event);
 +}
 +
  /*
   * free an unexposed, unused context as created by inheritance by
   * init_task below, used by fork() in case of fail.
@@@ -5232,70 -5071,36 +5241,70 @@@ void perf_event_free_task(struct task_s
  
        mutex_lock(&ctx->mutex);
  again:
 -      list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
 -              struct perf_event *parent = event->parent;
 +      list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
 +              perf_free_event(event, ctx);
  
 -              if (WARN_ON_ONCE(!parent))
 -                      continue;
 +      list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
 +                               group_entry)
 +              perf_free_event(event, ctx);
  
 -              mutex_lock(&parent->child_mutex);
 -              list_del_init(&event->child_list);
 -              mutex_unlock(&parent->child_mutex);
 +      if (!list_empty(&ctx->pinned_groups) ||
 +          !list_empty(&ctx->flexible_groups))
 +              goto again;
  
 -              fput(parent->filp);
 +      mutex_unlock(&ctx->mutex);
  
 -              list_del_event(event, ctx);
 -              free_event(event);
 +      put_ctx(ctx);
 +}
 +
 +static int
 +inherit_task_group(struct perf_event *event, struct task_struct *parent,
 +                 struct perf_event_context *parent_ctx,
 +                 struct task_struct *child,
 +                 int *inherited_all)
 +{
 +      int ret;
 +      struct perf_event_context *child_ctx = child->perf_event_ctxp;
 +
 +      if (!event->attr.inherit) {
 +              *inherited_all = 0;
 +              return 0;
        }
  
 -      if (!list_empty(&ctx->group_list))
 -              goto again;
 +      if (!child_ctx) {
 +              /*
 +               * This is executed from the parent task context, so
 +               * inherit events that have been marked for cloning.
 +               * First allocate and initialize a context for the
 +               * child.
 +               */
  
 -      mutex_unlock(&ctx->mutex);
 +              child_ctx = kzalloc(sizeof(struct perf_event_context),
 +                                  GFP_KERNEL);
 +              if (!child_ctx)
 +                      return -ENOMEM;
  
 -      put_ctx(ctx);
 +              __perf_event_init_context(child_ctx, child);
 +              child->perf_event_ctxp = child_ctx;
 +              get_task_struct(child);
 +      }
 +
 +      ret = inherit_group(event, parent, parent_ctx,
 +                          child, child_ctx);
 +
 +      if (ret)
 +              *inherited_all = 0;
 +
 +      return ret;
  }
  
 +
  /*
   * Initialize the perf_event context in task_struct
   */
  int perf_event_init_task(struct task_struct *child)
  {
 -      struct perf_event_context *child_ctx = NULL, *parent_ctx;
 +      struct perf_event_context *child_ctx, *parent_ctx;
        struct perf_event_context *cloned_ctx;
        struct perf_event *event;
        struct task_struct *parent = current;
         * We dont have to disable NMIs - we are only looking at
         * the list, not manipulating it:
         */
 -      list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
 -
 -              if (!event->attr.inherit) {
 -                      inherited_all = 0;
 -                      continue;
 -              }
 -
 -              if (!child->perf_event_ctxp) {
 -                      /*
 -                       * This is executed from the parent task context, so
 -                       * inherit events that have been marked for cloning.
 -                       * First allocate and initialize a context for the
 -                       * child.
 -                       */
 -
 -                      child_ctx = kzalloc(sizeof(struct perf_event_context),
 -                                          GFP_KERNEL);
 -                      if (!child_ctx) {
 -                              ret = -ENOMEM;
 -                              break;
 -                      }
 -
 -                      __perf_event_init_context(child_ctx, child);
 -                      child->perf_event_ctxp = child_ctx;
 -                      get_task_struct(child);
 -              }
 +      list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
 +              ret = inherit_task_group(event, parent, parent_ctx, child,
 +                                       &inherited_all);
 +              if (ret)
 +                      break;
 +      }
  
 -              ret = inherit_group(event, parent, parent_ctx,
 -                                           child, child_ctx);
 -              if (ret) {
 -                      inherited_all = 0;
 +      list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
 +              ret = inherit_task_group(event, parent, parent_ctx, child,
 +                                       &inherited_all);
 +              if (ret)
                        break;
 -              }
        }
  
 +      child_ctx = child->perf_event_ctxp;
 +
        if (child_ctx && inherited_all) {
                /*
                 * Mark the child context as a clone of the parent
@@@ -5397,9 -5221,7 +5406,9 @@@ static void __perf_event_exit_cpu(void 
        struct perf_event_context *ctx = &cpuctx->ctx;
        struct perf_event *event, *tmp;
  
 -      list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
 +      list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
 +              __perf_event_remove_from_context(event);
 +      list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
                __perf_event_remove_from_context(event);
  }
  static void perf_event_exit_cpu(int cpu)
diff --combined kernel/sched.c
@@@ -2783,13 -2783,7 +2783,13 @@@ static void finish_task_switch(struct r
         */
        prev_state = prev->state;
        finish_arch_switch(prev);
 -      perf_event_task_sched_in(current, cpu_of(rq));
 +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 +      local_irq_disable();
 +#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
 +      perf_event_task_sched_in(current);
 +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 +      local_irq_enable();
 +#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
        finish_lock_switch(rq, prev);
  
        fire_sched_in_preempt_notifiers(current);
@@@ -5304,7 -5298,7 +5304,7 @@@ void scheduler_tick(void
        curr->sched_class->task_tick(rq, curr, 0);
        raw_spin_unlock(&rq->lock);
  
 -      perf_event_task_tick(curr, cpu);
 +      perf_event_task_tick(curr);
  
  #ifdef CONFIG_SMP
        rq->idle_at_tick = idle_cpu(cpu);
@@@ -5518,7 -5512,7 +5518,7 @@@ need_resched_nonpreemptible
  
        if (likely(prev != next)) {
                sched_info_switch(prev, next);
 -              perf_event_task_sched_out(prev, next, cpu);
 +              perf_event_task_sched_out(prev, next);
  
                rq->nr_switches++;
                rq->curr = next;
  
        post_schedule(rq);
  
-       if (unlikely(reacquire_kernel_lock(current) < 0))
+       if (unlikely(reacquire_kernel_lock(current) < 0)) {
+               prev = rq->curr;
+               switch_count = &prev->nivcsw;
                goto need_resched_nonpreemptible;
+       }
  
        preempt_enable_no_resched();
        if (need_resched())
@@@ -211,8 -211,9 +211,9 @@@ static int filter_pred_pchar(struct fil
  {
        char **addr = (char **)(event + pred->offset);
        int cmp, match;
+       int len = strlen(*addr) + 1;    /* including tailing '\0' */
  
-       cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
+       cmp = pred->regex.match(*addr, &pred->regex, len);
  
        match = cmp ^ pred->not;
  
@@@ -251,7 -252,18 +252,18 @@@ static int filter_pred_none(struct filt
        return 0;
  }
  
- /* Basic regex callbacks */
+ /*
+  * regex_match_foo - Basic regex callbacks
+  *
+  * @str: the string to be searched
+  * @r:   the regex structure containing the pattern string
+  * @len: the length of the string to be searched (including '\0')
+  *
+  * Note:
+  * - @str might not be NULL-terminated if it's of type DYN_STRING
+  *   or STATIC_STRING
+  */
  static int regex_match_full(char *str, struct regex *r, int len)
  {
        if (strncmp(str, r->pattern, len) == 0)
  
  static int regex_match_front(char *str, struct regex *r, int len)
  {
-       if (strncmp(str, r->pattern, len) == 0)
+       if (strncmp(str, r->pattern, r->len) == 0)
                return 1;
        return 0;
  }
  
  static int regex_match_middle(char *str, struct regex *r, int len)
  {
-       if (strstr(str, r->pattern))
+       if (strnstr(str, r->pattern, len))
                return 1;
        return 0;
  }
  
  static int regex_match_end(char *str, struct regex *r, int len)
  {
-       char *ptr = strstr(str, r->pattern);
+       int strlen = len - 1;
  
-       if (ptr && (ptr[r->len] == 0))
+       if (strlen >= r->len &&
+           memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
                return 1;
        return 0;
  }
@@@ -781,10 -794,8 +794,8 @@@ static int filter_add_pred(struct filte
                        pred->regex.field_len = field->size;
                } else if (field->filter_type == FILTER_DYN_STRING)
                        fn = filter_pred_strloc;
-               else {
+               else
                        fn = filter_pred_pchar;
-                       pred->regex.field_len = strlen(pred->regex.pattern);
-               }
        } else {
                if (field->is_signed)
                        ret = strict_strtoll(pred->regex.pattern, 0, &val);
@@@ -1360,7 -1371,7 +1371,7 @@@ out_unlock
        return err;
  }
  
 -#ifdef CONFIG_EVENT_PROFILE
 +#ifdef CONFIG_PERF_EVENTS
  
  void ftrace_profile_free_filter(struct perf_event *event)
  {
@@@ -1428,5 -1439,5 +1439,5 @@@ out_unlock
        return err;
  }
  
 -#endif /* CONFIG_EVENT_PROFILE */
 +#endif /* CONFIG_PERF_EVENTS */
  
@@@ -92,18 -92,23 +92,18 @@@ static void setup_cpunode_map(void
        if (!dir1)
                return;
  
 -      while (true) {
 -              dent1 = readdir(dir1);
 -              if (!dent1)
 -                      break;
 -
 -              if (sscanf(dent1->d_name, "node%u", &mem) < 1)
 +      while ((dent1 = readdir(dir1)) != NULL) {
 +              if (dent1->d_type != DT_DIR ||
 +                  sscanf(dent1->d_name, "node%u", &mem) < 1)
                        continue;
  
                snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
                dir2 = opendir(buf);
                if (!dir2)
                        continue;
 -              while (true) {
 -                      dent2 = readdir(dir2);
 -                      if (!dent2)
 -                              break;
 -                      if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
 +              while ((dent2 = readdir(dir2)) != NULL) {
 +                      if (dent2->d_type != DT_LNK ||
 +                          sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
                                continue;
                        cpunode_map[cpu] = mem;
                }
@@@ -316,8 -321,11 +316,8 @@@ static int process_sample_event(event_
  
        event__parse_sample(event, session->sample_type, &data);
  
 -      dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
 -              event->header.misc,
 -              data.pid, data.tid,
 -              (void *)(long)data.ip,
 -              (long long)data.period);
 +      dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
 +                  data.pid, data.tid, data.ip, data.period);
  
        thread = perf_session__findnew(session, event->ip.pid);
        if (thread == NULL) {
        return 0;
  }
  
 -static int sample_type_check(struct perf_session *session)
 -{
 -      if (!(session->sample_type & PERF_SAMPLE_RAW)) {
 -              fprintf(stderr,
 -                      "No trace sample to read. Did you call perf record "
 -                      "without -R?");
 -              return -1;
 -      }
 -
 -      return 0;
 -}
 -
  static struct perf_event_ops event_ops = {
 -      .process_sample_event   = process_sample_event,
 -      .process_comm_event     = event__process_comm,
 -      .sample_type_check      = sample_type_check,
 +      .sample = process_sample_event,
 +      .comm   = event__process_comm,
  };
  
  static double fragmentation(unsigned long n_req, unsigned long n_alloc)
@@@ -354,7 -375,7 +354,7 @@@ static void __print_result(struct rb_ro
  
        printf("%.102s\n", graph_dotted_line);
        printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
-       printf(" Total_alloc/Per | Total_req/Per   | Hit   | Ping-pong | Frag\n");
+       printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
        printf("%.102s\n", graph_dotted_line);
  
        next = rb_first(root);
                        snprintf(buf, sizeof(buf), "%#Lx", addr);
                printf(" %-34s |", buf);
  
-               printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n",
+               printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n",
                       (unsigned long long)data->bytes_alloc,
                       (unsigned long)data->bytes_alloc / data->hit,
                       (unsigned long long)data->bytes_req,
@@@ -483,14 -504,11 +483,14 @@@ static void sort_result(void
  
  static int __cmd_kmem(void)
  {
 -      int err;
 +      int err = -EINVAL;
        struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
        if (session == NULL)
                return -ENOMEM;
  
 +      if (!perf_session__has_traces(session, "kmem record"))
 +              goto out_delete;
 +
        setup_pager();
        err = perf_session__process_events(session, &event_ops);
        if (err != 0)
@@@ -766,7 -784,8 +766,8 @@@ int cmd_kmem(int argc, const char **arg
                        setup_sorting(&alloc_sort, default_sort_order);
  
                return __cmd_kmem();
-       }
+       } else
+               usage_with_options(kmem_usage, kmem_options);
  
        return 0;
  }
@@@ -34,8 -34,6 +34,8 @@@
  static char           const *input_name = "perf.data";
  
  static int            force;
 +static bool           hide_unresolved;
 +static bool           dont_use_callchains;
  
  static int            show_threads;
  static struct perf_read_values        show_threads_values;
@@@ -93,8 -91,11 +93,8 @@@ static int process_sample_event(event_
  
        event__parse_sample(event, session->sample_type, &data);
  
 -      dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
 -              event->header.misc,
 -              data.pid, data.tid,
 -              (void *)(long)data.ip,
 -              (long long)data.period);
 +      dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
 +                  data.pid, data.tid, data.ip, data.period);
  
        if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
                unsigned int i;
                return -1;
        }
  
 -      if (al.filtered)
 +      if (al.filtered || (hide_unresolved && al.sym == NULL))
                return 0;
  
        if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) {
@@@ -155,14 -156,14 +155,14 @@@ static int process_read_event(event_t *
        return 0;
  }
  
 -static int sample_type_check(struct perf_session *session)
 +static int perf_session__setup_sample_type(struct perf_session *self)
  {
 -      if (!(session->sample_type & PERF_SAMPLE_CALLCHAIN)) {
 +      if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
                if (sort__has_parent) {
                        fprintf(stderr, "selected --sort parent, but no"
                                        " callchain data. Did you call"
                                        " perf record without -g?\n");
 -                      return -1;
 +                      return -EINVAL;
                }
                if (symbol_conf.use_callchain) {
                        fprintf(stderr, "selected -g but no callchain data."
                                        " -g?\n");
                        return -1;
                }
 -      } else if (callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) {
 +      } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
 +                 !symbol_conf.use_callchain) {
                        symbol_conf.use_callchain = true;
                        if (register_callchain_param(&callchain_param) < 0) {
                                fprintf(stderr, "Can't register callchain"
                                                " params\n");
 -                              return -1;
 +                              return -EINVAL;
                        }
        }
  
  }
  
  static struct perf_event_ops event_ops = {
 -      .process_sample_event   = process_sample_event,
 -      .process_mmap_event     = event__process_mmap,
 -      .process_comm_event     = event__process_comm,
 -      .process_exit_event     = event__process_task,
 -      .process_fork_event     = event__process_task,
 -      .process_lost_event     = event__process_lost,
 -      .process_read_event     = process_read_event,
 -      .sample_type_check      = sample_type_check,
 +      .sample = process_sample_event,
 +      .mmap   = event__process_mmap,
 +      .comm   = event__process_comm,
 +      .exit   = event__process_task,
 +      .fork   = event__process_task,
 +      .lost   = event__process_lost,
 +      .read   = process_read_event,
  };
  
 -
  static int __cmd_report(void)
  {
 -      int ret;
 +      int ret = -EINVAL;
        struct perf_session *session;
  
        session = perf_session__new(input_name, O_RDONLY, force);
        if (show_threads)
                perf_read_values_init(&show_threads_values);
  
 +      ret = perf_session__setup_sample_type(session);
 +      if (ret)
 +              goto out_delete;
 +
        ret = perf_session__process_events(session, &event_ops);
        if (ret)
                goto out_delete;
@@@ -245,19 -243,11 +245,19 @@@ out_delete
  
  static int
  parse_callchain_opt(const struct option *opt __used, const char *arg,
 -                  int unset __used)
 +                  int unset)
  {
        char *tok;
        char *endptr;
  
 +      /*
 +       * --no-call-graph
 +       */
 +      if (unset) {
 +              dont_use_callchains = true;
 +              return 0;
 +      }
 +
        symbol_conf.use_callchain = true;
  
        if (!arg)
  
        else if (!strncmp(tok, "none", strlen(arg))) {
                callchain_param.mode = CHAIN_NONE;
-               symbol_conf.use_callchain = true;
+               symbol_conf.use_callchain = false;
  
                return 0;
        }
@@@ -329,7 -319,7 +329,7 @@@ static const struct option options[] = 
                   "pretty printing style key: normal raw"),
        OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
                   "sort by key(s): pid, comm, dso, symbol, parent"),
 -      OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
 +      OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
                    "Don't shorten the pathnames taking into account the cwd"),
        OPT_STRING('p', "parent", &parent_pattern, "regex",
                   "regex filter to identify parent, see: '--sort parent'"),
        OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
                   "separator for columns, no spaces will be added between "
                   "columns '.' is reserved."),
 +      OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
 +                  "Only display entries resolved to a symbol"),
        OPT_END()
  };
  
@@@ -280,7 -280,7 +280,7 @@@ static u64 cpus_pstate_state[MAX_CPUS]
  
  static int process_comm_event(event_t *event, struct perf_session *session __used)
  {
-       pid_set_comm(event->comm.pid, event->comm.comm);
+       pid_set_comm(event->comm.tid, event->comm.comm);
        return 0;
  }
  
@@@ -1029,24 -1029,33 +1029,24 @@@ static void process_samples(struct perf
        }
  }
  
 -static int sample_type_check(struct perf_session *session)
 -{
 -      if (!(session->sample_type & PERF_SAMPLE_RAW)) {
 -              fprintf(stderr, "No trace samples found in the file.\n"
 -                              "Have you used 'perf timechart record' to record it?\n");
 -              return -1;
 -      }
 -
 -      return 0;
 -}
 -
  static struct perf_event_ops event_ops = {
 -      .process_comm_event     = process_comm_event,
 -      .process_fork_event     = process_fork_event,
 -      .process_exit_event     = process_exit_event,
 -      .process_sample_event   = queue_sample_event,
 -      .sample_type_check      = sample_type_check,
 +      .comm   = process_comm_event,
 +      .fork   = process_fork_event,
 +      .exit   = process_exit_event,
 +      .sample = queue_sample_event,
  };
  
  static int __cmd_timechart(void)
  {
        struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
 -      int ret;
 +      int ret = -EINVAL;
  
        if (session == NULL)
                return -ENOMEM;
  
 +      if (!perf_session__has_traces(session, "timechart record"))
 +              goto out_delete;
 +
        ret = perf_session__process_events(session, &event_ops);
        if (ret)
                goto out_delete;