perf events, x86: Fix Intel Nehalem and Westmere last level cache event definitions
Peter Zijlstra [Fri, 22 Apr 2011 22:57:42 +0000 (00:57 +0200)]
The Intel Nehalem offcore bits implemented in:

  e994d7d23a0b: perf: Fix LLC-* events on Intel Nehalem/Westmere

... are wrong: they implemented _ACCESS as _HIT and counted OTHER_CORE_HIT* as
MISS even though its clearly documented as an L3 hit ...

Fix them and the Westmere definitions as well.

Cc: Andi Kleen <ak@linux.intel.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1299119690-13991-3-git-send-email-ming.m.lin@intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>

arch/x86/kernel/cpu/perf_event_intel.c

index e61539b..447a28d 100644 (file)
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids
        },
  },
  [ C(LL  ) ] = {
-       /*
-        * TBD: Need Off-core Response Performance Monitoring support
-        */
        [ C(OP_READ) ] = {
-               /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
                [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01bb,
+               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
        },
        [ C(OP_WRITE) ] = {
-               /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
                [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01bb,
+               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
        },
        [ C(OP_PREFETCH) ] = {
-               /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
                [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01bb,
+               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
        },
  },
  [ C(DTLB) ] = {
@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
  },
  [ C(LL  ) ] = {
        [ C(OP_READ) ] = {
-               /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
                [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01bb,
+               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
        },
        /*
         * Use RFO, not WRITEBACK, because a write miss would typically occur
         * on RFO.
         */
        [ C(OP_WRITE) ] = {
-               /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01bb,
-               /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
                [ C(RESULT_MISS)   ] = 0x01b7,
        },
        [ C(OP_PREFETCH) ] = {
-               /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
                [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01bb,
+               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
        },
  },
  [ C(DTLB) ] = {
@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids
 };
 
 /*
- * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
+ * See IA32 SDM Vol 3B 30.6.1.3
  */
 
-#define DMND_DATA_RD     (1 << 0)
-#define DMND_RFO         (1 << 1)
-#define DMND_WB          (1 << 3)
-#define PF_DATA_RD       (1 << 4)
-#define PF_DATA_RFO      (1 << 5)
-#define RESP_UNCORE_HIT  (1 << 8)
-#define RESP_MISS        (0xf600) /* non uncore hit */
+#define NHM_DMND_DATA_RD       (1 << 0)
+#define NHM_DMND_RFO           (1 << 1)
+#define NHM_DMND_IFETCH                (1 << 2)
+#define NHM_DMND_WB            (1 << 3)
+#define NHM_PF_DATA_RD         (1 << 4)
+#define NHM_PF_DATA_RFO                (1 << 5)
+#define NHM_PF_IFETCH          (1 << 6)
+#define NHM_OFFCORE_OTHER      (1 << 7)
+#define NHM_UNCORE_HIT         (1 << 8)
+#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
+#define NHM_OTHER_CORE_HITM    (1 << 10)
+                               /* reserved */
+#define NHM_REMOTE_CACHE_FWD   (1 << 12)
+#define NHM_REMOTE_DRAM                (1 << 13)
+#define NHM_LOCAL_DRAM         (1 << 14)
+#define NHM_NON_DRAM           (1 << 15)
+
+#define NHM_ALL_DRAM           (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+
+#define NHM_DMND_READ          (NHM_DMND_DATA_RD)
+#define NHM_DMND_WRITE         (NHM_DMND_RFO|NHM_DMND_WB)
+#define NHM_DMND_PREFETCH      (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
+
+#define NHM_L3_HIT     (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
+#define NHM_L3_MISS    (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_ACCESS  (NHM_L3_HIT|NHM_L3_MISS)
 
 static __initconst const u64 nehalem_hw_cache_extra_regs
                                [PERF_COUNT_HW_CACHE_MAX]
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
 {
  [ C(LL  ) ] = {
        [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
-               [ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
+               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
        },
        [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
-               [ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
+               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
        },
        [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
-               [ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
        },
  }
 };