1 /*
2  * Performance events x86 architecture code
3  *
4  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6  *  Copyright (C) 2009 Jaswinder Singh Rajput
7  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10  *  Copyright (C) 2009 Google, Inc., Stephane Eranian
11  *
12  *  For licencing details see kernel-base/COPYING
13  */
14
15 #include <linux/perf_event.h>
16 #include <linux/capability.h>
17 #include <linux/notifier.h>
18 #include <linux/hardirq.h>
19 #include <linux/kprobes.h>
20 #include <linux/module.h>
21 #include <linux/kdebug.h>
22 #include <linux/sched.h>
23 #include <linux/uaccess.h>
24 #include <linux/highmem.h>
25 #include <linux/cpu.h>
26 #include <linux/bitops.h>
27
28 #include <asm/apic.h>
29 #include <asm/stacktrace.h>
30 #include <asm/nmi.h>
31
32 static u64 perf_event_mask __read_mostly;
33
34 /* The maximal number of PEBS events: */
35 #define MAX_PEBS_EVENTS 4
36
37 /* The size of a BTS record in bytes: */
38 #define BTS_RECORD_SIZE         24
39
40 /* The size of a per-cpu BTS buffer in bytes: */
41 #define BTS_BUFFER_SIZE         (BTS_RECORD_SIZE * 2048)
42
43 /* The BTS overflow threshold in bytes from the end of the buffer: */
44 #define BTS_OVFL_TH             (BTS_RECORD_SIZE * 128)
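/*
 * With the sizes above, the per-cpu buffer holds 2048 BTS records and the
 * overflow threshold sits 128 records before the end of the buffer (see
 * reserve_bts_hardware() below).
 */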
45
46
47 /*
48  * Bits in the debugctlmsr controlling branch tracing.
49  */
50 #define X86_DEBUGCTL_TR                 (1 << 6)
51 #define X86_DEBUGCTL_BTS                (1 << 7)
52 #define X86_DEBUGCTL_BTINT              (1 << 8)
53 #define X86_DEBUGCTL_BTS_OFF_OS         (1 << 9)
54 #define X86_DEBUGCTL_BTS_OFF_USR        (1 << 10)
55
56 /*
57  * A debug store configuration.
58  *
59  * We only support architectures that use 64bit fields.
60  */
61 struct debug_store {
62         u64     bts_buffer_base;
63         u64     bts_index;
64         u64     bts_absolute_maximum;
65         u64     bts_interrupt_threshold;
66         u64     pebs_buffer_base;
67         u64     pebs_index;
68         u64     pebs_absolute_maximum;
69         u64     pebs_interrupt_threshold;
70         u64     pebs_event_reset[MAX_PEBS_EVENTS];
71 };
72
73 struct event_constraint {
74         union {
75                 unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
76                 u64             idxmsk64[1];
77         };
78         int     code;
79         int     cmask;
80         int     weight;
81 };
82
83 struct cpu_hw_events {
84         struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
85         unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
86         unsigned long           interrupts;
87         int                     enabled;
88         struct debug_store      *ds;
89
90         int                     n_events;
91         int                     n_added;
92         int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
93         struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
94 };
95
96 #define EVENT_CONSTRAINT(c, n, m) {     \
97         { .idxmsk64[0] = (n) },         \
98         .code = (c),                    \
99         .cmask = (m),                   \
100         .weight = HWEIGHT64((u64)(n)),  \
101 }
102
103 #define INTEL_EVENT_CONSTRAINT(c, n)    \
104         EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
105
106 #define FIXED_EVENT_CONSTRAINT(c, n)    \
107         EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
108
109 #define EVENT_CONSTRAINT_END            \
110         EVENT_CONSTRAINT(0, 0, 0)
111
112 #define for_each_event_constraint(e, c) \
113         for ((e) = (c); (e)->cmask; (e)++)
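/*
 * Example: INTEL_EVENT_CONSTRAINT(0x12, 0x2) below constrains event 0x12
 * (MUL) to generic counter 1 only; a single bit in the index mask gives it
 * a weight of 1, i.e. the most constrained class in x86_schedule_events().
 */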
114
115 /*
116  * struct x86_pmu - generic x86 pmu
117  */
118 struct x86_pmu {
119         const char      *name;
120         int             version;
121         int             (*handle_irq)(struct pt_regs *);
122         void            (*disable_all)(void);
123         void            (*enable_all)(void);
124         void            (*enable)(struct hw_perf_event *, int);
125         void            (*disable)(struct hw_perf_event *, int);
126         unsigned        eventsel;
127         unsigned        perfctr;
128         u64             (*event_map)(int);
129         u64             (*raw_event)(u64);
130         int             max_events;
131         int             num_events;
132         int             num_events_fixed;
133         int             event_bits;
134         u64             event_mask;
135         int             apic;
136         u64             max_period;
137         u64             intel_ctrl;
138         void            (*enable_bts)(u64 config);
139         void            (*disable_bts)(void);
140
141         struct event_constraint *
142                         (*get_event_constraints)(struct cpu_hw_events *cpuc,
143                                                  struct perf_event *event);
144
145         void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
146                                                  struct perf_event *event);
147         struct event_constraint *event_constraints;
148 };
149
150 static struct x86_pmu x86_pmu __read_mostly;
151
152 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
153         .enabled = 1,
154 };
155
156 static int x86_perf_event_set_period(struct perf_event *event,
157                              struct hw_perf_event *hwc, int idx);
158
159 /*
160  * Not sure about some of these
161  */
162 static const u64 p6_perfmon_event_map[] =
163 {
164   [PERF_COUNT_HW_CPU_CYCLES]            = 0x0079,
165   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
166   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0f2e,
167   [PERF_COUNT_HW_CACHE_MISSES]          = 0x012e,
168   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
169   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
170   [PERF_COUNT_HW_BUS_CYCLES]            = 0x0062,
171 };
172
173 static u64 p6_pmu_event_map(int hw_event)
174 {
175         return p6_perfmon_event_map[hw_event];
176 }
177
178 /*
179  * Event setting that is specified not to count anything.
180  * We use this to effectively disable a counter.
181  *
182  * L2_RQSTS with 0 MESI unit mask.
183  */
184 #define P6_NOP_EVENT                    0x0000002EULL
185
186 static u64 p6_pmu_raw_event(u64 hw_event)
187 {
188 #define P6_EVNTSEL_EVENT_MASK           0x000000FFULL
189 #define P6_EVNTSEL_UNIT_MASK            0x0000FF00ULL
190 #define P6_EVNTSEL_EDGE_MASK            0x00040000ULL
191 #define P6_EVNTSEL_INV_MASK             0x00800000ULL
192 #define P6_EVNTSEL_REG_MASK             0xFF000000ULL
193
194 #define P6_EVNTSEL_MASK                 \
195         (P6_EVNTSEL_EVENT_MASK |        \
196          P6_EVNTSEL_UNIT_MASK  |        \
197          P6_EVNTSEL_EDGE_MASK  |        \
198          P6_EVNTSEL_INV_MASK   |        \
199          P6_EVNTSEL_REG_MASK)
200
201         return hw_event & P6_EVNTSEL_MASK;
202 }
203
204 static struct event_constraint intel_p6_event_constraints[] =
205 {
206         INTEL_EVENT_CONSTRAINT(0xc1, 0x1),      /* FLOPS */
207         INTEL_EVENT_CONSTRAINT(0x10, 0x1),      /* FP_COMP_OPS_EXE */
208         INTEL_EVENT_CONSTRAINT(0x11, 0x1),      /* FP_ASSIST */
209         INTEL_EVENT_CONSTRAINT(0x12, 0x2),      /* MUL */
210         INTEL_EVENT_CONSTRAINT(0x13, 0x2),      /* DIV */
211         INTEL_EVENT_CONSTRAINT(0x14, 0x1),      /* CYCLES_DIV_BUSY */
212         EVENT_CONSTRAINT_END
213 };
214
215 /*
216  * Intel PerfMon v3. Used on Core2 and later.
217  */
218 static const u64 intel_perfmon_event_map[] =
219 {
220   [PERF_COUNT_HW_CPU_CYCLES]            = 0x003c,
221   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
222   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x4f2e,
223   [PERF_COUNT_HW_CACHE_MISSES]          = 0x412e,
224   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
225   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
226   [PERF_COUNT_HW_BUS_CYCLES]            = 0x013c,
227 };
228
229 static struct event_constraint intel_core_event_constraints[] =
230 {
231         INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
232         INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
233         INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
234         INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
235         INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
236         INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
237         EVENT_CONSTRAINT_END
238 };
239
240 static struct event_constraint intel_core2_event_constraints[] =
241 {
242         FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
243         FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
244         INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
245         INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
246         INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
247         INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
248         INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
249         INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
250         INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
251         INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
252         INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
253         EVENT_CONSTRAINT_END
254 };
255
256 static struct event_constraint intel_nehalem_event_constraints[] =
257 {
258         FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
259         FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
260         INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
261         INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
262         INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
263         INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
264         INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
265         INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
266         INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
267         INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
268         EVENT_CONSTRAINT_END
269 };
270
271 static struct event_constraint intel_westmere_event_constraints[] =
272 {
273         FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
274         FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
275         INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
276         INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
277         INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
278         EVENT_CONSTRAINT_END
279 };
280
281 static struct event_constraint intel_gen_event_constraints[] =
282 {
283         FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
284         FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
285         EVENT_CONSTRAINT_END
286 };
287
288 static u64 intel_pmu_event_map(int hw_event)
289 {
290         return intel_perfmon_event_map[hw_event];
291 }
292
293 /*
294  * Generalized hw caching related hw_event table, filled
295  * in on a per-model basis. A value of 0 means
296  * 'not supported', -1 means 'hw_event makes no sense on
297  * this CPU', and any other value is the raw hw_event
298  * ID.
299  */
300
301 #define C(x) PERF_COUNT_HW_CACHE_##x
302
303 static u64 __read_mostly hw_cache_event_ids
304                                 [PERF_COUNT_HW_CACHE_MAX]
305                                 [PERF_COUNT_HW_CACHE_OP_MAX]
306                                 [PERF_COUNT_HW_CACHE_RESULT_MAX];
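/*
 * Example lookup: hw_cache_event_ids[C(L1D)][C(OP_READ)][C(RESULT_MISS)]
 * yields the model specific raw event for L1D read misses, e.g. 0x0140
 * (L1D_CACHE_LD.I_STATE) once the Core2 table below has been copied in.
 */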
307
308 static __initconst u64 westmere_hw_cache_event_ids
309                                 [PERF_COUNT_HW_CACHE_MAX]
310                                 [PERF_COUNT_HW_CACHE_OP_MAX]
311                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
312 {
313  [ C(L1D) ] = {
314         [ C(OP_READ) ] = {
315                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
316                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
317         },
318         [ C(OP_WRITE) ] = {
319                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
320                 [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
321         },
322         [ C(OP_PREFETCH) ] = {
323                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
324                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
325         },
326  },
327  [ C(L1I ) ] = {
328         [ C(OP_READ) ] = {
329                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
330                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
331         },
332         [ C(OP_WRITE) ] = {
333                 [ C(RESULT_ACCESS) ] = -1,
334                 [ C(RESULT_MISS)   ] = -1,
335         },
336         [ C(OP_PREFETCH) ] = {
337                 [ C(RESULT_ACCESS) ] = 0x0,
338                 [ C(RESULT_MISS)   ] = 0x0,
339         },
340  },
341  [ C(LL  ) ] = {
342         [ C(OP_READ) ] = {
343                 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
344                 [ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
345         },
346         [ C(OP_WRITE) ] = {
347                 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
348                 [ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
349         },
350         [ C(OP_PREFETCH) ] = {
351                 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
352                 [ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
353         },
354  },
355  [ C(DTLB) ] = {
356         [ C(OP_READ) ] = {
357                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
358                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
359         },
360         [ C(OP_WRITE) ] = {
361                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
362                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
363         },
364         [ C(OP_PREFETCH) ] = {
365                 [ C(RESULT_ACCESS) ] = 0x0,
366                 [ C(RESULT_MISS)   ] = 0x0,
367         },
368  },
369  [ C(ITLB) ] = {
370         [ C(OP_READ) ] = {
371                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
372                 [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
373         },
374         [ C(OP_WRITE) ] = {
375                 [ C(RESULT_ACCESS) ] = -1,
376                 [ C(RESULT_MISS)   ] = -1,
377         },
378         [ C(OP_PREFETCH) ] = {
379                 [ C(RESULT_ACCESS) ] = -1,
380                 [ C(RESULT_MISS)   ] = -1,
381         },
382  },
383  [ C(BPU ) ] = {
384         [ C(OP_READ) ] = {
385                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
386                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
387         },
388         [ C(OP_WRITE) ] = {
389                 [ C(RESULT_ACCESS) ] = -1,
390                 [ C(RESULT_MISS)   ] = -1,
391         },
392         [ C(OP_PREFETCH) ] = {
393                 [ C(RESULT_ACCESS) ] = -1,
394                 [ C(RESULT_MISS)   ] = -1,
395         },
396  },
397 };
398
399 static __initconst u64 nehalem_hw_cache_event_ids
400                                 [PERF_COUNT_HW_CACHE_MAX]
401                                 [PERF_COUNT_HW_CACHE_OP_MAX]
402                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
403 {
404  [ C(L1D) ] = {
405         [ C(OP_READ) ] = {
406                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
407                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
408         },
409         [ C(OP_WRITE) ] = {
410                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
411                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
412         },
413         [ C(OP_PREFETCH) ] = {
414                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
415                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
416         },
417  },
418  [ C(L1I ) ] = {
419         [ C(OP_READ) ] = {
420                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
421                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
422         },
423         [ C(OP_WRITE) ] = {
424                 [ C(RESULT_ACCESS) ] = -1,
425                 [ C(RESULT_MISS)   ] = -1,
426         },
427         [ C(OP_PREFETCH) ] = {
428                 [ C(RESULT_ACCESS) ] = 0x0,
429                 [ C(RESULT_MISS)   ] = 0x0,
430         },
431  },
432  [ C(LL  ) ] = {
433         [ C(OP_READ) ] = {
434                 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
435                 [ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
436         },
437         [ C(OP_WRITE) ] = {
438                 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
439                 [ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
440         },
441         [ C(OP_PREFETCH) ] = {
442                 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
443                 [ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
444         },
445  },
446  [ C(DTLB) ] = {
447         [ C(OP_READ) ] = {
448                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
449                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
450         },
451         [ C(OP_WRITE) ] = {
452                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
453                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
454         },
455         [ C(OP_PREFETCH) ] = {
456                 [ C(RESULT_ACCESS) ] = 0x0,
457                 [ C(RESULT_MISS)   ] = 0x0,
458         },
459  },
460  [ C(ITLB) ] = {
461         [ C(OP_READ) ] = {
462                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
463                 [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
464         },
465         [ C(OP_WRITE) ] = {
466                 [ C(RESULT_ACCESS) ] = -1,
467                 [ C(RESULT_MISS)   ] = -1,
468         },
469         [ C(OP_PREFETCH) ] = {
470                 [ C(RESULT_ACCESS) ] = -1,
471                 [ C(RESULT_MISS)   ] = -1,
472         },
473  },
474  [ C(BPU ) ] = {
475         [ C(OP_READ) ] = {
476                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
477                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
478         },
479         [ C(OP_WRITE) ] = {
480                 [ C(RESULT_ACCESS) ] = -1,
481                 [ C(RESULT_MISS)   ] = -1,
482         },
483         [ C(OP_PREFETCH) ] = {
484                 [ C(RESULT_ACCESS) ] = -1,
485                 [ C(RESULT_MISS)   ] = -1,
486         },
487  },
488 };
489
490 static __initconst u64 core2_hw_cache_event_ids
491                                 [PERF_COUNT_HW_CACHE_MAX]
492                                 [PERF_COUNT_HW_CACHE_OP_MAX]
493                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
494 {
495  [ C(L1D) ] = {
496         [ C(OP_READ) ] = {
497                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
498                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
499         },
500         [ C(OP_WRITE) ] = {
501                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
502                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
503         },
504         [ C(OP_PREFETCH) ] = {
505                 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
506                 [ C(RESULT_MISS)   ] = 0,
507         },
508  },
509  [ C(L1I ) ] = {
510         [ C(OP_READ) ] = {
511                 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
512                 [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
513         },
514         [ C(OP_WRITE) ] = {
515                 [ C(RESULT_ACCESS) ] = -1,
516                 [ C(RESULT_MISS)   ] = -1,
517         },
518         [ C(OP_PREFETCH) ] = {
519                 [ C(RESULT_ACCESS) ] = 0,
520                 [ C(RESULT_MISS)   ] = 0,
521         },
522  },
523  [ C(LL  ) ] = {
524         [ C(OP_READ) ] = {
525                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
526                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
527         },
528         [ C(OP_WRITE) ] = {
529                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
530                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
531         },
532         [ C(OP_PREFETCH) ] = {
533                 [ C(RESULT_ACCESS) ] = 0,
534                 [ C(RESULT_MISS)   ] = 0,
535         },
536  },
537  [ C(DTLB) ] = {
538         [ C(OP_READ) ] = {
539                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
540                 [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
541         },
542         [ C(OP_WRITE) ] = {
543                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
544                 [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
545         },
546         [ C(OP_PREFETCH) ] = {
547                 [ C(RESULT_ACCESS) ] = 0,
548                 [ C(RESULT_MISS)   ] = 0,
549         },
550  },
551  [ C(ITLB) ] = {
552         [ C(OP_READ) ] = {
553                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
554                 [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
555         },
556         [ C(OP_WRITE) ] = {
557                 [ C(RESULT_ACCESS) ] = -1,
558                 [ C(RESULT_MISS)   ] = -1,
559         },
560         [ C(OP_PREFETCH) ] = {
561                 [ C(RESULT_ACCESS) ] = -1,
562                 [ C(RESULT_MISS)   ] = -1,
563         },
564  },
565  [ C(BPU ) ] = {
566         [ C(OP_READ) ] = {
567                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
568                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
569         },
570         [ C(OP_WRITE) ] = {
571                 [ C(RESULT_ACCESS) ] = -1,
572                 [ C(RESULT_MISS)   ] = -1,
573         },
574         [ C(OP_PREFETCH) ] = {
575                 [ C(RESULT_ACCESS) ] = -1,
576                 [ C(RESULT_MISS)   ] = -1,
577         },
578  },
579 };
580
581 static __initconst u64 atom_hw_cache_event_ids
582                                 [PERF_COUNT_HW_CACHE_MAX]
583                                 [PERF_COUNT_HW_CACHE_OP_MAX]
584                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
585 {
586  [ C(L1D) ] = {
587         [ C(OP_READ) ] = {
588                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
589                 [ C(RESULT_MISS)   ] = 0,
590         },
591         [ C(OP_WRITE) ] = {
592                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
593                 [ C(RESULT_MISS)   ] = 0,
594         },
595         [ C(OP_PREFETCH) ] = {
596                 [ C(RESULT_ACCESS) ] = 0x0,
597                 [ C(RESULT_MISS)   ] = 0,
598         },
599  },
600  [ C(L1I ) ] = {
601         [ C(OP_READ) ] = {
602                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
603                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
604         },
605         [ C(OP_WRITE) ] = {
606                 [ C(RESULT_ACCESS) ] = -1,
607                 [ C(RESULT_MISS)   ] = -1,
608         },
609         [ C(OP_PREFETCH) ] = {
610                 [ C(RESULT_ACCESS) ] = 0,
611                 [ C(RESULT_MISS)   ] = 0,
612         },
613  },
614  [ C(LL  ) ] = {
615         [ C(OP_READ) ] = {
616                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
617                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
618         },
619         [ C(OP_WRITE) ] = {
620                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
621                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
622         },
623         [ C(OP_PREFETCH) ] = {
624                 [ C(RESULT_ACCESS) ] = 0,
625                 [ C(RESULT_MISS)   ] = 0,
626         },
627  },
628  [ C(DTLB) ] = {
629         [ C(OP_READ) ] = {
630                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
631                 [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
632         },
633         [ C(OP_WRITE) ] = {
634                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
635                 [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
636         },
637         [ C(OP_PREFETCH) ] = {
638                 [ C(RESULT_ACCESS) ] = 0,
639                 [ C(RESULT_MISS)   ] = 0,
640         },
641  },
642  [ C(ITLB) ] = {
643         [ C(OP_READ) ] = {
644                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
645                 [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
646         },
647         [ C(OP_WRITE) ] = {
648                 [ C(RESULT_ACCESS) ] = -1,
649                 [ C(RESULT_MISS)   ] = -1,
650         },
651         [ C(OP_PREFETCH) ] = {
652                 [ C(RESULT_ACCESS) ] = -1,
653                 [ C(RESULT_MISS)   ] = -1,
654         },
655  },
656  [ C(BPU ) ] = {
657         [ C(OP_READ) ] = {
658                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
659                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
660         },
661         [ C(OP_WRITE) ] = {
662                 [ C(RESULT_ACCESS) ] = -1,
663                 [ C(RESULT_MISS)   ] = -1,
664         },
665         [ C(OP_PREFETCH) ] = {
666                 [ C(RESULT_ACCESS) ] = -1,
667                 [ C(RESULT_MISS)   ] = -1,
668         },
669  },
670 };
671
672 static u64 intel_pmu_raw_event(u64 hw_event)
673 {
674 #define CORE_EVNTSEL_EVENT_MASK         0x000000FFULL
675 #define CORE_EVNTSEL_UNIT_MASK          0x0000FF00ULL
676 #define CORE_EVNTSEL_EDGE_MASK          0x00040000ULL
677 #define CORE_EVNTSEL_INV_MASK           0x00800000ULL
678 #define CORE_EVNTSEL_REG_MASK           0xFF000000ULL
679
680 #define CORE_EVNTSEL_MASK               \
681         (INTEL_ARCH_EVTSEL_MASK |       \
682          INTEL_ARCH_UNIT_MASK   |       \
683          INTEL_ARCH_EDGE_MASK   |       \
684          INTEL_ARCH_INV_MASK    |       \
685          INTEL_ARCH_CNT_MASK)
686
687         return hw_event & CORE_EVNTSEL_MASK;
688 }
689
690 static __initconst u64 amd_hw_cache_event_ids
691                                 [PERF_COUNT_HW_CACHE_MAX]
692                                 [PERF_COUNT_HW_CACHE_OP_MAX]
693                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
694 {
695  [ C(L1D) ] = {
696         [ C(OP_READ) ] = {
697                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
698                 [ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
699         },
700         [ C(OP_WRITE) ] = {
701                 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
702                 [ C(RESULT_MISS)   ] = 0,
703         },
704         [ C(OP_PREFETCH) ] = {
705                 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
706                 [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
707         },
708  },
709  [ C(L1I ) ] = {
710         [ C(OP_READ) ] = {
711                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
712                 [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
713         },
714         [ C(OP_WRITE) ] = {
715                 [ C(RESULT_ACCESS) ] = -1,
716                 [ C(RESULT_MISS)   ] = -1,
717         },
718         [ C(OP_PREFETCH) ] = {
719                 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
720                 [ C(RESULT_MISS)   ] = 0,
721         },
722  },
723  [ C(LL  ) ] = {
724         [ C(OP_READ) ] = {
725                 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
726                 [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
727         },
728         [ C(OP_WRITE) ] = {
729                 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
730                 [ C(RESULT_MISS)   ] = 0,
731         },
732         [ C(OP_PREFETCH) ] = {
733                 [ C(RESULT_ACCESS) ] = 0,
734                 [ C(RESULT_MISS)   ] = 0,
735         },
736  },
737  [ C(DTLB) ] = {
738         [ C(OP_READ) ] = {
739                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
740                 [ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DTLB Miss   */
741         },
742         [ C(OP_WRITE) ] = {
743                 [ C(RESULT_ACCESS) ] = 0,
744                 [ C(RESULT_MISS)   ] = 0,
745         },
746         [ C(OP_PREFETCH) ] = {
747                 [ C(RESULT_ACCESS) ] = 0,
748                 [ C(RESULT_MISS)   ] = 0,
749         },
750  },
751  [ C(ITLB) ] = {
752         [ C(OP_READ) ] = {
753                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
754                 [ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
755         },
756         [ C(OP_WRITE) ] = {
757                 [ C(RESULT_ACCESS) ] = -1,
758                 [ C(RESULT_MISS)   ] = -1,
759         },
760         [ C(OP_PREFETCH) ] = {
761                 [ C(RESULT_ACCESS) ] = -1,
762                 [ C(RESULT_MISS)   ] = -1,
763         },
764  },
765  [ C(BPU ) ] = {
766         [ C(OP_READ) ] = {
767                 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
768                 [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
769         },
770         [ C(OP_WRITE) ] = {
771                 [ C(RESULT_ACCESS) ] = -1,
772                 [ C(RESULT_MISS)   ] = -1,
773         },
774         [ C(OP_PREFETCH) ] = {
775                 [ C(RESULT_ACCESS) ] = -1,
776                 [ C(RESULT_MISS)   ] = -1,
777         },
778  },
779 };
780
781 /*
782  * AMD Performance Monitor K7 and later.
783  */
784 static const u64 amd_perfmon_event_map[] =
785 {
786   [PERF_COUNT_HW_CPU_CYCLES]            = 0x0076,
787   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
788   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0080,
789   [PERF_COUNT_HW_CACHE_MISSES]          = 0x0081,
790   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
791   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
792 };
793
794 static u64 amd_pmu_event_map(int hw_event)
795 {
796         return amd_perfmon_event_map[hw_event];
797 }
798
799 static u64 amd_pmu_raw_event(u64 hw_event)
800 {
801 #define K7_EVNTSEL_EVENT_MASK   0x7000000FFULL
802 #define K7_EVNTSEL_UNIT_MASK    0x00000FF00ULL
803 #define K7_EVNTSEL_EDGE_MASK    0x000040000ULL
804 #define K7_EVNTSEL_INV_MASK     0x000800000ULL
805 #define K7_EVNTSEL_REG_MASK     0x0FF000000ULL
806
807 #define K7_EVNTSEL_MASK                 \
808         (K7_EVNTSEL_EVENT_MASK |        \
809          K7_EVNTSEL_UNIT_MASK  |        \
810          K7_EVNTSEL_EDGE_MASK  |        \
811          K7_EVNTSEL_INV_MASK   |        \
812          K7_EVNTSEL_REG_MASK)
813
814         return hw_event & K7_EVNTSEL_MASK;
815 }
816
817 /*
818  * Propagate event elapsed time into the generic event.
819  * Can only be executed on the CPU where the event is active.
820  * Returns the new raw count read from the hardware counter.
821  */
822 static u64
823 x86_perf_event_update(struct perf_event *event,
824                         struct hw_perf_event *hwc, int idx)
825 {
826         int shift = 64 - x86_pmu.event_bits;
827         u64 prev_raw_count, new_raw_count;
828         s64 delta;
829
830         if (idx == X86_PMC_IDX_FIXED_BTS)
831                 return 0;
832
833         /*
834          * Careful: an NMI might modify the previous event value.
835          *
836          * Our tactic to handle this is to first atomically read and
837          * exchange a new raw count - then add that new-prev delta
838          * count to the generic event atomically:
839          */
840 again:
841         prev_raw_count = atomic64_read(&hwc->prev_count);
842         rdmsrl(hwc->event_base + idx, new_raw_count);
843
844         if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
845                                         new_raw_count) != prev_raw_count)
846                 goto again;
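        /*
         * The cmpxchg fails when an NMI handler updated prev_count between
         * our read and the exchange; in that case loop and retry with the
         * fresh value.
         */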
847
848         /*
849          * Now we have the new raw value and have updated the prev
850          * timestamp already. We can now calculate the elapsed delta
851          * (event-)time and add that to the generic event.
852          *
853          * Careful, not all hw sign-extends above the physical width
854          * of the count.
855          */
856         delta = (new_raw_count << shift) - (prev_raw_count << shift);
857         delta >>= shift;
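        /*
         * Example (assuming 40 valid event bits, so shift == 24): shifting
         * both raw values up by 24 discards any stale upper bits, and the
         * arithmetic shift back down sign-extends the result, so the delta
         * stays correct even across a counter wrap.
         */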
858
859         atomic64_add(delta, &event->count);
860         atomic64_sub(delta, &hwc->period_left);
861
862         return new_raw_count;
863 }
864
865 static atomic_t active_events;
866 static DEFINE_MUTEX(pmc_reserve_mutex);
867
868 static bool reserve_pmc_hardware(void)
869 {
870 #ifdef CONFIG_X86_LOCAL_APIC
871         int i;
872
873         if (nmi_watchdog == NMI_LOCAL_APIC)
874                 disable_lapic_nmi_watchdog();
875
876         for (i = 0; i < x86_pmu.num_events; i++) {
877                 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
878                         goto perfctr_fail;
879         }
880
881         for (i = 0; i < x86_pmu.num_events; i++) {
882                 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
883                         goto eventsel_fail;
884         }
885 #endif
886
887         return true;
888
889 #ifdef CONFIG_X86_LOCAL_APIC
890 eventsel_fail:
891         for (i--; i >= 0; i--)
892                 release_evntsel_nmi(x86_pmu.eventsel + i);
893
894         i = x86_pmu.num_events;
895
896 perfctr_fail:
897         for (i--; i >= 0; i--)
898                 release_perfctr_nmi(x86_pmu.perfctr + i);
899
900         if (nmi_watchdog == NMI_LOCAL_APIC)
901                 enable_lapic_nmi_watchdog();
902
903         return false;
904 #endif
905 }
906
907 static void release_pmc_hardware(void)
908 {
909 #ifdef CONFIG_X86_LOCAL_APIC
910         int i;
911
912         for (i = 0; i < x86_pmu.num_events; i++) {
913                 release_perfctr_nmi(x86_pmu.perfctr + i);
914                 release_evntsel_nmi(x86_pmu.eventsel + i);
915         }
916
917         if (nmi_watchdog == NMI_LOCAL_APIC)
918                 enable_lapic_nmi_watchdog();
919 #endif
920 }
921
922 static inline bool bts_available(void)
923 {
924         return x86_pmu.enable_bts != NULL;
925 }
926
927 static inline void init_debug_store_on_cpu(int cpu)
928 {
929         struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
930
931         if (!ds)
932                 return;
933
934         wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
935                      (u32)((u64)(unsigned long)ds),
936                      (u32)((u64)(unsigned long)ds >> 32));
937 }
938
939 static inline void fini_debug_store_on_cpu(int cpu)
940 {
941         if (!per_cpu(cpu_hw_events, cpu).ds)
942                 return;
943
944         wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
945 }
946
947 static void release_bts_hardware(void)
948 {
949         int cpu;
950
951         if (!bts_available())
952                 return;
953
954         get_online_cpus();
955
956         for_each_online_cpu(cpu)
957                 fini_debug_store_on_cpu(cpu);
958
959         for_each_possible_cpu(cpu) {
960                 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
961
962                 if (!ds)
963                         continue;
964
965                 per_cpu(cpu_hw_events, cpu).ds = NULL;
966
967                 kfree((void *)(unsigned long)ds->bts_buffer_base);
968                 kfree(ds);
969         }
970
971         put_online_cpus();
972 }
973
974 static int reserve_bts_hardware(void)
975 {
976         int cpu, err = 0;
977
978         if (!bts_available())
979                 return 0;
980
981         get_online_cpus();
982
983         for_each_possible_cpu(cpu) {
984                 struct debug_store *ds;
985                 void *buffer;
986
987                 err = -ENOMEM;
988                 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
989                 if (unlikely(!buffer))
990                         break;
991
992                 ds = kzalloc(sizeof(*ds), GFP_KERNEL);
993                 if (unlikely(!ds)) {
994                         kfree(buffer);
995                         break;
996                 }
997
998                 ds->bts_buffer_base = (u64)(unsigned long)buffer;
999                 ds->bts_index = ds->bts_buffer_base;
1000                 ds->bts_absolute_maximum =
1001                         ds->bts_buffer_base + BTS_BUFFER_SIZE;
1002                 ds->bts_interrupt_threshold =
1003                         ds->bts_absolute_maximum - BTS_OVFL_TH;
1004
1005                 per_cpu(cpu_hw_events, cpu).ds = ds;
1006                 err = 0;
1007         }
1008
1009         if (err)
1010                 release_bts_hardware();
1011         else {
1012                 for_each_online_cpu(cpu)
1013                         init_debug_store_on_cpu(cpu);
1014         }
1015
1016         put_online_cpus();
1017
1018         return err;
1019 }
1020
1021 static void hw_perf_event_destroy(struct perf_event *event)
1022 {
1023         if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
1024                 release_pmc_hardware();
1025                 release_bts_hardware();
1026                 mutex_unlock(&pmc_reserve_mutex);
1027         }
1028 }
1029
1030 static inline int x86_pmu_initialized(void)
1031 {
1032         return x86_pmu.handle_irq != NULL;
1033 }
1034
1035 static inline int
1036 set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
1037 {
1038         unsigned int cache_type, cache_op, cache_result;
1039         u64 config, val;
1040
1041         config = attr->config;
1042
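        /*
         * The cache event is packed into attr->config as three bytes:
         * cache type in bits 0-7, operation in bits 8-15 and result in
         * bits 16-23, decoded below.
         */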
1043         cache_type = (config >>  0) & 0xff;
1044         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
1045                 return -EINVAL;
1046
1047         cache_op = (config >>  8) & 0xff;
1048         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
1049                 return -EINVAL;
1050
1051         cache_result = (config >> 16) & 0xff;
1052         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
1053                 return -EINVAL;
1054
1055         val = hw_cache_event_ids[cache_type][cache_op][cache_result];
1056
1057         if (val == 0)
1058                 return -ENOENT;
1059
1060         if (val == -1)
1061                 return -EINVAL;
1062
1063         hwc->config |= val;
1064
1065         return 0;
1066 }
1067
1068 static void intel_pmu_enable_bts(u64 config)
1069 {
1070         unsigned long debugctlmsr;
1071
1072         debugctlmsr = get_debugctlmsr();
1073
1074         debugctlmsr |= X86_DEBUGCTL_TR;
1075         debugctlmsr |= X86_DEBUGCTL_BTS;
1076         debugctlmsr |= X86_DEBUGCTL_BTINT;
1077
1078         if (!(config & ARCH_PERFMON_EVENTSEL_OS))
1079                 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
1080
1081         if (!(config & ARCH_PERFMON_EVENTSEL_USR))
1082                 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
1083
1084         update_debugctlmsr(debugctlmsr);
1085 }
1086
1087 static void intel_pmu_disable_bts(void)
1088 {
1089         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1090         unsigned long debugctlmsr;
1091
1092         if (!cpuc->ds)
1093                 return;
1094
1095         debugctlmsr = get_debugctlmsr();
1096
1097         debugctlmsr &=
1098                 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
1099                   X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
1100
1101         update_debugctlmsr(debugctlmsr);
1102 }
1103
1104 /*
1105  * Setup the hardware configuration for a given attr_type
1106  */
1107 static int __hw_perf_event_init(struct perf_event *event)
1108 {
1109         struct perf_event_attr *attr = &event->attr;
1110         struct hw_perf_event *hwc = &event->hw;
1111         u64 config;
1112         int err;
1113
1114         if (!x86_pmu_initialized())
1115                 return -ENODEV;
1116
1117         err = 0;
1118         if (!atomic_inc_not_zero(&active_events)) {
1119                 mutex_lock(&pmc_reserve_mutex);
1120                 if (atomic_read(&active_events) == 0) {
1121                         if (!reserve_pmc_hardware())
1122                                 err = -EBUSY;
1123                         else
1124                                 err = reserve_bts_hardware();
1125                 }
1126                 if (!err)
1127                         atomic_inc(&active_events);
1128                 mutex_unlock(&pmc_reserve_mutex);
1129         }
1130         if (err)
1131                 return err;
1132
1133         event->destroy = hw_perf_event_destroy;
1134
1135         /*
1136          * Generate PMC IRQs:
1137          * (keep 'enabled' bit clear for now)
1138          */
1139         hwc->config = ARCH_PERFMON_EVENTSEL_INT;
1140
1141         hwc->idx = -1;
1142
1143         /*
1144          * Count user and OS events unless requested not to.
1145          */
1146         if (!attr->exclude_user)
1147                 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
1148         if (!attr->exclude_kernel)
1149                 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
1150
1151         if (!hwc->sample_period) {
1152                 hwc->sample_period = x86_pmu.max_period;
1153                 hwc->last_period = hwc->sample_period;
1154                 atomic64_set(&hwc->period_left, hwc->sample_period);
1155         } else {
1156                 /*
1157                  * If we have a PMU initialized but no APIC
1158                  * interrupts, we cannot sample hardware
1159                  * events (user-space has to fall back and
1160                  * sample via a hrtimer based software event):
1161                  */
1162                 if (!x86_pmu.apic)
1163                         return -EOPNOTSUPP;
1164         }
1165
1166         /*
1167          * Raw hw_event types provide the config in the hw_event structure
1168          */
1169         if (attr->type == PERF_TYPE_RAW) {
1170                 hwc->config |= x86_pmu.raw_event(attr->config);
1171                 return 0;
1172         }
1173
1174         if (attr->type == PERF_TYPE_HW_CACHE)
1175                 return set_ext_hw_attr(hwc, attr);
1176
1177         if (attr->config >= x86_pmu.max_events)
1178                 return -EINVAL;
1179
1180         /*
1181          * The generic map:
1182          */
1183         config = x86_pmu.event_map(attr->config);
1184
1185         if (config == 0)
1186                 return -ENOENT;
1187
1188         if (config == -1LL)
1189                 return -EINVAL;
1190
1191         /*
1192          * Branch tracing:
1193          */
1194         if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
1195             (hwc->sample_period == 1)) {
1196                 /* BTS is not supported by this architecture. */
1197                 if (!bts_available())
1198                         return -EOPNOTSUPP;
1199
1200                 /* BTS is currently only allowed for user-mode. */
1201                 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1202                         return -EOPNOTSUPP;
1203         }
1204
1205         hwc->config |= config;
1206
1207         return 0;
1208 }
1209
1210 static void p6_pmu_disable_all(void)
1211 {
1212         u64 val;
1213
1214         /* p6 only has one enable register */
1215         rdmsrl(MSR_P6_EVNTSEL0, val);
1216         val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
1217         wrmsrl(MSR_P6_EVNTSEL0, val);
1218 }
1219
1220 static void intel_pmu_disable_all(void)
1221 {
1222         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1223
1224         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
1225
1226         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1227                 intel_pmu_disable_bts();
1228 }
1229
1230 static void x86_pmu_disable_all(void)
1231 {
1232         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1233         int idx;
1234
1235         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1236                 u64 val;
1237
1238                 if (!test_bit(idx, cpuc->active_mask))
1239                         continue;
1240                 rdmsrl(x86_pmu.eventsel + idx, val);
1241                 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
1242                         continue;
1243                 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
1244                 wrmsrl(x86_pmu.eventsel + idx, val);
1245         }
1246 }
1247
1248 void hw_perf_disable(void)
1249 {
1250         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1251
1252         if (!x86_pmu_initialized())
1253                 return;
1254
1255         if (!cpuc->enabled)
1256                 return;
1257
1258         cpuc->n_added = 0;
1259         cpuc->enabled = 0;
1260         barrier();
1261
1262         x86_pmu.disable_all();
1263 }
1264
1265 static void p6_pmu_enable_all(void)
1266 {
1267         unsigned long val;
1268
1269         /* p6 only has one enable register */
1270         rdmsrl(MSR_P6_EVNTSEL0, val);
1271         val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1272         wrmsrl(MSR_P6_EVNTSEL0, val);
1273 }
1274
1275 static void intel_pmu_enable_all(void)
1276 {
1277         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1278
1279         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1280
1281         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
1282                 struct perf_event *event =
1283                         cpuc->events[X86_PMC_IDX_FIXED_BTS];
1284
1285                 if (WARN_ON_ONCE(!event))
1286                         return;
1287
1288                 intel_pmu_enable_bts(event->hw.config);
1289         }
1290 }
1291
1292 static void x86_pmu_enable_all(void)
1293 {
1294         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1295         int idx;
1296
1297         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1298                 struct perf_event *event = cpuc->events[idx];
1299                 u64 val;
1300
1301                 if (!test_bit(idx, cpuc->active_mask))
1302                         continue;
1303
1304                 val = event->hw.config;
1305                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1306                 wrmsrl(x86_pmu.eventsel + idx, val);
1307         }
1308 }
1309
1310 static const struct pmu pmu;
1311
1312 static inline int is_x86_event(struct perf_event *event)
1313 {
1314         return event->pmu == &pmu;
1315 }
1316
1317 static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
1318 {
1319         struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
1320         unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
1321         int i, j, w, wmax, num = 0;
1322         struct hw_perf_event *hwc;
1323
1324         bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1325
1326         for (i = 0; i < n; i++) {
1327                 constraints[i] =
1328                   x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
1329         }
1330
1331         /*
1332          * fastpath, try to reuse previous register
1333          */
1334         for (i = 0; i < n; i++) {
1335                 hwc = &cpuc->event_list[i]->hw;
1336                 c = constraints[i];
1337
1338                 /* never assigned */
1339                 if (hwc->idx == -1)
1340                         break;
1341
1342                 /* constraint still honored */
1343                 if (!test_bit(hwc->idx, c->idxmsk))
1344                         break;
1345
1346                 /* not already used */
1347                 if (test_bit(hwc->idx, used_mask))
1348                         break;
1349
1350                 set_bit(hwc->idx, used_mask);
1351                 if (assign)
1352                         assign[i] = hwc->idx;
1353         }
1354         if (i == n)
1355                 goto done;
1356
1357         /*
1358          * begin slow path
1359          */
1360
1361         bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1362
1363         /*
1364          * weight = number of possible counters
1365          *
1366          * 1    = most constrained, only works on one counter
1367          * wmax = least constrained, works on any counter
1368          *
1369          * assign events to counters starting with most
1370          * constrained events.
1371          */
1372         wmax = x86_pmu.num_events;
1373
1374         /*
1375          * when fixed event counters are present,
1376          * wmax is incremented by 1 to account
1377          * for one more choice
1378          */
1379         if (x86_pmu.num_events_fixed)
1380                 wmax++;
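        /*
         * Example: with one event constrained to a single counter (weight 1)
         * and two events that may use any counter, the w == 1 pass places
         * the constrained event first and the later passes fill the
         * remaining counters with the less constrained events.
         */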
1381
1382         for (w = 1, num = n; num && w <= wmax; w++) {
1383                 /* for each event */
1384                 for (i = 0; num && i < n; i++) {
1385                         c = constraints[i];
1386                         hwc = &cpuc->event_list[i]->hw;
1387
1388                         if (c->weight != w)
1389                                 continue;
1390
1391                         for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
1392                                 if (!test_bit(j, used_mask))
1393                                         break;
1394                         }
1395
1396                         if (j == X86_PMC_IDX_MAX)
1397                                 break;
1398
1399                         set_bit(j, used_mask);
1400
1401                         if (assign)
1402                                 assign[i] = j;
1403                         num--;
1404                 }
1405         }
1406 done:
1407         /*
1408          * scheduling failed or is just a simulation,
1409          * free resources if necessary
1410          */
1411         if (!assign || num) {
1412                 for (i = 0; i < n; i++) {
1413                         if (x86_pmu.put_event_constraints)
1414                                 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
1415                 }
1416         }
1417         return num ? -ENOSPC : 0;
1418 }
1419
1420 /*
1421  * dogrp: true if we must also collect the group's sibling events
1422  * Returns the total number of events, or a negative error code
1423  */
1424 static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
1425 {
1426         struct perf_event *event;
1427         int n, max_count;
1428
1429         max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
1430
1431         /* current number of events already accepted */
1432         n = cpuc->n_events;
1433
1434         if (is_x86_event(leader)) {
1435                 if (n >= max_count)
1436                         return -ENOSPC;
1437                 cpuc->event_list[n] = leader;
1438                 n++;
1439         }
1440         if (!dogrp)
1441                 return n;
1442
1443         list_for_each_entry(event, &leader->sibling_list, group_entry) {
1444                 if (!is_x86_event(event) ||
1445                     event->state <= PERF_EVENT_STATE_OFF)
1446                         continue;
1447
1448                 if (n >= max_count)
1449                         return -ENOSPC;
1450
1451                 cpuc->event_list[n] = event;
1452                 n++;
1453         }
1454         return n;
1455 }
1456
1457
1458 static inline void x86_assign_hw_event(struct perf_event *event,
1459                                 struct hw_perf_event *hwc, int idx)
1460 {
1461         hwc->idx = idx;
1462
1463         if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
1464                 hwc->config_base = 0;
1465                 hwc->event_base = 0;
1466         } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
1467                 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1468                 /*
1469                  * We set it so that event_base + idx in wrmsr/rdmsr maps to
1470                  * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1471                  */
1472                 hwc->event_base =
1473                         MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
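                /*
                 * Example: the instructions-retired fixed counter has
                 * idx == X86_PMC_IDX_FIXED (bit 32 in the constraint masks
                 * above), so event_base + idx resolves to
                 * MSR_ARCH_PERFMON_FIXED_CTR0.
                 */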
1474         } else {
1475                 hwc->config_base = x86_pmu.eventsel;
1476                 hwc->event_base  = x86_pmu.perfctr;
1477         }
1478 }
1479
1480 static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
1481
1482 void hw_perf_enable(void)
1483 {
1484         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1485         struct perf_event *event;
1486         struct hw_perf_event *hwc;
1487         int i;
1488
1489         if (!x86_pmu_initialized())
1490                 return;
1491
1492         if (cpuc->enabled)
1493                 return;
1494
1495         if (cpuc->n_added) {
1496                 /*
1497                  * apply assignment obtained either from
1498                  * hw_perf_group_sched_in() or x86_pmu_enable()
1499                  *
1500                  * step1: save events moving to new counters
1501                  * step2: reprogram moved events into new counters
1502                  */
1503                 for (i = 0; i < cpuc->n_events; i++) {
1504
1505                         event = cpuc->event_list[i];
1506                         hwc = &event->hw;
1507
1508                         if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
1509                                 continue;
1510
1511                         __x86_pmu_disable(event, cpuc);
1512
1513                         hwc->idx = -1;
1514                 }
1515
1516                 for (i = 0; i < cpuc->n_events; i++) {
1517
1518                         event = cpuc->event_list[i];
1519                         hwc = &event->hw;
1520
1521                         if (hwc->idx == -1) {
1522                                 x86_assign_hw_event(event, hwc, cpuc->assign[i]);
1523                                 x86_perf_event_set_period(event, hwc, hwc->idx);
1524                         }
1525                         /*
1526                          * need to mark as active because x86_pmu_disable()
1527                          * clears active_mask and events[] yet preserves
1528                          * idx
1529                          */
1530                         set_bit(hwc->idx, cpuc->active_mask);
1531                         cpuc->events[hwc->idx] = event;
1532
1533                         x86_pmu.enable(hwc, hwc->idx);
1534                         perf_event_update_userpage(event);
1535                 }
1536                 cpuc->n_added = 0;
1537                 perf_events_lapic_init();
1538         }
1539
1540         cpuc->enabled = 1;
1541         barrier();
1542
1543         x86_pmu.enable_all();
1544 }
1545
1546 static inline u64 intel_pmu_get_status(void)
1547 {
1548         u64 status;
1549
1550         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1551
1552         return status;
1553 }
1554
1555 static inline void intel_pmu_ack_status(u64 ack)
1556 {
1557         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
1558 }
1559
1560 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1561 {
1562         (void)checking_wrmsrl(hwc->config_base + idx,
1563                               hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1564 }
1565
1566 static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1567 {
1568         (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1569 }
1570
1571 static inline void
1572 intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
1573 {
1574         int idx = __idx - X86_PMC_IDX_FIXED;
1575         u64 ctrl_val, mask;
1576
1577         mask = 0xfULL << (idx * 4);
1578
1579         rdmsrl(hwc->config_base, ctrl_val);
1580         ctrl_val &= ~mask;
1581         (void)checking_wrmsrl(hwc->config_base, ctrl_val);
1582 }
1583
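/*
 * On P6-family PMUs the enable bit in EVNTSEL0 acts as the global enable,
 * so an individual event is "disabled" by reprogramming its counter with
 * a NOP event, keeping the enable bit set while the PMU is enabled.
 */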
1584 static inline void
1585 p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1586 {
1587         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1588         u64 val = P6_NOP_EVENT;
1589
1590         if (cpuc->enabled)
1591                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1592
1593         (void)checking_wrmsrl(hwc->config_base + idx, val);
1594 }
1595
1596 static inline void
1597 intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1598 {
1599         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1600                 intel_pmu_disable_bts();
1601                 return;
1602         }
1603
1604         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1605                 intel_pmu_disable_fixed(hwc, idx);
1606                 return;
1607         }
1608
1609         x86_pmu_disable_event(hwc, idx);
1610 }
1611
1612 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1613
1614 /*
1615  * Set the next IRQ period, based on the hwc->period_left value.
1616  * To be called with the event disabled in hw:
1617  */
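/*
 * The counter counts upwards and raises an interrupt when it wraps, so
 * programming it with -left (truncated to the counter width through
 * x86_pmu.event_mask) makes it overflow after exactly 'left' events.
 */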
1618 static int
1619 x86_perf_event_set_period(struct perf_event *event,
1620                              struct hw_perf_event *hwc, int idx)
1621 {
1622         s64 left = atomic64_read(&hwc->period_left);
1623         s64 period = hwc->sample_period;
1624         int err, ret = 0;
1625
1626         if (idx == X86_PMC_IDX_FIXED_BTS)
1627                 return 0;
1628
1629         /*
1630          * If we are way outside a reasonable range then just skip forward:
1631          */
1632         if (unlikely(left <= -period)) {
1633                 left = period;
1634                 atomic64_set(&hwc->period_left, left);
1635                 hwc->last_period = period;
1636                 ret = 1;
1637         }
1638
1639         if (unlikely(left <= 0)) {
1640                 left += period;
1641                 atomic64_set(&hwc->period_left, left);
1642                 hwc->last_period = period;
1643                 ret = 1;
1644         }
1645         /*
1646          * Quirk: certain CPUs don't like it if just 1 hw_event is left:
1647          */
1648         if (unlikely(left < 2))
1649                 left = 2;
1650
1651         if (left > x86_pmu.max_period)
1652                 left = x86_pmu.max_period;
1653
1654         per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
1655
1656         /*
1657          * The hw event starts counting from this event offset,
1658          * mark it to be able to extract future deltas:
1659          */
1660         atomic64_set(&hwc->prev_count, (u64)-left);
1661
1662         err = checking_wrmsrl(hwc->event_base + idx,
1663                              (u64)(-left) & x86_pmu.event_mask);
1664
1665         perf_event_update_userpage(event);
1666
1667         return ret;
1668 }
1669
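/*
 * Each fixed-function counter owns a 4-bit field in
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL: bit 0 enables ring-0 counting, bit 1
 * ring-3 counting, bit 2 is the any-thread bit (version 3 and up) and
 * bit 3 enables PMI generation.  Fixed counter 1, for example, uses
 * bits 4-7, hence the (idx * 4) shifts below.
 */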
1670 static inline void
1671 intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1672 {
1673         int idx = __idx - X86_PMC_IDX_FIXED;
1674         u64 ctrl_val, bits, mask;
1675         int err;
1676
1677         /*
1678          * Enable IRQ generation (0x8),
1679          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1680          * if requested:
1681          */
1682         bits = 0x8ULL;
1683         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1684                 bits |= 0x2;
1685         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1686                 bits |= 0x1;
1687
1688         /*
1689          * ANY bit is supported in v3 and up
1690          */
1691         if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
1692                 bits |= 0x4;
1693
1694         bits <<= (idx * 4);
1695         mask = 0xfULL << (idx * 4);
1696
1697         rdmsrl(hwc->config_base, ctrl_val);
1698         ctrl_val &= ~mask;
1699         ctrl_val |= bits;
1700         err = checking_wrmsrl(hwc->config_base, ctrl_val);
1701 }
1702
1703 static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1704 {
1705         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1706         u64 val;
1707
1708         val = hwc->config;
1709         if (cpuc->enabled)
1710                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1711
1712         (void)checking_wrmsrl(hwc->config_base + idx, val);
1713 }
1714
1715
1716 static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1717 {
1718         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1719                 if (!__get_cpu_var(cpu_hw_events).enabled)
1720                         return;
1721
1722                 intel_pmu_enable_bts(hwc->config);
1723                 return;
1724         }
1725
1726         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1727                 intel_pmu_enable_fixed(hwc, idx);
1728                 return;
1729         }
1730
1731         __x86_pmu_enable_event(hwc, idx);
1732 }
1733
1734 static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1735 {
1736         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1737         if (cpuc->enabled)
1738                 __x86_pmu_enable_event(hwc, idx);
1739 }
1740
1741 /*
1742  * activate a single event
1743  *
1744  * The event is added to the group of enabled events
1745  * but only if it can be scheduled with existing events.
1746  *
1747  * Called with the PMU disabled; on success the caller is guaranteed
1748  * to subsequently call perf_enable() and hw_perf_enable().
1749  */
1750 static int x86_pmu_enable(struct perf_event *event)
1751 {
1752         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1753         struct hw_perf_event *hwc;
1754         int assign[X86_PMC_IDX_MAX];
1755         int n, n0, ret;
1756
1757         hwc = &event->hw;
1758
1759         n0 = cpuc->n_events;
1760         n = collect_events(cpuc, event, false);
1761         if (n < 0)
1762                 return n;
1763
1764         ret = x86_schedule_events(cpuc, n, assign);
1765         if (ret)
1766                 return ret;
1767         /*
1768          * copy the new assignment now that we know it is possible;
1769          * it will be used by hw_perf_enable()
1770          */
1771         memcpy(cpuc->assign, assign, n*sizeof(int));
1772
1773         cpuc->n_events = n;
1774         cpuc->n_added  = n - n0;
1775
1776         return 0;
1777 }
1778
1779 static void x86_pmu_unthrottle(struct perf_event *event)
1780 {
1781         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1782         struct hw_perf_event *hwc = &event->hw;
1783
1784         if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1785                                 cpuc->events[hwc->idx] != event))
1786                 return;
1787
1788         x86_pmu.enable(hwc, hwc->idx);
1789 }
1790
1791 void perf_event_print_debug(void)
1792 {
1793         u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1794         struct cpu_hw_events *cpuc;
1795         unsigned long flags;
1796         int cpu, idx;
1797
1798         if (!x86_pmu.num_events)
1799                 return;
1800
1801         local_irq_save(flags);
1802
1803         cpu = smp_processor_id();
1804         cpuc = &per_cpu(cpu_hw_events, cpu);
1805
1806         if (x86_pmu.version >= 2) {
1807                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
1808                 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1809                 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1810                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1811
1812                 pr_info("\n");
1813                 pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
1814                 pr_info("CPU#%d: status:     %016llx\n", cpu, status);
1815                 pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
1816                 pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
1817         }
1818         pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1819
1820         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1821                 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1822                 rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
1823
1824                 prev_left = per_cpu(pmc_prev_left[idx], cpu);
1825
1826                 pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
1827                         cpu, idx, pmc_ctrl);
1828                 pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
1829                         cpu, idx, pmc_count);
1830                 pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
1831                         cpu, idx, prev_left);
1832         }
1833         for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1834                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1835
1836                 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
1837                         cpu, idx, pmc_count);
1838         }
1839         local_irq_restore(flags);
1840 }
1841
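/*
 * Flush the hardware-written BTS records (a from/to/flags triple each)
 * accumulated between bts_buffer_base and bts_index, emitting one perf
 * sample per branch record with ip = from and addr = to.
 */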
1842 static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
1843 {
1844         struct debug_store *ds = cpuc->ds;
1845         struct bts_record {
1846                 u64     from;
1847                 u64     to;
1848                 u64     flags;
1849         };
1850         struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1851         struct bts_record *at, *top;
1852         struct perf_output_handle handle;
1853         struct perf_event_header header;
1854         struct perf_sample_data data;
1855         struct pt_regs regs;
1856
1857         if (!event)
1858                 return;
1859
1860         if (!ds)
1861                 return;
1862
1863         at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1864         top = (struct bts_record *)(unsigned long)ds->bts_index;
1865
1866         if (top <= at)
1867                 return;
1868
1869         ds->bts_index = ds->bts_buffer_base;
1870
1871
1872         data.period     = event->hw.last_period;
1873         data.addr       = 0;
1874         data.raw        = NULL;
1875         regs.ip         = 0;
1876
1877         /*
1878          * Prepare a generic sample, i.e. fill in the invariant fields.
1879          * We will overwrite the from and to address before we output
1880          * the sample.
1881          */
1882         perf_prepare_sample(&header, &data, event, &regs);
1883
1884         if (perf_output_begin(&handle, event,
1885                               header.size * (top - at), 1, 1))
1886                 return;
1887
1888         for (; at < top; at++) {
1889                 data.ip         = at->from;
1890                 data.addr       = at->to;
1891
1892                 perf_output_sample(&handle, &header, &data, event);
1893         }
1894
1895         perf_output_end(&handle);
1896
1897         /* There's new data available. */
1898         event->hw.interrupts++;
1899         event->pending_kill = POLL_IN;
1900 }
1901
1902 static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc)
1903 {
1904         struct hw_perf_event *hwc = &event->hw;
1905         int idx = hwc->idx;
1906
1907         /*
1908          * Must be done before we disable, otherwise the NMI handler
1909          * could re-enable it:
1910          */
1911         clear_bit(idx, cpuc->active_mask);
1912         x86_pmu.disable(hwc, idx);
1913
1914         /*
1915          * Drain the remaining delta count out of an event
1916          * that we are disabling:
1917          */
1918         x86_perf_event_update(event, hwc, idx);
1919
1920         /* Drain the remaining BTS records. */
1921         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1922                 intel_pmu_drain_bts_buffer(cpuc);
1923
1924         cpuc->events[idx] = NULL;
1925 }
1926
1927 static void x86_pmu_disable(struct perf_event *event)
1928 {
1929         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1930         int i;
1931
1932         __x86_pmu_disable(event, cpuc);
1933
1934         for (i = 0; i < cpuc->n_events; i++) {
1935                 if (event == cpuc->event_list[i]) {
1936
1937                         if (x86_pmu.put_event_constraints)
1938                                 x86_pmu.put_event_constraints(cpuc, event);
1939
1940                         while (++i < cpuc->n_events)
1941                                 cpuc->event_list[i-1] = cpuc->event_list[i];
1942
1943                         --cpuc->n_events;
1944                         break;
1945                 }
1946         }
1947         perf_event_update_userpage(event);
1948 }
1949
1950 /*
1951  * Save and restart an expired event. Called from NMI context,
1952  * so it has to be careful about preempting normal event ops:
1953  */
1954 static int intel_pmu_save_and_restart(struct perf_event *event)
1955 {
1956         struct hw_perf_event *hwc = &event->hw;
1957         int idx = hwc->idx;
1958         int ret;
1959
1960         x86_perf_event_update(event, hwc, idx);
1961         ret = x86_perf_event_set_period(event, hwc, idx);
1962
1963         if (event->state == PERF_EVENT_STATE_ACTIVE)
1964                 intel_pmu_enable_event(hwc, idx);
1965
1966         return ret;
1967 }
1968
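/*
 * Last-resort cleanup: zero every generic and fixed counter and rewind
 * the BTS buffer.  Used when the overflow handler appears to be stuck.
 */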
1969 static void intel_pmu_reset(void)
1970 {
1971         struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1972         unsigned long flags;
1973         int idx;
1974
1975         if (!x86_pmu.num_events)
1976                 return;
1977
1978         local_irq_save(flags);
1979
1980         printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1981
1982         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1983                 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1984                 checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
1985         }
1986         for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1987                 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1988         }
1989         if (ds)
1990                 ds->bts_index = ds->bts_buffer_base;
1991
1992         local_irq_restore(flags);
1993 }
1994
1995 /*
1996  * This handler is triggered by the local APIC, so the APIC IRQ handling
1997  * rules apply:
1998  */
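/*
 * The handler keeps re-reading GLOBAL_STATUS until it reads back zero;
 * if that takes more than 100 iterations the PMU state is assumed to be
 * wedged and intel_pmu_reset() wipes it.
 */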
1999 static int intel_pmu_handle_irq(struct pt_regs *regs)
2000 {
2001         struct perf_sample_data data;
2002         struct cpu_hw_events *cpuc;
2003         int bit, loops;
2004         u64 ack, status;
2005
2006         data.addr = 0;
2007         data.raw = NULL;
2008
2009         cpuc = &__get_cpu_var(cpu_hw_events);
2010
2011         perf_disable();
2012         intel_pmu_drain_bts_buffer(cpuc);
2013         status = intel_pmu_get_status();
2014         if (!status) {
2015                 perf_enable();
2016                 return 0;
2017         }
2018
2019         loops = 0;
2020 again:
2021         if (++loops > 100) {
2022                 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
2023                 perf_event_print_debug();
2024                 intel_pmu_reset();
2025                 perf_enable();
2026                 return 1;
2027         }
2028
2029         inc_irq_stat(apic_perf_irqs);
2030         ack = status;
2031         for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
2032                 struct perf_event *event = cpuc->events[bit];
2033
2034                 clear_bit(bit, (unsigned long *) &status);
2035                 if (!test_bit(bit, cpuc->active_mask))
2036                         continue;
2037
2038                 if (!intel_pmu_save_and_restart(event))
2039                         continue;
2040
2041                 data.period = event->hw.last_period;
2042
2043                 if (perf_event_overflow(event, 1, &data, regs))
2044                         intel_pmu_disable_event(&event->hw, bit);
2045         }
2046
2047         intel_pmu_ack_status(ack);
2048
2049         /*
2050          * Repeat if there is more work to be done:
2051          */
2052         status = intel_pmu_get_status();
2053         if (status)
2054                 goto again;
2055
2056         perf_enable();
2057
2058         return 1;
2059 }
2060
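/*
 * Generic overflow handler for PMUs without a global status register:
 * scan every active counter and treat a cleared top bit as an overflow,
 * since counters are programmed with a negative (sign-bit set) value.
 */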
2061 static int x86_pmu_handle_irq(struct pt_regs *regs)
2062 {
2063         struct perf_sample_data data;
2064         struct cpu_hw_events *cpuc;
2065         struct perf_event *event;
2066         struct hw_perf_event *hwc;
2067         int idx, handled = 0;
2068         u64 val;
2069
2070         data.addr = 0;
2071         data.raw = NULL;
2072
2073         cpuc = &__get_cpu_var(cpu_hw_events);
2074
2075         for (idx = 0; idx < x86_pmu.num_events; idx++) {
2076                 if (!test_bit(idx, cpuc->active_mask))
2077                         continue;
2078
2079                 event = cpuc->events[idx];
2080                 hwc = &event->hw;
2081
2082                 val = x86_perf_event_update(event, hwc, idx);
2083                 if (val & (1ULL << (x86_pmu.event_bits - 1)))
2084                         continue;
2085
2086                 /*
2087                  * event overflow
2088                  */
2089                 handled         = 1;
2090                 data.period     = event->hw.last_period;
2091
2092                 if (!x86_perf_event_set_period(event, hwc, idx))
2093                         continue;
2094
2095                 if (perf_event_overflow(event, 1, &data, regs))
2096                         x86_pmu.disable(hwc, idx);
2097         }
2098
2099         if (handled)
2100                 inc_irq_stat(apic_perf_irqs);
2101
2102         return handled;
2103 }
2104
2105 void smp_perf_pending_interrupt(struct pt_regs *regs)
2106 {
2107         irq_enter();
2108         ack_APIC_irq();
2109         inc_irq_stat(apic_pending_irqs);
2110         perf_event_do_pending();
2111         irq_exit();
2112 }
2113
2114 void set_perf_event_pending(void)
2115 {
2116 #ifdef CONFIG_X86_LOCAL_APIC
2117         if (!x86_pmu.apic || !x86_pmu_initialized())
2118                 return;
2119
2120         apic->send_IPI_self(LOCAL_PENDING_VECTOR);
2121 #endif
2122 }
2123
2124 void perf_events_lapic_init(void)
2125 {
2126 #ifdef CONFIG_X86_LOCAL_APIC
2127         if (!x86_pmu.apic || !x86_pmu_initialized())
2128                 return;
2129
2130         /*
2131          * Always use NMI for PMU
2132          */
2133         apic_write(APIC_LVTPC, APIC_DM_NMI);
2134 #endif
2135 }
2136
2137 static int __kprobes
2138 perf_event_nmi_handler(struct notifier_block *self,
2139                          unsigned long cmd, void *__args)
2140 {
2141         struct die_args *args = __args;
2142         struct pt_regs *regs;
2143
2144         if (!atomic_read(&active_events))
2145                 return NOTIFY_DONE;
2146
2147         switch (cmd) {
2148         case DIE_NMI:
2149         case DIE_NMI_IPI:
2150                 break;
2151
2152         default:
2153                 return NOTIFY_DONE;
2154         }
2155
2156         regs = args->regs;
2157
2158 #ifdef CONFIG_X86_LOCAL_APIC
2159         apic_write(APIC_LVTPC, APIC_DM_NMI);
2160 #endif
2161         /*
2162          * Can't rely on the handled return value to say it was our NMI, two
2163          * events could trigger 'simultaneously' raising two back-to-back NMIs.
2164          *
2165          * If the first NMI handles both, the latter will be empty and daze
2166          * the CPU.
2167          */
2168         x86_pmu.handle_irq(regs);
2169
2170         return NOTIFY_STOP;
2171 }
2172
2173 static struct event_constraint unconstrained;
2174
2175 static struct event_constraint bts_constraint =
2176         EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
2177
2178 static struct event_constraint *
2179 intel_special_constraints(struct perf_event *event)
2180 {
2181         unsigned int hw_event;
2182
2183         hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
2184
2185         if (unlikely((hw_event ==
2186                       x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
2187                      (event->hw.sample_period == 1))) {
2188
2189                 return &bts_constraint;
2190         }
2191         return NULL;
2192 }
2193
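/*
 * Constraint lookup order: special cases first (a branch-instructions
 * event with a sample period of 1 is steered onto the BTS fake counter),
 * then the model-specific constraint table, and finally the catch-all
 * 'unconstrained' mask covering all generic counters.
 */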
2194 static struct event_constraint *
2195 intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
2196 {
2197         struct event_constraint *c;
2198
2199         c = intel_special_constraints(event);
2200         if (c)
2201                 return c;
2202
2203         if (x86_pmu.event_constraints) {
2204                 for_each_event_constraint(c, x86_pmu.event_constraints) {
2205                         if ((event->hw.config & c->cmask) == c->code)
2206                                 return c;
2207                 }
2208         }
2209
2210         return &unconstrained;
2211 }
2212
2213 static struct event_constraint *
2214 amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
2215 {
2216         return &unconstrained;
2217 }
2218
2219 static int x86_event_sched_in(struct perf_event *event,
2220                           struct perf_cpu_context *cpuctx, int cpu)
2221 {
2222         int ret = 0;
2223
2224         event->state = PERF_EVENT_STATE_ACTIVE;
2225         event->oncpu = cpu;
2226         event->tstamp_running += event->ctx->time - event->tstamp_stopped;
2227
2228         if (!is_x86_event(event))
2229                 ret = event->pmu->enable(event);
2230
2231         if (!ret && !is_software_event(event))
2232                 cpuctx->active_oncpu++;
2233
2234         if (!ret && event->attr.exclusive)
2235                 cpuctx->exclusive = 1;
2236
2237         return ret;
2238 }
2239
2240 static void x86_event_sched_out(struct perf_event *event,
2241                             struct perf_cpu_context *cpuctx, int cpu)
2242 {
2243         event->state = PERF_EVENT_STATE_INACTIVE;
2244         event->oncpu = -1;
2245
2246         if (!is_x86_event(event))
2247                 event->pmu->disable(event);
2248
2249         event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
2250
2251         if (!is_software_event(event))
2252                 cpuctx->active_oncpu--;
2253
2254         if (event->attr.exclusive || !cpuctx->active_oncpu)
2255                 cpuctx->exclusive = 0;
2256 }
2257
2258 /*
2259  * Called to enable a whole group of events.
2260  * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
2261  * Assumes the caller has disabled interrupts and has
2262  * frozen the PMU with hw_perf_disable().
2263  *
2264  * If this returns 1, the caller is guaranteed to subsequently
2265  * call perf_enable() and hw_perf_enable().
2266  */
2267 int hw_perf_group_sched_in(struct perf_event *leader,
2268                struct perf_cpu_context *cpuctx,
2269                struct perf_event_context *ctx, int cpu)
2270 {
2271         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
2272         struct perf_event *sub;
2273         int assign[X86_PMC_IDX_MAX];
2274         int n0, n1, ret;
2275
2276         /* n0 = total number of events */
2277         n0 = collect_events(cpuc, leader, true);
2278         if (n0 < 0)
2279                 return n0;
2280
2281         ret = x86_schedule_events(cpuc, n0, assign);
2282         if (ret)
2283                 return ret;
2284
2285         ret = x86_event_sched_in(leader, cpuctx, cpu);
2286         if (ret)
2287                 return ret;
2288
2289         n1 = 1;
2290         list_for_each_entry(sub, &leader->sibling_list, group_entry) {
2291                 if (sub->state > PERF_EVENT_STATE_OFF) {
2292                         ret = x86_event_sched_in(sub, cpuctx, cpu);
2293                         if (ret)
2294                                 goto undo;
2295                         ++n1;
2296                 }
2297         }
2298         /*
2299          * copy the new assignment now that we know it is possible;
2300          * it will be used by hw_perf_enable()
2301          */
2302         memcpy(cpuc->assign, assign, n0*sizeof(int));
2303
2304         cpuc->n_events  = n0;
2305         cpuc->n_added   = n1;
2306         ctx->nr_active += n1;
2307
2308         /*
2309          * 1 means successful and the events are active.
2310          * This is not quite true because we defer
2311          * actual activation until hw_perf_enable(), but
2312          * this way we ensure the caller won't try to enable
2313          * individual events.
2314          */
2315         return 1;
2316 undo:
2317         x86_event_sched_out(leader, cpuctx, cpu);
2318         n0  = 1;
2319         list_for_each_entry(sub, &leader->sibling_list, group_entry) {
2320                 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
2321                         x86_event_sched_out(sub, cpuctx, cpu);
2322                         if (++n0 == n1)
2323                                 break;
2324                 }
2325         }
2326         return ret;
2327 }
2328
2329 static __read_mostly struct notifier_block perf_event_nmi_notifier = {
2330         .notifier_call          = perf_event_nmi_handler,
2331         .next                   = NULL,
2332         .priority               = 1
2333 };
2334
2335 static __initconst struct x86_pmu p6_pmu = {
2336         .name                   = "p6",
2337         .handle_irq             = x86_pmu_handle_irq,
2338         .disable_all            = p6_pmu_disable_all,
2339         .enable_all             = p6_pmu_enable_all,
2340         .enable                 = p6_pmu_enable_event,
2341         .disable                = p6_pmu_disable_event,
2342         .eventsel               = MSR_P6_EVNTSEL0,
2343         .perfctr                = MSR_P6_PERFCTR0,
2344         .event_map              = p6_pmu_event_map,
2345         .raw_event              = p6_pmu_raw_event,
2346         .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
2347         .apic                   = 1,
2348         .max_period             = (1ULL << 31) - 1,
2349         .version                = 0,
2350         .num_events             = 2,
2351         /*
2352          * Events have 40 bits implemented. However they are designed such
2353          * that bits [32-39] are sign extensions of bit 31. As such the
2354          * effective width of an event on a P6-like PMU is 32 bits only.
2355          *
2356          * See the IA-32 Intel Architecture Software Developer's Manual, Vol. 3B.
2357          */
2358         .event_bits             = 32,
2359         .event_mask             = (1ULL << 32) - 1,
2360         .get_event_constraints  = intel_get_event_constraints,
2361         .event_constraints      = intel_p6_event_constraints
2362 };
2363
2364 static __initconst struct x86_pmu core_pmu = {
2365         .name                   = "core",
2366         .handle_irq             = x86_pmu_handle_irq,
2367         .disable_all            = x86_pmu_disable_all,
2368         .enable_all             = x86_pmu_enable_all,
2369         .enable                 = x86_pmu_enable_event,
2370         .disable                = x86_pmu_disable_event,
2371         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
2372         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
2373         .event_map              = intel_pmu_event_map,
2374         .raw_event              = intel_pmu_raw_event,
2375         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
2376         .apic                   = 1,
2377         /*
2378          * Intel PMCs cannot be accessed sanely above 32 bit width,
2379          * so we install an artificial 1<<31 period regardless of
2380          * the generic event period:
2381          */
2382         .max_period             = (1ULL << 31) - 1,
2383         .get_event_constraints  = intel_get_event_constraints,
2384         .event_constraints      = intel_core_event_constraints,
2385 };
2386
2387 static __initconst struct x86_pmu intel_pmu = {
2388         .name                   = "Intel",
2389         .handle_irq             = intel_pmu_handle_irq,
2390         .disable_all            = intel_pmu_disable_all,
2391         .enable_all             = intel_pmu_enable_all,
2392         .enable                 = intel_pmu_enable_event,
2393         .disable                = intel_pmu_disable_event,
2394         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
2395         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
2396         .event_map              = intel_pmu_event_map,
2397         .raw_event              = intel_pmu_raw_event,
2398         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
2399         .apic                   = 1,
2400         /*
2401          * Intel PMCs cannot be accessed sanely above 32 bit width,
2402          * so we install an artificial 1<<31 period regardless of
2403          * the generic event period:
2404          */
2405         .max_period             = (1ULL << 31) - 1,
2406         .enable_bts             = intel_pmu_enable_bts,
2407         .disable_bts            = intel_pmu_disable_bts,
2408         .get_event_constraints  = intel_get_event_constraints
2409 };
2410
2411 static __initconst struct x86_pmu amd_pmu = {
2412         .name                   = "AMD",
2413         .handle_irq             = x86_pmu_handle_irq,
2414         .disable_all            = x86_pmu_disable_all,
2415         .enable_all             = x86_pmu_enable_all,
2416         .enable                 = x86_pmu_enable_event,
2417         .disable                = x86_pmu_disable_event,
2418         .eventsel               = MSR_K7_EVNTSEL0,
2419         .perfctr                = MSR_K7_PERFCTR0,
2420         .event_map              = amd_pmu_event_map,
2421         .raw_event              = amd_pmu_raw_event,
2422         .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
2423         .num_events             = 4,
2424         .event_bits             = 48,
2425         .event_mask             = (1ULL << 48) - 1,
2426         .apic                   = 1,
2427         /* use highest bit to detect overflow */
2428         .max_period             = (1ULL << 47) - 1,
2429         .get_event_constraints  = amd_get_event_constraints
2430 };
2431
2432 static __init int p6_pmu_init(void)
2433 {
2434         switch (boot_cpu_data.x86_model) {
2435         case 1:
2436         case 3:  /* Pentium Pro */
2437         case 5:
2438         case 6:  /* Pentium II */
2439         case 7:
2440         case 8:
2441         case 11: /* Pentium III */
2442         case 9:
2443         case 13:
2444                 /* Pentium M */
2445                 break;
2446         default:
2447                 pr_cont("unsupported p6 CPU model %d ",
2448                         boot_cpu_data.x86_model);
2449                 return -ENODEV;
2450         }
2451
2452         x86_pmu = p6_pmu;
2453
2454         return 0;
2455 }
2456
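/*
 * The architectural perfmon parameters come from CPUID leaf 0xA: EAX
 * carries the version, the number of generic counters and their bit
 * width, EDX the number of fixed-function counters.
 */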
2457 static __init int intel_pmu_init(void)
2458 {
2459         union cpuid10_edx edx;
2460         union cpuid10_eax eax;
2461         unsigned int unused;
2462         unsigned int ebx;
2463         int version;
2464
2465         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2466                 /* check for P6 processor family */
2467                 if (boot_cpu_data.x86 == 6) {
2468                         return p6_pmu_init();
2469                 } else {
2470                         return -ENODEV;
2471                 }
2472         }
2473
2474         /*
2475          * Check whether the Architectural PerfMon supports
2476          * Branch Misses Retired hw_event or not.
2477          */
2478         cpuid(10, &eax.full, &ebx, &unused, &edx.full);
2479         if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
2480                 return -ENODEV;
2481
2482         version = eax.split.version_id;
2483         if (version < 2)
2484                 x86_pmu = core_pmu;
2485         else
2486                 x86_pmu = intel_pmu;
2487
2488         x86_pmu.version                 = version;
2489         x86_pmu.num_events              = eax.split.num_events;
2490         x86_pmu.event_bits              = eax.split.bit_width;
2491         x86_pmu.event_mask              = (1ULL << eax.split.bit_width) - 1;
2492
2493         /*
2494          * Quirk: v2 perfmon does not report fixed-purpose events, so
2495          * assume at least 3 events:
2496          */
2497         if (version > 1)
2498                 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
2499
2500         /*
2501          * Install the hw-cache-events table:
2502          */
2503         switch (boot_cpu_data.x86_model) {
2504         case 14: /* 65 nm core solo/duo, "Yonah" */
2505                 pr_cont("Core events, ");
2506                 break;
2507
2508         case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
2509         case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
2510         case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
2511         case 29: /* six-core 45 nm xeon "Dunnington" */
2512                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2513                        sizeof(hw_cache_event_ids));
2514
2515                 x86_pmu.event_constraints = intel_core2_event_constraints;
2516                 pr_cont("Core2 events, ");
2517                 break;
2518
2519         case 26: /* 45 nm nehalem, "Bloomfield" */
2520         case 30: /* 45 nm nehalem, "Lynnfield" */
2521                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2522                        sizeof(hw_cache_event_ids));
2523
2524                 x86_pmu.event_constraints = intel_nehalem_event_constraints;
2525                 pr_cont("Nehalem/Corei7 events, ");
2526                 break;
2527         case 28:
2528                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2529                        sizeof(hw_cache_event_ids));
2530
2531                 x86_pmu.event_constraints = intel_gen_event_constraints;
2532                 pr_cont("Atom events, ");
2533                 break;
2534
2535         case 37: /* 32 nm nehalem, "Clarkdale" */
2536         case 44: /* 32 nm nehalem, "Gulftown" */
2537                 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
2538                        sizeof(hw_cache_event_ids));
2539
2540                 x86_pmu.event_constraints = intel_westmere_event_constraints;
2541                 pr_cont("Westmere events, ");
2542                 break;
2543         default:
2544                 /*
2545                  * default constraints for v2 and up
2546                  */
2547                 x86_pmu.event_constraints = intel_gen_event_constraints;
2548                 pr_cont("generic architected perfmon, ");
2549         }
2550         return 0;
2551 }
2552
2553 static __init int amd_pmu_init(void)
2554 {
2555         /* Performance-monitoring supported from K7 and later: */
2556         if (boot_cpu_data.x86 < 6)
2557                 return -ENODEV;
2558
2559         x86_pmu = amd_pmu;
2560
2561         /* Events are common for all AMDs */
2562         memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
2563                sizeof(hw_cache_event_ids));
2564
2565         return 0;
2566 }
2567
2568 static void __init pmu_check_apic(void)
2569 {
2570         if (cpu_has_apic)
2571                 return;
2572
2573         x86_pmu.apic = 0;
2574         pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
2575         pr_info("no hardware sampling interrupt available.\n");
2576 }
2577
2578 void __init init_hw_perf_events(void)
2579 {
2580         int err;
2581
2582         pr_info("Performance Events: ");
2583
2584         switch (boot_cpu_data.x86_vendor) {
2585         case X86_VENDOR_INTEL:
2586                 err = intel_pmu_init();
2587                 break;
2588         case X86_VENDOR_AMD:
2589                 err = amd_pmu_init();
2590                 break;
2591         default:
2592                 return;
2593         }
2594         if (err != 0) {
2595                 pr_cont("no PMU driver, software events only.\n");
2596                 return;
2597         }
2598
2599         pmu_check_apic();
2600
2601         pr_cont("%s PMU driver.\n", x86_pmu.name);
2602
2603         if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
2604                 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
2605                      x86_pmu.num_events, X86_PMC_MAX_GENERIC);
2606                 x86_pmu.num_events = X86_PMC_MAX_GENERIC;
2607         }
2608         perf_event_mask = (1 << x86_pmu.num_events) - 1;
2609         perf_max_events = x86_pmu.num_events;
2610
2611         if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
2612                 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
2613                      x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
2614                 x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
2615         }
2616
2617         perf_event_mask |=
2618                 ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
2619         x86_pmu.intel_ctrl = perf_event_mask;
2620
2621         perf_events_lapic_init();
2622         register_die_notifier(&perf_event_nmi_notifier);
2623
2624         unconstrained = (struct event_constraint)
2625                 EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0);
2626
2627         pr_info("... version:                %d\n",     x86_pmu.version);
2628         pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
2629         pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
2630         pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
2631         pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
2632         pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
2633         pr_info("... event mask:             %016Lx\n", perf_event_mask);
2634 }
2635
2636 static inline void x86_pmu_read(struct perf_event *event)
2637 {
2638         x86_perf_event_update(event, &event->hw, event->hw.idx);
2639 }
2640
2641 static const struct pmu pmu = {
2642         .enable         = x86_pmu_enable,
2643         .disable        = x86_pmu_disable,
2644         .read           = x86_pmu_read,
2645         .unthrottle     = x86_pmu_unthrottle,
2646 };
2647
2648 /*
2649  * validate a single event group
2650  *
2651  * validation includes:
2652  *      - check events are compatible with each other
2653  *      - events do not compete for the same counter
2654  *      - number of events <= number of counters
2655  *
2656  * validation ensures the group can be loaded onto the
2657  * PMU if it was the only group available.
2658  */
2659 static int validate_group(struct perf_event *event)
2660 {
2661         struct perf_event *leader = event->group_leader;
2662         struct cpu_hw_events *fake_cpuc;
2663         int ret, n;
2664
2665         ret = -ENOMEM;
2666         fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
2667         if (!fake_cpuc)
2668                 goto out;
2669
2670         /*
2671          * the event is not yet connected with its
2672          * siblings, therefore we must first collect the
2673          * existing siblings, then add the new event
2674          * before we can simulate the scheduling.
2675          */
2676         ret = -ENOSPC;
2677         n = collect_events(fake_cpuc, leader, true);
2678         if (n < 0)
2679                 goto out_free;
2680
2681         fake_cpuc->n_events = n;
2682         n = collect_events(fake_cpuc, event, false);
2683         if (n < 0)
2684                 goto out_free;
2685
2686         fake_cpuc->n_events = n;
2687
2688         ret = x86_schedule_events(fake_cpuc, n, NULL);
2689
2690 out_free:
2691         kfree(fake_cpuc);
2692 out:
2693         return ret;
2694 }
2695
2696 const struct pmu *hw_perf_event_init(struct perf_event *event)
2697 {
2698         const struct pmu *tmp;
2699         int err;
2700
2701         err = __hw_perf_event_init(event);
2702         if (!err) {
2703                 /*
2704                  * we temporarily connect the event to its pmu
2705                  * so that validate_group() can classify
2706                  * it as an x86 event using is_x86_event()
2707                  */
2708                 tmp = event->pmu;
2709                 event->pmu = &pmu;
2710
2711                 if (event->group_leader != event)
2712                         err = validate_group(event);
2713
2714                 event->pmu = tmp;
2715         }
2716         if (err) {
2717                 if (event->destroy)
2718                         event->destroy(event);
2719                 return ERR_PTR(err);
2720         }
2721
2722         return &pmu;
2723 }
2724
2725 /*
2726  * callchain support
2727  */
2728
2729 static inline
2730 void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2731 {
2732         if (entry->nr < PERF_MAX_STACK_DEPTH)
2733                 entry->ip[entry->nr++] = ip;
2734 }
2735
2736 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2737 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2738
2739
2740 static void
2741 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
2742 {
2743         /* Ignore warnings */
2744 }
2745
2746 static void backtrace_warning(void *data, char *msg)
2747 {
2748         /* Ignore warnings */
2749 }
2750
2751 static int backtrace_stack(void *data, char *name)
2752 {
2753         return 0;
2754 }
2755
2756 static void backtrace_address(void *data, unsigned long addr, int reliable)
2757 {
2758         struct perf_callchain_entry *entry = data;
2759
2760         if (reliable)
2761                 callchain_store(entry, addr);
2762 }
2763
2764 static const struct stacktrace_ops backtrace_ops = {
2765         .warning                = backtrace_warning,
2766         .warning_symbol         = backtrace_warning_symbol,
2767         .stack                  = backtrace_stack,
2768         .address                = backtrace_address,
2769         .walk_stack             = print_context_stack_bp,
2770 };
2771
2772 #include "../dumpstack.h"
2773
2774 static void
2775 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
2776 {
2777         callchain_store(entry, PERF_CONTEXT_KERNEL);
2778         callchain_store(entry, regs->ip);
2779
2780         dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
2781 }
2782
2783 /*
2784  * best-effort, GUP-based copy_from_user() that assumes IRQ or NMI context
2785  */
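/*
 * The copy proceeds page by page: __get_user_pages_fast() pins the page
 * without taking mmap_sem, kmap_atomic() maps it for the memcpy(), and
 * the loop bails out early if a page cannot be pinned.
 */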
2786 static unsigned long
2787 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
2788 {
2789         unsigned long offset, addr = (unsigned long)from;
2790         int type = in_nmi() ? KM_NMI : KM_IRQ0;
2791         unsigned long size, len = 0;
2792         struct page *page;
2793         void *map;
2794         int ret;
2795
2796         do {
2797                 ret = __get_user_pages_fast(addr, 1, 0, &page);
2798                 if (!ret)
2799                         break;
2800
2801                 offset = addr & (PAGE_SIZE - 1);
2802                 size = min(PAGE_SIZE - offset, n - len);
2803
2804                 map = kmap_atomic(page, type);
2805                 memcpy(to, map+offset, size);
2806                 kunmap_atomic(map, type);
2807                 put_page(page);
2808
2809                 len  += size;
2810                 to   += size;
2811                 addr += size;
2812
2813         } while (len < n);
2814
2815         return len;
2816 }
2817
2818 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
2819 {
2820         unsigned long bytes;
2821
2822         bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
2823
2824         return bytes == sizeof(*frame);
2825 }
2826
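/*
 * Walk the user stack via frame pointers: copy each struct stack_frame
 * from the saved bp, record its return address and follow next_frame,
 * bailing out if the frame pointer drops below the current stack pointer.
 */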
2827 static void
2828 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
2829 {
2830         struct stack_frame frame;
2831         const void __user *fp;
2832
2833         if (!user_mode(regs))
2834                 regs = task_pt_regs(current);
2835
2836         fp = (void __user *)regs->bp;
2837
2838         callchain_store(entry, PERF_CONTEXT_USER);
2839         callchain_store(entry, regs->ip);
2840
2841         while (entry->nr < PERF_MAX_STACK_DEPTH) {
2842                 frame.next_frame             = NULL;
2843                 frame.return_address = 0;
2844
2845                 if (!copy_stack_frame(fp, &frame))
2846                         break;
2847
2848                 if ((unsigned long)fp < regs->sp)
2849                         break;
2850
2851                 callchain_store(entry, frame.return_address);
2852                 fp = frame.next_frame;
2853         }
2854 }
2855
2856 static void
2857 perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
2858 {
2859         int is_user;
2860
2861         if (!regs)
2862                 return;
2863
2864         is_user = user_mode(regs);
2865
2866         if (is_user && current->state != TASK_RUNNING)
2867                 return;
2868
2869         if (!is_user)
2870                 perf_callchain_kernel(regs, entry);
2871
2872         if (current->mm)
2873                 perf_callchain_user(regs, entry);
2874 }
2875
2876 struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2877 {
2878         struct perf_callchain_entry *entry;
2879
2880         if (in_nmi())
2881                 entry = &__get_cpu_var(pmc_nmi_entry);
2882         else
2883                 entry = &__get_cpu_var(pmc_irq_entry);
2884
2885         entry->nr = 0;
2886
2887         perf_do_callchain(regs, entry);
2888
2889         return entry;
2890 }
2891
2892 void hw_perf_event_setup_online(int cpu)
2893 {
2894         init_debug_store_on_cpu(cpu);
2895 }