perf_events: Add event constraints support for Intel processors
[linux-2.6.git] / arch / x86 / kernel / cpu / perf_event.c
1 /*
2  * Performance events x86 architecture code
3  *
4  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6  *  Copyright (C) 2009 Jaswinder Singh Rajput
7  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10  *
11  *  For licencing details see kernel-base/COPYING
12  */
13
14 #include <linux/perf_event.h>
15 #include <linux/capability.h>
16 #include <linux/notifier.h>
17 #include <linux/hardirq.h>
18 #include <linux/kprobes.h>
19 #include <linux/module.h>
20 #include <linux/kdebug.h>
21 #include <linux/sched.h>
22 #include <linux/uaccess.h>
23 #include <linux/highmem.h>
24 #include <linux/cpu.h>
25
26 #include <asm/apic.h>
27 #include <asm/stacktrace.h>
28 #include <asm/nmi.h>
29
30 static u64 perf_event_mask __read_mostly;
31
32 /* The maximal number of PEBS events: */
33 #define MAX_PEBS_EVENTS 4
34
35 /* The size of a BTS record in bytes: */
36 #define BTS_RECORD_SIZE         24
37
38 /* The size of a per-cpu BTS buffer in bytes: */
39 #define BTS_BUFFER_SIZE         (BTS_RECORD_SIZE * 2048)
40
41 /* The BTS overflow threshold in bytes from the end of the buffer: */
42 #define BTS_OVFL_TH             (BTS_RECORD_SIZE * 128)
43
44
45 /*
46  * Bits in the debugctlmsr controlling branch tracing.
47  */
48 #define X86_DEBUGCTL_TR                 (1 << 6)
49 #define X86_DEBUGCTL_BTS                (1 << 7)
50 #define X86_DEBUGCTL_BTINT              (1 << 8)
51 #define X86_DEBUGCTL_BTS_OFF_OS         (1 << 9)
52 #define X86_DEBUGCTL_BTS_OFF_USR        (1 << 10)
53
54 /*
55  * A debug store configuration.
56  *
57  * We only support architectures that use 64bit fields.
58  */
59 struct debug_store {
60         u64     bts_buffer_base;
61         u64     bts_index;
62         u64     bts_absolute_maximum;
63         u64     bts_interrupt_threshold;
64         u64     pebs_buffer_base;
65         u64     pebs_index;
66         u64     pebs_absolute_maximum;
67         u64     pebs_interrupt_threshold;
68         u64     pebs_event_reset[MAX_PEBS_EVENTS];
69 };
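/*
 * This mirrors the hardware DS save area that the CPU locates through
 * MSR_IA32_DS_AREA (see init_debug_store_on_cpu() below).  The bts_*
 * fields describe the Branch Trace Store buffer (base, current write
 * index, end and interrupt threshold); the pebs_* fields describe the
 * equivalent PEBS buffer and per-counter reset values, which are not
 * used anywhere in this excerpt.
 */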
70
71 struct cpu_hw_events {
72         struct perf_event       *events[X86_PMC_IDX_MAX];
73         unsigned long           used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
74         unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
75         unsigned long           interrupts;
76         int                     enabled;
77         struct debug_store      *ds;
78 };
79
80 struct event_constraint {
81         unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
82         int             code;
83 };
84
85 #define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
86 #define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }
87
88 #define for_each_event_constraint(e, c) \
89         for ((e) = (c); (e)->idxmsk[0]; (e)++)
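/*
 * A constraint maps an event code onto the bitmask of counters that
 * may host it: EVENT_CONSTRAINT(0x12, 0x2), for example, restricts
 * event 0x12 (MUL) to generic counter 1, and an all-zero idxmsk
 * (EVENT_CONSTRAINT_END) terminates a constraint table.
 */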
90
91
92 /*
93  * struct x86_pmu - generic x86 pmu
94  */
95 struct x86_pmu {
96         const char      *name;
97         int             version;
98         int             (*handle_irq)(struct pt_regs *);
99         void            (*disable_all)(void);
100         void            (*enable_all)(void);
101         void            (*enable)(struct hw_perf_event *, int);
102         void            (*disable)(struct hw_perf_event *, int);
103         unsigned        eventsel;
104         unsigned        perfctr;
105         u64             (*event_map)(int);
106         u64             (*raw_event)(u64);
107         int             max_events;
108         int             num_events;
109         int             num_events_fixed;
110         int             event_bits;
111         u64             event_mask;
112         int             apic;
113         u64             max_period;
114         u64             intel_ctrl;
115         void            (*enable_bts)(u64 config);
116         void            (*disable_bts)(void);
117         int             (*get_event_idx)(struct hw_perf_event *hwc);
118 };
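/*
 * x86_pmu acts as a vtable of model-specific callbacks, filled in at
 * init time for the detected CPU (the setup code is not part of this
 * excerpt).  The ->get_event_idx() hook is what lets the Intel code
 * honour the event constraint tables below, while the generic
 * allocator simply hands out the first free counter.
 */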
119
120 static struct x86_pmu x86_pmu __read_mostly;
121
122 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
123         .enabled = 1,
124 };
125
126 static const struct event_constraint *event_constraints;
127
128 /*
129  * Not sure about some of these
130  */
131 static const u64 p6_perfmon_event_map[] =
132 {
133   [PERF_COUNT_HW_CPU_CYCLES]            = 0x0079,
134   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
135   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0f2e,
136   [PERF_COUNT_HW_CACHE_MISSES]          = 0x012e,
137   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
138   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
139   [PERF_COUNT_HW_BUS_CYCLES]            = 0x0062,
140 };
141
142 static u64 p6_pmu_event_map(int hw_event)
143 {
144         return p6_perfmon_event_map[hw_event];
145 }
146
147 /*
148  * Event setting that is specified not to count anything.
149  * We use this to effectively disable a counter.
150  *
151  * L2_RQSTS with 0 MESI unit mask.
152  */
153 #define P6_NOP_EVENT                    0x0000002EULL
154
155 static u64 p6_pmu_raw_event(u64 hw_event)
156 {
157 #define P6_EVNTSEL_EVENT_MASK           0x000000FFULL
158 #define P6_EVNTSEL_UNIT_MASK            0x0000FF00ULL
159 #define P6_EVNTSEL_EDGE_MASK            0x00040000ULL
160 #define P6_EVNTSEL_INV_MASK             0x00800000ULL
161 #define P6_EVNTSEL_REG_MASK             0xFF000000ULL
162
163 #define P6_EVNTSEL_MASK                 \
164         (P6_EVNTSEL_EVENT_MASK |        \
165          P6_EVNTSEL_UNIT_MASK  |        \
166          P6_EVNTSEL_EDGE_MASK  |        \
167          P6_EVNTSEL_INV_MASK   |        \
168          P6_EVNTSEL_REG_MASK)
169
170         return hw_event & P6_EVNTSEL_MASK;
171 }
172
173 static const struct event_constraint intel_p6_event_constraints[] =
174 {
175         EVENT_CONSTRAINT(0xc1, 0x1),    /* FLOPS */
176         EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
177         EVENT_CONSTRAINT(0x11, 0x1),    /* FP_ASSIST */
178         EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
179         EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
180         EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
181         EVENT_CONSTRAINT_END
182 };
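/*
 * Reading these tables: the second EVENT_CONSTRAINT() argument is the
 * counter bitmask, so 0x1 means "counter 0 only", 0x2 means "counter 1
 * only" and 0x3 (used by the Nehalem table below) means "either of the
 * first two generic counters".
 */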
183
184 /*
185  * Intel PerfMon v3. Used on Core2 and later.
186  */
187 static const u64 intel_perfmon_event_map[] =
188 {
189   [PERF_COUNT_HW_CPU_CYCLES]            = 0x003c,
190   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
191   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x4f2e,
192   [PERF_COUNT_HW_CACHE_MISSES]          = 0x412e,
193   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
194   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
195   [PERF_COUNT_HW_BUS_CYCLES]            = 0x013c,
196 };
197
198 static const struct event_constraint intel_core_event_constraints[] =
199 {
200         EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
201         EVENT_CONSTRAINT(0x11, 0x2),    /* FP_ASSIST */
202         EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
203         EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
204         EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
205         EVENT_CONSTRAINT(0x18, 0x1),    /* IDLE_DURING_DIV */
206         EVENT_CONSTRAINT(0x19, 0x2),    /* DELAYED_BYPASS */
207         EVENT_CONSTRAINT(0xa1, 0x1),    /* RS_UOPS_DISPATCH_CYCLES */
208         EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED */
209         EVENT_CONSTRAINT_END
210 };
211
212 static const struct event_constraint intel_nehalem_event_constraints[] =
213 {
214         EVENT_CONSTRAINT(0x40, 0x3),    /* L1D_CACHE_LD */
215         EVENT_CONSTRAINT(0x41, 0x3),    /* L1D_CACHE_ST */
216         EVENT_CONSTRAINT(0x42, 0x3),    /* L1D_CACHE_LOCK */
217         EVENT_CONSTRAINT(0x43, 0x3),    /* L1D_ALL_REF */
218         EVENT_CONSTRAINT(0x4e, 0x3),    /* L1D_PREFETCH */
219         EVENT_CONSTRAINT(0x4c, 0x3),    /* LOAD_HIT_PRE */
220         EVENT_CONSTRAINT(0x51, 0x3),    /* L1D */
221         EVENT_CONSTRAINT(0x52, 0x3),    /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
222         EVENT_CONSTRAINT(0x53, 0x3),    /* L1D_CACHE_LOCK_FB_HIT */
223         EVENT_CONSTRAINT(0xc5, 0x3),    /* CACHE_LOCK_CYCLES */
224         EVENT_CONSTRAINT_END
225 };
226
227 static u64 intel_pmu_event_map(int hw_event)
228 {
229         return intel_perfmon_event_map[hw_event];
230 }
231
232 /*
233  * Generalized hw caching related hw_event table, filled
234  * in on a per model basis. A value of 0 means
235  * 'not supported', -1 means 'hw_event makes no sense on
236  * this CPU', any other value means the raw hw_event
237  * ID.
238  */
239
240 #define C(x) PERF_COUNT_HW_CACHE_##x
241
242 static u64 __read_mostly hw_cache_event_ids
243                                 [PERF_COUNT_HW_CACHE_MAX]
244                                 [PERF_COUNT_HW_CACHE_OP_MAX]
245                                 [PERF_COUNT_HW_CACHE_RESULT_MAX];
246
247 static const u64 nehalem_hw_cache_event_ids
248                                 [PERF_COUNT_HW_CACHE_MAX]
249                                 [PERF_COUNT_HW_CACHE_OP_MAX]
250                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
251 {
252  [ C(L1D) ] = {
253         [ C(OP_READ) ] = {
254                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
255                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
256         },
257         [ C(OP_WRITE) ] = {
258                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
259                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
260         },
261         [ C(OP_PREFETCH) ] = {
262                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
263                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
264         },
265  },
266  [ C(L1I ) ] = {
267         [ C(OP_READ) ] = {
268                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
269                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
270         },
271         [ C(OP_WRITE) ] = {
272                 [ C(RESULT_ACCESS) ] = -1,
273                 [ C(RESULT_MISS)   ] = -1,
274         },
275         [ C(OP_PREFETCH) ] = {
276                 [ C(RESULT_ACCESS) ] = 0x0,
277                 [ C(RESULT_MISS)   ] = 0x0,
278         },
279  },
280  [ C(LL  ) ] = {
281         [ C(OP_READ) ] = {
282                 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
283                 [ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
284         },
285         [ C(OP_WRITE) ] = {
286                 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
287                 [ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
288         },
289         [ C(OP_PREFETCH) ] = {
290                 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
291                 [ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
292         },
293  },
294  [ C(DTLB) ] = {
295         [ C(OP_READ) ] = {
296                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
297                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
298         },
299         [ C(OP_WRITE) ] = {
300                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
301                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
302         },
303         [ C(OP_PREFETCH) ] = {
304                 [ C(RESULT_ACCESS) ] = 0x0,
305                 [ C(RESULT_MISS)   ] = 0x0,
306         },
307  },
308  [ C(ITLB) ] = {
309         [ C(OP_READ) ] = {
310                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
311                 [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
312         },
313         [ C(OP_WRITE) ] = {
314                 [ C(RESULT_ACCESS) ] = -1,
315                 [ C(RESULT_MISS)   ] = -1,
316         },
317         [ C(OP_PREFETCH) ] = {
318                 [ C(RESULT_ACCESS) ] = -1,
319                 [ C(RESULT_MISS)   ] = -1,
320         },
321  },
322  [ C(BPU ) ] = {
323         [ C(OP_READ) ] = {
324                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
325                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
326         },
327         [ C(OP_WRITE) ] = {
328                 [ C(RESULT_ACCESS) ] = -1,
329                 [ C(RESULT_MISS)   ] = -1,
330         },
331         [ C(OP_PREFETCH) ] = {
332                 [ C(RESULT_ACCESS) ] = -1,
333                 [ C(RESULT_MISS)   ] = -1,
334         },
335  },
336 };
337
338 static const u64 core2_hw_cache_event_ids
339                                 [PERF_COUNT_HW_CACHE_MAX]
340                                 [PERF_COUNT_HW_CACHE_OP_MAX]
341                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
342 {
343  [ C(L1D) ] = {
344         [ C(OP_READ) ] = {
345                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
346                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
347         },
348         [ C(OP_WRITE) ] = {
349                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
350                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
351         },
352         [ C(OP_PREFETCH) ] = {
353                 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
354                 [ C(RESULT_MISS)   ] = 0,
355         },
356  },
357  [ C(L1I ) ] = {
358         [ C(OP_READ) ] = {
359                 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
360                 [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
361         },
362         [ C(OP_WRITE) ] = {
363                 [ C(RESULT_ACCESS) ] = -1,
364                 [ C(RESULT_MISS)   ] = -1,
365         },
366         [ C(OP_PREFETCH) ] = {
367                 [ C(RESULT_ACCESS) ] = 0,
368                 [ C(RESULT_MISS)   ] = 0,
369         },
370  },
371  [ C(LL  ) ] = {
372         [ C(OP_READ) ] = {
373                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
374                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
375         },
376         [ C(OP_WRITE) ] = {
377                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
378                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
379         },
380         [ C(OP_PREFETCH) ] = {
381                 [ C(RESULT_ACCESS) ] = 0,
382                 [ C(RESULT_MISS)   ] = 0,
383         },
384  },
385  [ C(DTLB) ] = {
386         [ C(OP_READ) ] = {
387                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
388                 [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
389         },
390         [ C(OP_WRITE) ] = {
391                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
392                 [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
393         },
394         [ C(OP_PREFETCH) ] = {
395                 [ C(RESULT_ACCESS) ] = 0,
396                 [ C(RESULT_MISS)   ] = 0,
397         },
398  },
399  [ C(ITLB) ] = {
400         [ C(OP_READ) ] = {
401                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
402                 [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
403         },
404         [ C(OP_WRITE) ] = {
405                 [ C(RESULT_ACCESS) ] = -1,
406                 [ C(RESULT_MISS)   ] = -1,
407         },
408         [ C(OP_PREFETCH) ] = {
409                 [ C(RESULT_ACCESS) ] = -1,
410                 [ C(RESULT_MISS)   ] = -1,
411         },
412  },
413  [ C(BPU ) ] = {
414         [ C(OP_READ) ] = {
415                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
416                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
417         },
418         [ C(OP_WRITE) ] = {
419                 [ C(RESULT_ACCESS) ] = -1,
420                 [ C(RESULT_MISS)   ] = -1,
421         },
422         [ C(OP_PREFETCH) ] = {
423                 [ C(RESULT_ACCESS) ] = -1,
424                 [ C(RESULT_MISS)   ] = -1,
425         },
426  },
427 };
428
429 static const u64 atom_hw_cache_event_ids
430                                 [PERF_COUNT_HW_CACHE_MAX]
431                                 [PERF_COUNT_HW_CACHE_OP_MAX]
432                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
433 {
434  [ C(L1D) ] = {
435         [ C(OP_READ) ] = {
436                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
437                 [ C(RESULT_MISS)   ] = 0,
438         },
439         [ C(OP_WRITE) ] = {
440                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
441                 [ C(RESULT_MISS)   ] = 0,
442         },
443         [ C(OP_PREFETCH) ] = {
444                 [ C(RESULT_ACCESS) ] = 0x0,
445                 [ C(RESULT_MISS)   ] = 0,
446         },
447  },
448  [ C(L1I ) ] = {
449         [ C(OP_READ) ] = {
450                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
451                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
452         },
453         [ C(OP_WRITE) ] = {
454                 [ C(RESULT_ACCESS) ] = -1,
455                 [ C(RESULT_MISS)   ] = -1,
456         },
457         [ C(OP_PREFETCH) ] = {
458                 [ C(RESULT_ACCESS) ] = 0,
459                 [ C(RESULT_MISS)   ] = 0,
460         },
461  },
462  [ C(LL  ) ] = {
463         [ C(OP_READ) ] = {
464                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
465                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
466         },
467         [ C(OP_WRITE) ] = {
468                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
469                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
470         },
471         [ C(OP_PREFETCH) ] = {
472                 [ C(RESULT_ACCESS) ] = 0,
473                 [ C(RESULT_MISS)   ] = 0,
474         },
475  },
476  [ C(DTLB) ] = {
477         [ C(OP_READ) ] = {
478                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
479                 [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
480         },
481         [ C(OP_WRITE) ] = {
482                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
483                 [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
484         },
485         [ C(OP_PREFETCH) ] = {
486                 [ C(RESULT_ACCESS) ] = 0,
487                 [ C(RESULT_MISS)   ] = 0,
488         },
489  },
490  [ C(ITLB) ] = {
491         [ C(OP_READ) ] = {
492                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
493                 [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
494         },
495         [ C(OP_WRITE) ] = {
496                 [ C(RESULT_ACCESS) ] = -1,
497                 [ C(RESULT_MISS)   ] = -1,
498         },
499         [ C(OP_PREFETCH) ] = {
500                 [ C(RESULT_ACCESS) ] = -1,
501                 [ C(RESULT_MISS)   ] = -1,
502         },
503  },
504  [ C(BPU ) ] = {
505         [ C(OP_READ) ] = {
506                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
507                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
508         },
509         [ C(OP_WRITE) ] = {
510                 [ C(RESULT_ACCESS) ] = -1,
511                 [ C(RESULT_MISS)   ] = -1,
512         },
513         [ C(OP_PREFETCH) ] = {
514                 [ C(RESULT_ACCESS) ] = -1,
515                 [ C(RESULT_MISS)   ] = -1,
516         },
517  },
518 };
519
520 static u64 intel_pmu_raw_event(u64 hw_event)
521 {
522 #define CORE_EVNTSEL_EVENT_MASK         0x000000FFULL
523 #define CORE_EVNTSEL_UNIT_MASK          0x0000FF00ULL
524 #define CORE_EVNTSEL_EDGE_MASK          0x00040000ULL
525 #define CORE_EVNTSEL_INV_MASK           0x00800000ULL
526 #define CORE_EVNTSEL_REG_MASK   0xFF000000ULL
527
528 #define CORE_EVNTSEL_MASK               \
529         (CORE_EVNTSEL_EVENT_MASK |      \
530          CORE_EVNTSEL_UNIT_MASK  |      \
531          CORE_EVNTSEL_EDGE_MASK  |      \
532          CORE_EVNTSEL_INV_MASK  |       \
533          CORE_EVNTSEL_REG_MASK)
534
535         return hw_event & CORE_EVNTSEL_MASK;
536 }
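/*
 * The raw-event masks (here and in the P6/K7 variants) pass through
 * only the event-select, unit-mask, edge, invert and top-byte fields
 * of a PERF_TYPE_RAW config; the privilege, interrupt and enable bits
 * are stripped and re-added by the kernel itself in
 * __hw_perf_event_init().  (The 0xFF000000 field is named REG_MASK
 * here; architecturally it is the counter-mask/cmask field.)
 */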
537
538 static const u64 amd_hw_cache_event_ids
539                                 [PERF_COUNT_HW_CACHE_MAX]
540                                 [PERF_COUNT_HW_CACHE_OP_MAX]
541                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
542 {
543  [ C(L1D) ] = {
544         [ C(OP_READ) ] = {
545                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
546                 [ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
547         },
548         [ C(OP_WRITE) ] = {
549                 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
550                 [ C(RESULT_MISS)   ] = 0,
551         },
552         [ C(OP_PREFETCH) ] = {
553                 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
554                 [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
555         },
556  },
557  [ C(L1I ) ] = {
558         [ C(OP_READ) ] = {
559                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
560                 [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
561         },
562         [ C(OP_WRITE) ] = {
563                 [ C(RESULT_ACCESS) ] = -1,
564                 [ C(RESULT_MISS)   ] = -1,
565         },
566         [ C(OP_PREFETCH) ] = {
567                 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
568                 [ C(RESULT_MISS)   ] = 0,
569         },
570  },
571  [ C(LL  ) ] = {
572         [ C(OP_READ) ] = {
573                 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
574                 [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
575         },
576         [ C(OP_WRITE) ] = {
577                 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
578                 [ C(RESULT_MISS)   ] = 0,
579         },
580         [ C(OP_PREFETCH) ] = {
581                 [ C(RESULT_ACCESS) ] = 0,
582                 [ C(RESULT_MISS)   ] = 0,
583         },
584  },
585  [ C(DTLB) ] = {
586         [ C(OP_READ) ] = {
587                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
588                 [ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DLTB Miss   */
589         },
590         [ C(OP_WRITE) ] = {
591                 [ C(RESULT_ACCESS) ] = 0,
592                 [ C(RESULT_MISS)   ] = 0,
593         },
594         [ C(OP_PREFETCH) ] = {
595                 [ C(RESULT_ACCESS) ] = 0,
596                 [ C(RESULT_MISS)   ] = 0,
597         },
598  },
599  [ C(ITLB) ] = {
600         [ C(OP_READ) ] = {
601                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
602                 [ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
603         },
604         [ C(OP_WRITE) ] = {
605                 [ C(RESULT_ACCESS) ] = -1,
606                 [ C(RESULT_MISS)   ] = -1,
607         },
608         [ C(OP_PREFETCH) ] = {
609                 [ C(RESULT_ACCESS) ] = -1,
610                 [ C(RESULT_MISS)   ] = -1,
611         },
612  },
613  [ C(BPU ) ] = {
614         [ C(OP_READ) ] = {
615                 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
616                 [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
617         },
618         [ C(OP_WRITE) ] = {
619                 [ C(RESULT_ACCESS) ] = -1,
620                 [ C(RESULT_MISS)   ] = -1,
621         },
622         [ C(OP_PREFETCH) ] = {
623                 [ C(RESULT_ACCESS) ] = -1,
624                 [ C(RESULT_MISS)   ] = -1,
625         },
626  },
627 };
628
629 /*
630  * AMD Performance Monitor K7 and later.
631  */
632 static const u64 amd_perfmon_event_map[] =
633 {
634   [PERF_COUNT_HW_CPU_CYCLES]            = 0x0076,
635   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
636   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0080,
637   [PERF_COUNT_HW_CACHE_MISSES]          = 0x0081,
638   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
639   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
640 };
641
642 static u64 amd_pmu_event_map(int hw_event)
643 {
644         return amd_perfmon_event_map[hw_event];
645 }
646
647 static u64 amd_pmu_raw_event(u64 hw_event)
648 {
649 #define K7_EVNTSEL_EVENT_MASK   0x7000000FFULL
650 #define K7_EVNTSEL_UNIT_MASK    0x00000FF00ULL
651 #define K7_EVNTSEL_EDGE_MASK    0x000040000ULL
652 #define K7_EVNTSEL_INV_MASK     0x000800000ULL
653 #define K7_EVNTSEL_REG_MASK     0x0FF000000ULL
654
655 #define K7_EVNTSEL_MASK                 \
656         (K7_EVNTSEL_EVENT_MASK |        \
657          K7_EVNTSEL_UNIT_MASK  |        \
658          K7_EVNTSEL_EDGE_MASK  |        \
659          K7_EVNTSEL_INV_MASK   |        \
660          K7_EVNTSEL_REG_MASK)
661
662         return hw_event & K7_EVNTSEL_MASK;
663 }
664
665 /*
666  * Propagate event elapsed time into the generic event.
667  * Can only be executed on the CPU where the event is active.
668  * Returns the delta events processed.
669  */
670 static u64
671 x86_perf_event_update(struct perf_event *event,
672                         struct hw_perf_event *hwc, int idx)
673 {
674         int shift = 64 - x86_pmu.event_bits;
675         u64 prev_raw_count, new_raw_count;
676         s64 delta;
677
678         if (idx == X86_PMC_IDX_FIXED_BTS)
679                 return 0;
680
681         /*
682          * Careful: an NMI might modify the previous event value.
683          *
684          * Our tactic to handle this is to first atomically read and
685          * exchange a new raw count - then add that new-prev delta
686          * count to the generic event atomically:
687          */
688 again:
689         prev_raw_count = atomic64_read(&hwc->prev_count);
690         rdmsrl(hwc->event_base + idx, new_raw_count);
691
692         if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
693                                         new_raw_count) != prev_raw_count)
694                 goto again;
695
696         /*
697          * Now we have the new raw value and have updated the prev
698          * timestamp already. We can now calculate the elapsed delta
699          * (event-)time and add that to the generic event.
700          *
701          * Careful, not all hw sign-extends above the physical width
702          * of the count.
703          */
704         delta = (new_raw_count << shift) - (prev_raw_count << shift);
705         delta >>= shift;
706
707         atomic64_add(delta, &event->count);
708         atomic64_sub(delta, &hwc->period_left);
709
710         return new_raw_count;
711 }
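/*
 * Worked example for the shift trick above, assuming 48-bit counters
 * (event_bits = 48, so shift = 16): prev_raw_count = 0xffffffffffff
 * and new_raw_count = 0x5 give (new << 16) - (prev << 16) = 0x60000,
 * and the arithmetic right shift yields delta = 6 - the six events
 * that occurred across the counter wrap - without caring whether the
 * MSR sign-extends beyond the physical counter width.
 */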
712
713 static atomic_t active_events;
714 static DEFINE_MUTEX(pmc_reserve_mutex);
715
716 static bool reserve_pmc_hardware(void)
717 {
718 #ifdef CONFIG_X86_LOCAL_APIC
719         int i;
720
721         if (nmi_watchdog == NMI_LOCAL_APIC)
722                 disable_lapic_nmi_watchdog();
723
724         for (i = 0; i < x86_pmu.num_events; i++) {
725                 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
726                         goto perfctr_fail;
727         }
728
729         for (i = 0; i < x86_pmu.num_events; i++) {
730                 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
731                         goto eventsel_fail;
732         }
733 #endif
734
735         return true;
736
737 #ifdef CONFIG_X86_LOCAL_APIC
738 eventsel_fail:
739         for (i--; i >= 0; i--)
740                 release_evntsel_nmi(x86_pmu.eventsel + i);
741
742         i = x86_pmu.num_events;
743
744 perfctr_fail:
745         for (i--; i >= 0; i--)
746                 release_perfctr_nmi(x86_pmu.perfctr + i);
747
748         if (nmi_watchdog == NMI_LOCAL_APIC)
749                 enable_lapic_nmi_watchdog();
750
751         return false;
752 #endif
753 }
754
755 static void release_pmc_hardware(void)
756 {
757 #ifdef CONFIG_X86_LOCAL_APIC
758         int i;
759
760         for (i = 0; i < x86_pmu.num_events; i++) {
761                 release_perfctr_nmi(x86_pmu.perfctr + i);
762                 release_evntsel_nmi(x86_pmu.eventsel + i);
763         }
764
765         if (nmi_watchdog == NMI_LOCAL_APIC)
766                 enable_lapic_nmi_watchdog();
767 #endif
768 }
769
770 static inline bool bts_available(void)
771 {
772         return x86_pmu.enable_bts != NULL;
773 }
774
775 static inline void init_debug_store_on_cpu(int cpu)
776 {
777         struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
778
779         if (!ds)
780                 return;
781
782         wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
783                      (u32)((u64)(unsigned long)ds),
784                      (u32)((u64)(unsigned long)ds >> 32));
785 }
786
787 static inline void fini_debug_store_on_cpu(int cpu)
788 {
789         if (!per_cpu(cpu_hw_events, cpu).ds)
790                 return;
791
792         wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
793 }
794
795 static void release_bts_hardware(void)
796 {
797         int cpu;
798
799         if (!bts_available())
800                 return;
801
802         get_online_cpus();
803
804         for_each_online_cpu(cpu)
805                 fini_debug_store_on_cpu(cpu);
806
807         for_each_possible_cpu(cpu) {
808                 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
809
810                 if (!ds)
811                         continue;
812
813                 per_cpu(cpu_hw_events, cpu).ds = NULL;
814
815                 kfree((void *)(unsigned long)ds->bts_buffer_base);
816                 kfree(ds);
817         }
818
819         put_online_cpus();
820 }
821
822 static int reserve_bts_hardware(void)
823 {
824         int cpu, err = 0;
825
826         if (!bts_available())
827                 return 0;
828
829         get_online_cpus();
830
831         for_each_possible_cpu(cpu) {
832                 struct debug_store *ds;
833                 void *buffer;
834
835                 err = -ENOMEM;
836                 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
837                 if (unlikely(!buffer))
838                         break;
839
840                 ds = kzalloc(sizeof(*ds), GFP_KERNEL);
841                 if (unlikely(!ds)) {
842                         kfree(buffer);
843                         break;
844                 }
845
846                 ds->bts_buffer_base = (u64)(unsigned long)buffer;
847                 ds->bts_index = ds->bts_buffer_base;
848                 ds->bts_absolute_maximum =
849                         ds->bts_buffer_base + BTS_BUFFER_SIZE;
850                 ds->bts_interrupt_threshold =
851                         ds->bts_absolute_maximum - BTS_OVFL_TH;
852
853                 per_cpu(cpu_hw_events, cpu).ds = ds;
854                 err = 0;
855         }
856
857         if (err)
858                 release_bts_hardware();
859         else {
860                 for_each_online_cpu(cpu)
861                         init_debug_store_on_cpu(cpu);
862         }
863
864         put_online_cpus();
865
866         return err;
867 }
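/*
 * A BTS_BUFFER_SIZE buffer is allocated for every possible CPU and
 * wired up on the online ones; the interrupt threshold is placed
 * BTS_OVFL_TH bytes (128 records) before the end of the buffer so
 * that the hardware can raise an interrupt before the buffer fills.
 */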
868
869 static void hw_perf_event_destroy(struct perf_event *event)
870 {
871         if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
872                 release_pmc_hardware();
873                 release_bts_hardware();
874                 mutex_unlock(&pmc_reserve_mutex);
875         }
876 }
877
878 static inline int x86_pmu_initialized(void)
879 {
880         return x86_pmu.handle_irq != NULL;
881 }
882
883 static inline int
884 set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
885 {
886         unsigned int cache_type, cache_op, cache_result;
887         u64 config, val;
888
889         config = attr->config;
890
891         cache_type = (config >>  0) & 0xff;
892         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
893                 return -EINVAL;
894
895         cache_op = (config >>  8) & 0xff;
896         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
897                 return -EINVAL;
898
899         cache_result = (config >> 16) & 0xff;
900         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
901                 return -EINVAL;
902
903         val = hw_cache_event_ids[cache_type][cache_op][cache_result];
904
905         if (val == 0)
906                 return -ENOENT;
907
908         if (val == -1)
909                 return -EINVAL;
910
911         hwc->config |= val;
912
913         return 0;
914 }
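/*
 * Example of the attr->config encoding decoded above - an L1D
 * read-miss event is requested as:
 *
 *      config = PERF_COUNT_HW_CACHE_L1D |
 *               (PERF_COUNT_HW_CACHE_OP_READ     <<  8) |
 *               (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
 *
 * which on Nehalem resolves to 0x0140 (L1D_CACHE_LD.I_STATE) via
 * hw_cache_event_ids[].
 */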
915
916 static void intel_pmu_enable_bts(u64 config)
917 {
918         unsigned long debugctlmsr;
919
920         debugctlmsr = get_debugctlmsr();
921
922         debugctlmsr |= X86_DEBUGCTL_TR;
923         debugctlmsr |= X86_DEBUGCTL_BTS;
924         debugctlmsr |= X86_DEBUGCTL_BTINT;
925
926         if (!(config & ARCH_PERFMON_EVENTSEL_OS))
927                 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
928
929         if (!(config & ARCH_PERFMON_EVENTSEL_USR))
930                 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
931
932         update_debugctlmsr(debugctlmsr);
933 }
934
935 static void intel_pmu_disable_bts(void)
936 {
937         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
938         unsigned long debugctlmsr;
939
940         if (!cpuc->ds)
941                 return;
942
943         debugctlmsr = get_debugctlmsr();
944
945         debugctlmsr &=
946                 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
947                   X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
948
949         update_debugctlmsr(debugctlmsr);
950 }
951
952 /*
953  * Setup the hardware configuration for a given attr_type
954  */
955 static int __hw_perf_event_init(struct perf_event *event)
956 {
957         struct perf_event_attr *attr = &event->attr;
958         struct hw_perf_event *hwc = &event->hw;
959         u64 config;
960         int err;
961
962         if (!x86_pmu_initialized())
963                 return -ENODEV;
964
965         err = 0;
966         if (!atomic_inc_not_zero(&active_events)) {
967                 mutex_lock(&pmc_reserve_mutex);
968                 if (atomic_read(&active_events) == 0) {
969                         if (!reserve_pmc_hardware())
970                                 err = -EBUSY;
971                         else
972                                 err = reserve_bts_hardware();
973                 }
974                 if (!err)
975                         atomic_inc(&active_events);
976                 mutex_unlock(&pmc_reserve_mutex);
977         }
978         if (err)
979                 return err;
980
981         event->destroy = hw_perf_event_destroy;
982
983         /*
984          * Generate PMC IRQs:
985          * (keep 'enabled' bit clear for now)
986          */
987         hwc->config = ARCH_PERFMON_EVENTSEL_INT;
988
989         hwc->idx = -1;
990
991         /*
992          * Count user and OS events unless requested not to.
993          */
994         if (!attr->exclude_user)
995                 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
996         if (!attr->exclude_kernel)
997                 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
998
999         if (!hwc->sample_period) {
1000                 hwc->sample_period = x86_pmu.max_period;
1001                 hwc->last_period = hwc->sample_period;
1002                 atomic64_set(&hwc->period_left, hwc->sample_period);
1003         } else {
1004                 /*
1005                  * If we have a PMU initialized but no APIC
1006                  * interrupts, we cannot sample hardware
1007                  * events (user-space has to fall back and
1008                  * sample via a hrtimer based software event):
1009                  */
1010                 if (!x86_pmu.apic)
1011                         return -EOPNOTSUPP;
1012         }
1013
1014         /*
1015          * Raw hw_event type provides the config in the hw_event structure
1016          */
1017         if (attr->type == PERF_TYPE_RAW) {
1018                 hwc->config |= x86_pmu.raw_event(attr->config);
1019                 return 0;
1020         }
1021
1022         if (attr->type == PERF_TYPE_HW_CACHE)
1023                 return set_ext_hw_attr(hwc, attr);
1024
1025         if (attr->config >= x86_pmu.max_events)
1026                 return -EINVAL;
1027
1028         /*
1029          * The generic map:
1030          */
1031         config = x86_pmu.event_map(attr->config);
1032
1033         if (config == 0)
1034                 return -ENOENT;
1035
1036         if (config == -1LL)
1037                 return -EINVAL;
1038
1039         /*
1040          * Branch tracing:
1041          */
1042         if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
1043             (hwc->sample_period == 1)) {
1044                 /* BTS is not supported by this architecture. */
1045                 if (!bts_available())
1046                         return -EOPNOTSUPP;
1047
1048                 /* BTS is currently only allowed for user-mode. */
1049                 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1050                         return -EOPNOTSUPP;
1051         }
1052
1053         hwc->config |= config;
1054
1055         return 0;
1056 }
1057
1058 static void p6_pmu_disable_all(void)
1059 {
1060         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1061         u64 val;
1062
1063         if (!cpuc->enabled)
1064                 return;
1065
1066         cpuc->enabled = 0;
1067         barrier();
1068
1069         /* p6 only has one enable register */
1070         rdmsrl(MSR_P6_EVNTSEL0, val);
1071         val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
1072         wrmsrl(MSR_P6_EVNTSEL0, val);
1073 }
1074
1075 static void intel_pmu_disable_all(void)
1076 {
1077         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1078
1079         if (!cpuc->enabled)
1080                 return;
1081
1082         cpuc->enabled = 0;
1083         barrier();
1084
1085         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
1086
1087         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1088                 intel_pmu_disable_bts();
1089 }
1090
1091 static void amd_pmu_disable_all(void)
1092 {
1093         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1094         int idx;
1095
1096         if (!cpuc->enabled)
1097                 return;
1098
1099         cpuc->enabled = 0;
1100         /*
1101          * ensure we write the disable before we start disabling the
1102          * events proper, so that amd_pmu_enable_event() does the
1103          * right thing.
1104          */
1105         barrier();
1106
1107         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1108                 u64 val;
1109
1110                 if (!test_bit(idx, cpuc->active_mask))
1111                         continue;
1112                 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
1113                 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
1114                         continue;
1115                 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
1116                 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1117         }
1118 }
1119
1120 void hw_perf_disable(void)
1121 {
1122         if (!x86_pmu_initialized())
1123                 return;
1124         return x86_pmu.disable_all();
1125 }
1126
1127 static void p6_pmu_enable_all(void)
1128 {
1129         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1130         unsigned long val;
1131
1132         if (cpuc->enabled)
1133                 return;
1134
1135         cpuc->enabled = 1;
1136         barrier();
1137
1138         /* p6 only has one enable register */
1139         rdmsrl(MSR_P6_EVNTSEL0, val);
1140         val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1141         wrmsrl(MSR_P6_EVNTSEL0, val);
1142 }
1143
1144 static void intel_pmu_enable_all(void)
1145 {
1146         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1147
1148         if (cpuc->enabled)
1149                 return;
1150
1151         cpuc->enabled = 1;
1152         barrier();
1153
1154         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1155
1156         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
1157                 struct perf_event *event =
1158                         cpuc->events[X86_PMC_IDX_FIXED_BTS];
1159
1160                 if (WARN_ON_ONCE(!event))
1161                         return;
1162
1163                 intel_pmu_enable_bts(event->hw.config);
1164         }
1165 }
1166
1167 static void amd_pmu_enable_all(void)
1168 {
1169         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1170         int idx;
1171
1172         if (cpuc->enabled)
1173                 return;
1174
1175         cpuc->enabled = 1;
1176         barrier();
1177
1178         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1179                 struct perf_event *event = cpuc->events[idx];
1180                 u64 val;
1181
1182                 if (!test_bit(idx, cpuc->active_mask))
1183                         continue;
1184
1185                 val = event->hw.config;
1186                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1187                 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1188         }
1189 }
1190
1191 void hw_perf_enable(void)
1192 {
1193         if (!x86_pmu_initialized())
1194                 return;
1195         x86_pmu.enable_all();
1196 }
1197
1198 static inline u64 intel_pmu_get_status(void)
1199 {
1200         u64 status;
1201
1202         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1203
1204         return status;
1205 }
1206
1207 static inline void intel_pmu_ack_status(u64 ack)
1208 {
1209         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
1210 }
1211
1212 static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1213 {
1214         (void)checking_wrmsrl(hwc->config_base + idx,
1215                               hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1216 }
1217
1218 static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1219 {
1220         (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1221 }
1222
1223 static inline void
1224 intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
1225 {
1226         int idx = __idx - X86_PMC_IDX_FIXED;
1227         u64 ctrl_val, mask;
1228
1229         mask = 0xfULL << (idx * 4);
1230
1231         rdmsrl(hwc->config_base, ctrl_val);
1232         ctrl_val &= ~mask;
1233         (void)checking_wrmsrl(hwc->config_base, ctrl_val);
1234 }
1235
1236 static inline void
1237 p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1238 {
1239         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1240         u64 val = P6_NOP_EVENT;
1241
1242         if (cpuc->enabled)
1243                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1244
1245         (void)checking_wrmsrl(hwc->config_base + idx, val);
1246 }
1247
1248 static inline void
1249 intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1250 {
1251         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1252                 intel_pmu_disable_bts();
1253                 return;
1254         }
1255
1256         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1257                 intel_pmu_disable_fixed(hwc, idx);
1258                 return;
1259         }
1260
1261         x86_pmu_disable_event(hwc, idx);
1262 }
1263
1264 static inline void
1265 amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1266 {
1267         x86_pmu_disable_event(hwc, idx);
1268 }
1269
1270 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1271
1272 /*
1273  * Set the next IRQ period, based on the hwc->period_left value.
1274  * To be called with the event disabled in hw:
1275  */
1276 static int
1277 x86_perf_event_set_period(struct perf_event *event,
1278                              struct hw_perf_event *hwc, int idx)
1279 {
1280         s64 left = atomic64_read(&hwc->period_left);
1281         s64 period = hwc->sample_period;
1282         int err, ret = 0;
1283
1284         if (idx == X86_PMC_IDX_FIXED_BTS)
1285                 return 0;
1286
1287         /*
1288          * If we are way outside a reasonable range then just skip forward:
1289          */
1290         if (unlikely(left <= -period)) {
1291                 left = period;
1292                 atomic64_set(&hwc->period_left, left);
1293                 hwc->last_period = period;
1294                 ret = 1;
1295         }
1296
1297         if (unlikely(left <= 0)) {
1298                 left += period;
1299                 atomic64_set(&hwc->period_left, left);
1300                 hwc->last_period = period;
1301                 ret = 1;
1302         }
1303         /*
1304          * Quirk: certain CPUs don't like it if just 1 hw_event is left:
1305          */
1306         if (unlikely(left < 2))
1307                 left = 2;
1308
1309         if (left > x86_pmu.max_period)
1310                 left = x86_pmu.max_period;
1311
1312         per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
1313
1314         /*
1315          * The hw event starts counting from this event offset,
1316          * mark it to be able to extract future deltas:
1317          */
1318         atomic64_set(&hwc->prev_count, (u64)-left);
1319
1320         err = checking_wrmsrl(hwc->event_base + idx,
1321                              (u64)(-left) & x86_pmu.event_mask);
1322
1323         perf_event_update_userpage(event);
1324
1325         return ret;
1326 }
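/*
 * The counter is programmed with -left (masked to the counter width),
 * so it overflows and triggers the PMI after exactly 'left' further
 * events; prev_count is set to the same value so that
 * x86_perf_event_update() computes the right delta afterwards.
 */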
1327
1328 static inline void
1329 intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1330 {
1331         int idx = __idx - X86_PMC_IDX_FIXED;
1332         u64 ctrl_val, bits, mask;
1333         int err;
1334
1335         /*
1336          * Enable IRQ generation (0x8),
1337          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1338          * if requested:
1339          */
1340         bits = 0x8ULL;
1341         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1342                 bits |= 0x2;
1343         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1344                 bits |= 0x1;
1345         bits <<= (idx * 4);
1346         mask = 0xfULL << (idx * 4);
1347
1348         rdmsrl(hwc->config_base, ctrl_val);
1349         ctrl_val &= ~mask;
1350         ctrl_val |= bits;
1351         err = checking_wrmsrl(hwc->config_base, ctrl_val);
1352 }
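/*
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL holds one 4-bit control field per
 * fixed counter, which is why both the enable and disable paths shift
 * a 0xf mask by (idx * 4).  Within the field, bit 0 enables ring-0
 * counting, bit 1 ring-3 counting and bit 3 PMI generation - exactly
 * the 0x1/0x2/0x8 bits composed above.
 */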
1353
1354 static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1355 {
1356         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1357         u64 val;
1358
1359         val = hwc->config;
1360         if (cpuc->enabled)
1361                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1362
1363         (void)checking_wrmsrl(hwc->config_base + idx, val);
1364 }
1365
1366
1367 static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1368 {
1369         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1370                 if (!__get_cpu_var(cpu_hw_events).enabled)
1371                         return;
1372
1373                 intel_pmu_enable_bts(hwc->config);
1374                 return;
1375         }
1376
1377         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1378                 intel_pmu_enable_fixed(hwc, idx);
1379                 return;
1380         }
1381
1382         x86_pmu_enable_event(hwc, idx);
1383 }
1384
1385 static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1386 {
1387         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1388
1389         if (cpuc->enabled)
1390                 x86_pmu_enable_event(hwc, idx);
1391 }
1392
1393 static int
1394 fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
1395 {
1396         unsigned int hw_event;
1397
1398         hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1399
1400         if (unlikely((hw_event ==
1401                       x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
1402                      (hwc->sample_period == 1)))
1403                 return X86_PMC_IDX_FIXED_BTS;
1404
1405         if (!x86_pmu.num_events_fixed)
1406                 return -1;
1407
1408         /*
1409          * fixed counters do not take all possible filters
1410          */
1411         if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
1412                 return -1;
1413
1414         if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
1415                 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
1416         if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1417                 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1418         if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1419                 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1420
1421         return -1;
1422 }
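/*
 * BTS is modelled as a pseudo "fixed" counter here: a
 * branch-instructions event with a sample period of 1 is steered to
 * X86_PMC_IDX_FIXED_BTS and never occupies a real PMC.  Real fixed
 * counters are only considered when the config carries none of the
 * filter bits in ARCH_PERFMON_EVENT_FILTER_MASK, since the
 * fixed-function hardware cannot apply them.
 */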
1423
1424 /*
1425  * generic counter allocator: get next free counter
1426  */
1427 static int gen_get_event_idx(struct hw_perf_event *hwc)
1428 {
1429         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1430         int idx;
1431
1432         idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
1433         return idx == x86_pmu.num_events ? -1 : idx;
1434 }
1435
1436 /*
1437  * intel-specific counter allocator: check event constraints
1438  */
1439 static int intel_get_event_idx(struct hw_perf_event *hwc)
1440 {
1441         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1442         const struct event_constraint *event_constraint;
1443         int i, code;
1444
1445         if (!event_constraints)
1446                 goto skip;
1447
1448         code = hwc->config & 0xff;
1449
1450         for_each_event_constraint(event_constraint, event_constraints) {
1451                 if (code == event_constraint->code) {
1452                         for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
1453                                 if (!test_and_set_bit(i, cpuc->used_mask))
1454                                         return i;
1455                         }
1456                         return -1;
1457                 }
1458         }
1459 skip:
1460         return gen_get_event_idx(hwc);
1461 }
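/*
 * When the event code matches a constraint, only the counters named
 * in idxmsk are tried and the allocation fails (-1) rather than
 * spilling onto an arbitrary counter; events without a constraint
 * fall through to the generic first-free-bit allocator.
 */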
1462
1463 /*
1464  * Find a PMC slot for the freshly enabled / scheduled in event:
1465  */
1466 static int x86_pmu_enable(struct perf_event *event)
1467 {
1468         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1469         struct hw_perf_event *hwc = &event->hw;
1470         int idx;
1471
1472         idx = fixed_mode_idx(event, hwc);
1473         if (idx == X86_PMC_IDX_FIXED_BTS) {
1474                 /* BTS is already occupied. */
1475                 if (test_and_set_bit(idx, cpuc->used_mask))
1476                         return -EAGAIN;
1477
1478                 hwc->config_base        = 0;
1479                 hwc->event_base = 0;
1480                 hwc->idx                = idx;
1481         } else if (idx >= 0) {
1482                 /*
1483                  * Try to get the fixed event, if that is already taken
1484                  * then try to get a generic event:
1485                  */
1486                 if (test_and_set_bit(idx, cpuc->used_mask))
1487                         goto try_generic;
1488
1489                 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1490                 /*
1491                  * We set it so that event_base + idx in wrmsr/rdmsr maps to
1492                  * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1493                  */
1494                 hwc->event_base =
1495                         MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1496                 hwc->idx = idx;
1497         } else {
1498                 idx = hwc->idx;
1499                 /* Try to get the previous generic event again */
1500                 if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
1501 try_generic:
1502                         idx = x86_pmu.get_event_idx(hwc);
1503                         if (idx == -1)
1504                                 return -EAGAIN;
1505
1506                         set_bit(idx, cpuc->used_mask);
1507                         hwc->idx = idx;
1508                 }
1509                 hwc->config_base  = x86_pmu.eventsel;
1510                 hwc->event_base = x86_pmu.perfctr;
1511         }
1512
1513         perf_events_lapic_init();
1514
1515         x86_pmu.disable(hwc, idx);
1516
1517         cpuc->events[idx] = event;
1518         set_bit(idx, cpuc->active_mask);
1519
1520         x86_perf_event_set_period(event, hwc, idx);
1521         x86_pmu.enable(hwc, idx);
1522
1523         perf_event_update_userpage(event);
1524
1525         return 0;
1526 }
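/*
 * Counter selection above is a three-way choice: the BTS pseudo
 * counter for period-1 branch sampling, an architectural fixed
 * counter when fixed_mode_idx() finds one, or a generic counter
 * picked by the model-specific ->get_event_idx() (constraint-aware on
 * Intel).  config_base/event_base are set up so the common
 * rdmsr/wrmsr helpers work in all three cases.
 */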
1527
1528 static void x86_pmu_unthrottle(struct perf_event *event)
1529 {
1530         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1531         struct hw_perf_event *hwc = &event->hw;
1532
1533         if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1534                                 cpuc->events[hwc->idx] != event))
1535                 return;
1536
1537         x86_pmu.enable(hwc, hwc->idx);
1538 }
1539
1540 void perf_event_print_debug(void)
1541 {
1542         u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1543         struct cpu_hw_events *cpuc;
1544         unsigned long flags;
1545         int cpu, idx;
1546
1547         if (!x86_pmu.num_events)
1548                 return;
1549
1550         local_irq_save(flags);
1551
1552         cpu = smp_processor_id();
1553         cpuc = &per_cpu(cpu_hw_events, cpu);
1554
1555         if (x86_pmu.version >= 2) {
1556                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
1557                 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1558                 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1559                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1560
1561                 pr_info("\n");
1562                 pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
1563                 pr_info("CPU#%d: status:     %016llx\n", cpu, status);
1564                 pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
1565                 pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
1566         }
1567         pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
1568
1569         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1570                 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1571                 rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
1572
1573                 prev_left = per_cpu(pmc_prev_left[idx], cpu);
1574
1575                 pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
1576                         cpu, idx, pmc_ctrl);
1577                 pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
1578                         cpu, idx, pmc_count);
1579                 pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
1580                         cpu, idx, prev_left);
1581         }
1582         for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1583                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1584
1585                 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
1586                         cpu, idx, pmc_count);
1587         }
1588         local_irq_restore(flags);
1589 }
1590
1591 static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
1592 {
1593         struct debug_store *ds = cpuc->ds;
1594         struct bts_record {
1595                 u64     from;
1596                 u64     to;
1597                 u64     flags;
1598         };
1599         struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1600         struct bts_record *at, *top;
1601         struct perf_output_handle handle;
1602         struct perf_event_header header;
1603         struct perf_sample_data data;
1604         struct pt_regs regs;
1605
1606         if (!event)
1607                 return;
1608
1609         if (!ds)
1610                 return;
1611
1612         at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1613         top = (struct bts_record *)(unsigned long)ds->bts_index;
1614
1615         if (top <= at)
1616                 return;
1617
1618         ds->bts_index = ds->bts_buffer_base;
1619
1620
1621         data.period     = event->hw.last_period;
1622         data.addr       = 0;
1623         regs.ip         = 0;
1624
1625         /*
1626          * Prepare a generic sample, i.e. fill in the invariant fields.
1627          * We will overwrite the from and to address before we output
1628          * the sample.
1629          */
1630         perf_prepare_sample(&header, &data, event, &regs);
1631
1632         if (perf_output_begin(&handle, event,
1633                               header.size * (top - at), 1, 1))
1634                 return;
1635
1636         for (; at < top; at++) {
1637                 data.ip         = at->from;
1638                 data.addr       = at->to;
1639
1640                 perf_output_sample(&handle, &header, &data, event);
1641         }
1642
1643         perf_output_end(&handle);
1644
1645         /* There's new data available. */
1646         event->hw.interrupts++;
1647         event->pending_kill = POLL_IN;
1648 }
1649
1650 static void x86_pmu_disable(struct perf_event *event)
1651 {
1652         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1653         struct hw_perf_event *hwc = &event->hw;
1654         int idx = hwc->idx;
1655
1656         /*
1657          * Must be done before we disable, otherwise the nmi handler
1658          * could reenable again:
1659          */
1660         clear_bit(idx, cpuc->active_mask);
1661         x86_pmu.disable(hwc, idx);
1662
1663         /*
1664          * Make sure the cleared pointer becomes visible before we
1665          * (potentially) free the event:
1666          */
1667         barrier();
1668
1669         /*
1670          * Drain the remaining delta count out of an event
1671          * that we are disabling:
1672          */
1673         x86_perf_event_update(event, hwc, idx);
1674
1675         /* Drain the remaining BTS records. */
1676         if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1677                 intel_pmu_drain_bts_buffer(cpuc);
1678
1679         cpuc->events[idx] = NULL;
1680         clear_bit(idx, cpuc->used_mask);
1681
1682         perf_event_update_userpage(event);
1683 }
1684
1685 /*
1686  * Save and restart an expired event. Called by NMI contexts,
1687  * so it has to be careful about preempting normal event ops:
1688  */
1689 static int intel_pmu_save_and_restart(struct perf_event *event)
1690 {
1691         struct hw_perf_event *hwc = &event->hw;
1692         int idx = hwc->idx;
1693         int ret;
1694
1695         x86_perf_event_update(event, hwc, idx);
1696         ret = x86_perf_event_set_period(event, hwc, idx);
1697
1698         if (event->state == PERF_EVENT_STATE_ACTIVE)
1699                 intel_pmu_enable_event(hwc, idx);
1700
1701         return ret;
1702 }
1703
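     /*
      * Last-resort recovery: zero every generic and fixed counter and
      * rewind the BTS buffer.  Used when the overflow handler appears
      * to be stuck in a loop.
      */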
1704 static void intel_pmu_reset(void)
1705 {
1706         struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1707         unsigned long flags;
1708         int idx;
1709
1710         if (!x86_pmu.num_events)
1711                 return;
1712
1713         local_irq_save(flags);
1714
1715         pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
1716
1717         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1718                 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1719                 checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
1720         }
1721         for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1722                 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1723         }
1724         if (ds)
1725                 ds->bts_index = ds->bts_buffer_base;
1726
1727         local_irq_restore(flags);
1728 }
1729
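     /*
      * P6-style PMUs have no global overflow status register, so this
      * handler polls every active counter and treats a cleared sign bit
      * in the updated count as an overflow that needs servicing.
      */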
1730 static int p6_pmu_handle_irq(struct pt_regs *regs)
1731 {
1732         struct perf_sample_data data;
1733         struct cpu_hw_events *cpuc;
1734         struct perf_event *event;
1735         struct hw_perf_event *hwc;
1736         int idx, handled = 0;
1737         u64 val;
1738
1739         data.addr = 0;
1740
1741         cpuc = &__get_cpu_var(cpu_hw_events);
1742
1743         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1744                 if (!test_bit(idx, cpuc->active_mask))
1745                         continue;
1746
1747                 event = cpuc->events[idx];
1748                 hwc = &event->hw;
1749
1750                 val = x86_perf_event_update(event, hwc, idx);
1751                 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1752                         continue;
1753
1754                 /*
1755                  * event overflow
1756                  */
1757                 handled         = 1;
1758                 data.period     = event->hw.last_period;
1759
1760                 if (!x86_perf_event_set_period(event, hwc, idx))
1761                         continue;
1762
1763                 if (perf_event_overflow(event, 1, &data, regs))
1764                         p6_pmu_disable_event(hwc, idx);
1765         }
1766
1767         if (handled)
1768                 inc_irq_stat(apic_perf_irqs);
1769
1770         return handled;
1771 }
1772
1773 /*
1774  * This handler is triggered by the local APIC, so the APIC IRQ handling
1775  * rules apply:
1776  */
1777 static int intel_pmu_handle_irq(struct pt_regs *regs)
1778 {
1779         struct perf_sample_data data;
1780         struct cpu_hw_events *cpuc;
1781         int bit, loops;
1782         u64 ack, status;
1783
1784         data.addr = 0;
1785
1786         cpuc = &__get_cpu_var(cpu_hw_events);
1787
1788         perf_disable();
1789         intel_pmu_drain_bts_buffer(cpuc);
1790         status = intel_pmu_get_status();
1791         if (!status) {
1792                 perf_enable();
1793                 return 0;
1794         }
1795
1796         loops = 0;
1797 again:
1798         if (++loops > 100) {
1799                 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
1800                 perf_event_print_debug();
1801                 intel_pmu_reset();
1802                 perf_enable();
1803                 return 1;
1804         }
1805
1806         inc_irq_stat(apic_perf_irqs);
1807         ack = status;
1808         for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1809                 struct perf_event *event = cpuc->events[bit];
1810
1811                 clear_bit(bit, (unsigned long *) &status);
1812                 if (!test_bit(bit, cpuc->active_mask))
1813                         continue;
1814
1815                 if (!intel_pmu_save_and_restart(event))
1816                         continue;
1817
1818                 data.period = event->hw.last_period;
1819
1820                 if (perf_event_overflow(event, 1, &data, regs))
1821                         intel_pmu_disable_event(&event->hw, bit);
1822         }
1823
1824         intel_pmu_ack_status(ack);
1825
1826         /*
1827          * Repeat if there is more work to be done:
1828          */
1829         status = intel_pmu_get_status();
1830         if (status)
1831                 goto again;
1832
1833         perf_enable();
1834
1835         return 1;
1836 }
1837
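     /*
      * Like the P6 handler above: AMD PMUs also lack a global status
      * register, so every active counter is polled and its sign bit
      * checked for overflow.
      */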
1838 static int amd_pmu_handle_irq(struct pt_regs *regs)
1839 {
1840         struct perf_sample_data data;
1841         struct cpu_hw_events *cpuc;
1842         struct perf_event *event;
1843         struct hw_perf_event *hwc;
1844         int idx, handled = 0;
1845         u64 val;
1846
1847         data.addr = 0;
1848
1849         cpuc = &__get_cpu_var(cpu_hw_events);
1850
1851         for (idx = 0; idx < x86_pmu.num_events; idx++) {
1852                 if (!test_bit(idx, cpuc->active_mask))
1853                         continue;
1854
1855                 event = cpuc->events[idx];
1856                 hwc = &event->hw;
1857
1858                 val = x86_perf_event_update(event, hwc, idx);
1859                 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1860                         continue;
1861
1862                 /*
1863                  * event overflow
1864                  */
1865                 handled         = 1;
1866                 data.period     = event->hw.last_period;
1867
1868                 if (!x86_perf_event_set_period(event, hwc, idx))
1869                         continue;
1870
1871                 if (perf_event_overflow(event, 1, &data, regs))
1872                         amd_pmu_disable_event(hwc, idx);
1873         }
1874
1875         if (handled)
1876                 inc_irq_stat(apic_perf_irqs);
1877
1878         return handled;
1879 }
1880
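     /*
      * Entered via the self-IPI sent by set_perf_event_pending() below;
      * runs the perf work that was deferred out of NMI context.
      */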
1881 void smp_perf_pending_interrupt(struct pt_regs *regs)
1882 {
1883         irq_enter();
1884         ack_APIC_irq();
1885         inc_irq_stat(apic_pending_irqs);
1886         perf_event_do_pending();
1887         irq_exit();
1888 }
1889
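     /*
      * Request a "pending work" interrupt on the local CPU by sending a
      * self-IPI; does nothing if the PMU is not initialized or has no
      * usable local APIC.
      */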
1890 void set_perf_event_pending(void)
1891 {
1892 #ifdef CONFIG_X86_LOCAL_APIC
1893         if (!x86_pmu.apic || !x86_pmu_initialized())
1894                 return;
1895
1896         apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1897 #endif
1898 }
1899
1900 void perf_events_lapic_init(void)
1901 {
1902 #ifdef CONFIG_X86_LOCAL_APIC
1903         if (!x86_pmu.apic || !x86_pmu_initialized())
1904                 return;
1905
1906         /*
1907          * Always use NMI for PMU
1908          */
1909         apic_write(APIC_LVTPC, APIC_DM_NMI);
1910 #endif
1911 }
1912
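     /*
      * NMI notifier: if any events are active, re-arm the APIC LVTPC
      * entry and hand the NMI to the vendor overflow handler.  Once we
      * get this far, NOTIFY_STOP is returned unconditionally; see the
      * comment below about back-to-back NMIs.
      */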
1913 static int __kprobes
1914 perf_event_nmi_handler(struct notifier_block *self,
1915                          unsigned long cmd, void *__args)
1916 {
1917         struct die_args *args = __args;
1918         struct pt_regs *regs;
1919
1920         if (!atomic_read(&active_events))
1921                 return NOTIFY_DONE;
1922
1923         switch (cmd) {
1924         case DIE_NMI:
1925         case DIE_NMI_IPI:
1926                 break;
1927
1928         default:
1929                 return NOTIFY_DONE;
1930         }
1931
1932         regs = args->regs;
1933
1934 #ifdef CONFIG_X86_LOCAL_APIC
1935         apic_write(APIC_LVTPC, APIC_DM_NMI);
1936 #endif
1937         /*
1938          * Can't rely on the handled return value to say it was our NMI, two
1939          * events could trigger 'simultaneously' raising two back-to-back NMIs.
1940          *
1941          * If the first NMI handles both, the latter will be empty and daze
1942          * the CPU.
1943          */
1944         x86_pmu.handle_irq(regs);
1945
1946         return NOTIFY_STOP;
1947 }
1948
1949 static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1950         .notifier_call          = perf_event_nmi_handler,
1951         .next                   = NULL,
1952         .priority               = 1
1953 };
1954
1955 static struct x86_pmu p6_pmu = {
1956         .name                   = "p6",
1957         .handle_irq             = p6_pmu_handle_irq,
1958         .disable_all            = p6_pmu_disable_all,
1959         .enable_all             = p6_pmu_enable_all,
1960         .enable                 = p6_pmu_enable_event,
1961         .disable                = p6_pmu_disable_event,
1962         .eventsel               = MSR_P6_EVNTSEL0,
1963         .perfctr                = MSR_P6_PERFCTR0,
1964         .event_map              = p6_pmu_event_map,
1965         .raw_event              = p6_pmu_raw_event,
1966         .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
1967         .apic                   = 1,
1968         .max_period             = (1ULL << 31) - 1,
1969         .version                = 0,
1970         .num_events             = 2,
1971         /*
1972          * Events have 40 bits implemented. However, they are designed such
1973          * that bits [32-39] are sign extensions of bit 31. As such, the
1974          * effective width of an event for a P6-like PMU is 32 bits only.
1975          *
1976          * See the IA-32 Intel Architecture Software Developer's Manual, Vol 3B.
1977          */
1978         .event_bits             = 32,
1979         .event_mask             = (1ULL << 32) - 1,
1980         .get_event_idx          = intel_get_event_idx,
1981 };
1982
1983 static struct x86_pmu intel_pmu = {
1984         .name                   = "Intel",
1985         .handle_irq             = intel_pmu_handle_irq,
1986         .disable_all            = intel_pmu_disable_all,
1987         .enable_all             = intel_pmu_enable_all,
1988         .enable                 = intel_pmu_enable_event,
1989         .disable                = intel_pmu_disable_event,
1990         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
1991         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
1992         .event_map              = intel_pmu_event_map,
1993         .raw_event              = intel_pmu_raw_event,
1994         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
1995         .apic                   = 1,
1996         /*
1997          * Intel PMCs cannot be accessed sanely above 32-bit width,
1998          * so we install an artificial 1<<31 period regardless of
1999          * the generic event period:
2000          */
2001         .max_period             = (1ULL << 31) - 1,
2002         .enable_bts             = intel_pmu_enable_bts,
2003         .disable_bts            = intel_pmu_disable_bts,
2004         .get_event_idx          = intel_get_event_idx,
2005 };
2006
2007 static struct x86_pmu amd_pmu = {
2008         .name                   = "AMD",
2009         .handle_irq             = amd_pmu_handle_irq,
2010         .disable_all            = amd_pmu_disable_all,
2011         .enable_all             = amd_pmu_enable_all,
2012         .enable                 = amd_pmu_enable_event,
2013         .disable                = amd_pmu_disable_event,
2014         .eventsel               = MSR_K7_EVNTSEL0,
2015         .perfctr                = MSR_K7_PERFCTR0,
2016         .event_map              = amd_pmu_event_map,
2017         .raw_event              = amd_pmu_raw_event,
2018         .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
2019         .num_events             = 4,
2020         .event_bits             = 48,
2021         .event_mask             = (1ULL << 48) - 1,
2022         .apic                   = 1,
2023         /* use highest bit to detect overflow */
2024         .max_period             = (1ULL << 47) - 1,
2025         .get_event_idx          = gen_get_event_idx,
2026 };
2027
2028 static int p6_pmu_init(void)
2029 {
2030         switch (boot_cpu_data.x86_model) {
2031         case 1:
2032         case 3:  /* Pentium Pro */
2033         case 5:
2034         case 6:  /* Pentium II */
2035         case 7:
2036         case 8:
2037         case 11: /* Pentium III */
2038                 event_constraint = intel_p6_event_constraints;
2039                 break;
2040         case 9:
2041         case 13:
2042                 /* Pentium M */
2043                 event_constraint = intel_p6_event_constraints;
2044                 break;
2045         default:
2046                 pr_cont("unsupported p6 CPU model %d ",
2047                         boot_cpu_data.x86_model);
2048                 return -ENODEV;
2049         }
2050
2051         x86_pmu = p6_pmu;
2052
2053         if (!cpu_has_apic) {
2054                 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
2055                 pr_info("no hardware sampling interrupt available.\n");
2056                 x86_pmu.apic = 0;
2057         }
2058
2059         return 0;
2060 }
2061
2062 static int intel_pmu_init(void)
2063 {
2064         union cpuid10_edx edx;
2065         union cpuid10_eax eax;
2066         unsigned int unused;
2067         unsigned int ebx;
2068         int version;
2069
2070         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2071                 /* check for P6 processor family */
2072                 if (boot_cpu_data.x86 == 6) {
2073                         return p6_pmu_init();
2074                 } else {
2075                         return -ENODEV;
2076                 }
2077         }
2078
2079         /*
2080          * Check whether the Architectural PerfMon supports
2081          * Branch Misses Retired hw_event or not.
2082          */
2083         cpuid(10, &eax.full, &ebx, &unused, &edx.full);
2084         if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
2085                 return -ENODEV;
2086
2087         version = eax.split.version_id;
2088         if (version < 2)
2089                 return -ENODEV;
2090
2091         x86_pmu                         = intel_pmu;
2092         x86_pmu.version                 = version;
2093         x86_pmu.num_events              = eax.split.num_events;
2094         x86_pmu.event_bits              = eax.split.bit_width;
2095         x86_pmu.event_mask              = (1ULL << eax.split.bit_width) - 1;
2096
2097         /*
2098          * Quirk: v2 perfmon does not report fixed-purpose events, so
2099          * assume at least 3 events:
2100          */
2101         x86_pmu.num_events_fixed        = max((int)edx.split.num_events_fixed, 3);
2102
2103         /*
2104          * Install the hw-cache-events table:
2105          */
2106         switch (boot_cpu_data.x86_model) {
2107         case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
2108         case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
2109         case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
2110         case 29: /* six-core 45 nm xeon "Dunnington" */
2111                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2112                        sizeof(hw_cache_event_ids));
2113
2114                 pr_cont("Core2 events, ");
2115                 event_constraint = intel_core_event_constraints;
2116                 break;
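             /*
              * Unrecognized models are handled like model 26: the
              * default label shares the Nehalem/Core i7 branch below.
              */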
2117         default:
2118         case 26:
2119                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2120                        sizeof(hw_cache_event_ids));
2121
2122                 event_constraint = intel_nehalem_event_constraints;
2123                 pr_cont("Nehalem/Corei7 events, ");
2124                 break;
2125         case 28:
2126                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2127                        sizeof(hw_cache_event_ids));
2128
2129                 pr_cont("Atom events, ");
2130                 break;
2131         }
2132         return 0;
2133 }
2134
2135 static int amd_pmu_init(void)
2136 {
2137         /* Performance-monitoring supported from K7 and later: */
2138         if (boot_cpu_data.x86 < 6)
2139                 return -ENODEV;
2140
2141         x86_pmu = amd_pmu;
2142
2143         /* Events are common for all AMDs */
2144         memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
2145                sizeof(hw_cache_event_ids));
2146
2147         return 0;
2148 }
2149
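     /*
      * Boot-time initialization: pick the vendor PMU driver, clip the
      * counter counts to the generic limits, build perf_event_mask,
      * program the local APIC and register the NMI notifier.
      */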
2150 void __init init_hw_perf_events(void)
2151 {
2152         int err;
2153
2154         pr_info("Performance Events: ");
2155
2156         switch (boot_cpu_data.x86_vendor) {
2157         case X86_VENDOR_INTEL:
2158                 err = intel_pmu_init();
2159                 break;
2160         case X86_VENDOR_AMD:
2161                 err = amd_pmu_init();
2162                 break;
2163         default:
2164                 return;
2165         }
2166         if (err != 0) {
2167                 pr_cont("no PMU driver, software events only.\n");
2168                 return;
2169         }
2170
2171         pr_cont("%s PMU driver.\n", x86_pmu.name);
2172
2173         if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
2174                 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
2175                      x86_pmu.num_events, X86_PMC_MAX_GENERIC);
2176                 x86_pmu.num_events = X86_PMC_MAX_GENERIC;
2177         }
2178         perf_event_mask = (1 << x86_pmu.num_events) - 1;
2179         perf_max_events = x86_pmu.num_events;
2180
2181         if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
2182                 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
2183                      x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
2184                 x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
2185         }
2186
2187         perf_event_mask |=
2188                 ((1LL << x86_pmu.num_events_fixed) - 1) << X86_PMC_IDX_FIXED;
2189         x86_pmu.intel_ctrl = perf_event_mask;
2190
2191         perf_events_lapic_init();
2192         register_die_notifier(&perf_event_nmi_notifier);
2193
2194         pr_info("... version:                %d\n",     x86_pmu.version);
2195         pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
2196         pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
2197         pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
2198         pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
2199         pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
2200         pr_info("... event mask:             %016Lx\n", perf_event_mask);
2201 }
2202
2203 static inline void x86_pmu_read(struct perf_event *event)
2204 {
2205         x86_perf_event_update(event, &event->hw, event->hw.idx);
2206 }
2207
2208 static const struct pmu pmu = {
2209         .enable         = x86_pmu_enable,
2210         .disable        = x86_pmu_disable,
2211         .read           = x86_pmu_read,
2212         .unthrottle     = x86_pmu_unthrottle,
2213 };
2214
2215 const struct pmu *hw_perf_event_init(struct perf_event *event)
2216 {
2217         int err;
2218
2219         err = __hw_perf_event_init(event);
2220         if (err) {
2221                 if (event->destroy)
2222                         event->destroy(event);
2223                 return ERR_PTR(err);
2224         }
2225
2226         return &pmu;
2227 }
2228
2229 /*
2230  * callchain support
2231  */
2232
2233 static inline
2234 void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2235 {
2236         if (entry->nr < PERF_MAX_STACK_DEPTH)
2237                 entry->ip[entry->nr++] = ip;
2238 }
2239
2240 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2241 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2242 static DEFINE_PER_CPU(int, in_nmi_frame);
2243
2244
2245 static void
2246 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
2247 {
2248         /* Ignore warnings */
2249 }
2250
2251 static void backtrace_warning(void *data, char *msg)
2252 {
2253         /* Ignore warnings */
2254 }
2255
2256 static int backtrace_stack(void *data, char *name)
2257 {
2258         per_cpu(in_nmi_frame, smp_processor_id()) =
2259                         x86_is_stack_id(NMI_STACK, name);
2260
2261         return 0;
2262 }
2263
2264 static void backtrace_address(void *data, unsigned long addr, int reliable)
2265 {
2266         struct perf_callchain_entry *entry = data;
2267
2268         if (per_cpu(in_nmi_frame, smp_processor_id()))
2269                 return;
2270
2271         if (reliable)
2272                 callchain_store(entry, addr);
2273 }
2274
2275 static const struct stacktrace_ops backtrace_ops = {
2276         .warning                = backtrace_warning,
2277         .warning_symbol         = backtrace_warning_symbol,
2278         .stack                  = backtrace_stack,
2279         .address                = backtrace_address,
2280 };
2281
2282 #include "../dumpstack.h"
2283
2284 static void
2285 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
2286 {
2287         callchain_store(entry, PERF_CONTEXT_KERNEL);
2288         callchain_store(entry, regs->ip);
2289
2290         dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
2291 }
2292
2293 /*
2294  * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
2295  */
2296 static unsigned long
2297 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
2298 {
2299         unsigned long offset, addr = (unsigned long)from;
2300         int type = in_nmi() ? KM_NMI : KM_IRQ0;
2301         unsigned long size, len = 0;
2302         struct page *page;
2303         void *map;
2304         int ret;
2305
2306         do {
2307                 ret = __get_user_pages_fast(addr, 1, 0, &page);
2308                 if (!ret)
2309                         break;
2310
2311                 offset = addr & (PAGE_SIZE - 1);
2312                 size = min(PAGE_SIZE - offset, n - len);
2313
2314                 map = kmap_atomic(page, type);
2315                 memcpy(to, map+offset, size);
2316                 kunmap_atomic(map, type);
2317                 put_page(page);
2318
2319                 len  += size;
2320                 to   += size;
2321                 addr += size;
2322
2323         } while (len < n);
2324
2325         return len;
2326 }
2327
2328 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
2329 {
2330         unsigned long bytes;
2331
2332         bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
2333
2334         return bytes == sizeof(*frame);
2335 }
2336
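     /*
      * Walk the user-space stack by following the frame-pointer chain,
      * recording return addresses until the chain ends, drops below the
      * sampled stack pointer, or the callchain entry is full.
      */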
2337 static void
2338 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
2339 {
2340         struct stack_frame frame;
2341         const void __user *fp;
2342
2343         if (!user_mode(regs))
2344                 regs = task_pt_regs(current);
2345
2346         fp = (void __user *)regs->bp;
2347
2348         callchain_store(entry, PERF_CONTEXT_USER);
2349         callchain_store(entry, regs->ip);
2350
2351         while (entry->nr < PERF_MAX_STACK_DEPTH) {
2352                 frame.next_frame     = NULL;
2353                 frame.return_address = 0;
2354
2355                 if (!copy_stack_frame(fp, &frame))
2356                         break;
2357
2358                 if ((unsigned long)fp < regs->sp)
2359                         break;
2360
2361                 callchain_store(entry, frame.return_address);
2362                 fp = frame.next_frame;
2363         }
2364 }
2365
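     /*
      * Record a kernel callchain for kernel-mode samples and a user
      * callchain whenever the task has an mm; the idle task and
      * non-running user-mode tasks are skipped.
      */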
2366 static void
2367 perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
2368 {
2369         int is_user;
2370
2371         if (!regs)
2372                 return;
2373
2374         is_user = user_mode(regs);
2375
2376         if (!current || current->pid == 0)
2377                 return;
2378
2379         if (is_user && current->state != TASK_RUNNING)
2380                 return;
2381
2382         if (!is_user)
2383                 perf_callchain_kernel(regs, entry);
2384
2385         if (current->mm)
2386                 perf_callchain_user(regs, entry);
2387 }
2388
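     /*
      * Entry point for the generic perf code: separate per-CPU entry
      * buffers are used for NMI and IRQ context so a nested NMI cannot
      * corrupt an in-progress IRQ-context callchain.
      */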
2389 struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2390 {
2391         struct perf_callchain_entry *entry;
2392
2393         if (in_nmi())
2394                 entry = &__get_cpu_var(pmc_nmi_entry);
2395         else
2396                 entry = &__get_cpu_var(pmc_irq_entry);
2397
2398         entry->nr = 0;
2399
2400         perf_do_callchain(regs, entry);
2401
2402         return entry;
2403 }
2404
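     /*
      * CPU hotplug hook: set up the per-CPU debug store (used for BTS)
      * on the CPU that just came online.
      */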
2405 void hw_perf_event_setup_online(int cpu)
2406 {
2407         init_debug_store_on_cpu(cpu);
2408 }