ARM: 5902/4: arm/perfevents: implement perf event support for ARMv6
arch/arm/kernel/perf_event.c
1 #undef DEBUG
2
3 /*
4  * ARM performance counter support.
5  *
6  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
7  *
8  * This code is based on the sparc64 perf event code, which is in turn based
9  * on the x86 code. Callchain code is based on the ARM OProfile backtrace
10  * code.
11  */
12 #define pr_fmt(fmt) "hw perfevents: " fmt
13
14 #include <linux/interrupt.h>
15 #include <linux/kernel.h>
16 #include <linux/perf_event.h>
17 #include <linux/spinlock.h>
18 #include <linux/uaccess.h>
19
20 #include <asm/cputype.h>
21 #include <asm/irq.h>
22 #include <asm/irq_regs.h>
23 #include <asm/pmu.h>
24 #include <asm/stacktrace.h>
25
26 static const struct pmu_irqs *pmu_irqs;
27
28 /*
29  * Hardware lock to serialize accesses to PMU registers. Needed for the
30  * read/modify/write sequences.
31  */
32 DEFINE_SPINLOCK(pmu_lock);
33
34 /*
35  * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
36  * another platform that supports more, we need to increase this to be the
37  * largest of all platforms.
38  */
39 #define ARMPMU_MAX_HWEVENTS             4
40
41 /* The events for a given CPU. */
42 struct cpu_hw_events {
43         /*
44          * The events that are active on the CPU for the given index. Index 0
45          * is reserved.
46          */
47         struct perf_event       *events[ARMPMU_MAX_HWEVENTS];
48
49         /*
50          * A 1 bit for an index indicates that the counter is being used for
51          * an event. A 0 means that the counter can be used.
52          */
53         unsigned long           used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
54
55         /*
56          * A 1 bit for an index indicates that the counter is actively being
57          * used.
58          */
59         unsigned long           active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
60 };
61 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
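/*
 * Added illustration (not part of the original patch): with the ARMv6
 * backend at the end of this file, index 1 is the cycle counter and indexes
 * 2 and 3 are the two programmable counters, so a cycles event plus one
 * other hardware event leaves used_mask with bits 1 and 3 set (counter 1 is
 * tried before counter 0) and bit 0 permanently clear.
 */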
62
63 struct arm_pmu {
64         const char      *name;
65         irqreturn_t     (*handle_irq)(int irq_num, void *dev);
66         void            (*enable)(struct hw_perf_event *evt, int idx);
67         void            (*disable)(struct hw_perf_event *evt, int idx);
68         int             (*event_map)(int evt);
69         u64             (*raw_event)(u64);
70         int             (*get_event_idx)(struct cpu_hw_events *cpuc,
71                                          struct hw_perf_event *hwc);
72         u32             (*read_counter)(int idx);
73         void            (*write_counter)(int idx, u32 val);
74         void            (*start)(void);
75         void            (*stop)(void);
76         int             num_events;
77         u64             max_period;
78 };
79
80 /* Set at runtime when we know what CPU type we are. */
81 static const struct arm_pmu *armpmu;
82
83 #define HW_OP_UNSUPPORTED               0xFFFF
84
85 #define C(_x) \
86         PERF_COUNT_HW_CACHE_##_x
87
88 #define CACHE_OP_UNSUPPORTED            0xFFFF
89
90 static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
91                                      [PERF_COUNT_HW_CACHE_OP_MAX]
92                                      [PERF_COUNT_HW_CACHE_RESULT_MAX];
93
94 static int
95 armpmu_map_cache_event(u64 config)
96 {
97         unsigned int cache_type, cache_op, cache_result, ret;
98
99         cache_type = (config >>  0) & 0xff;
100         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
101                 return -EINVAL;
102
103         cache_op = (config >>  8) & 0xff;
104         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
105                 return -EINVAL;
106
107         cache_result = (config >> 16) & 0xff;
108         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
109                 return -EINVAL;
110
111         ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
112
113         if (ret == CACHE_OP_UNSUPPORTED)
114                 return -ENOENT;
115
116         return ret;
117 }
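/*
 * Illustrative sketch, not part of the original patch: a PERF_TYPE_HW_CACHE
 * config packs the cache type, operation and result into its low three
 * bytes, which is exactly what armpmu_map_cache_event() above unpacks. The
 * helper below is hypothetical and only exists to show the encoding of an
 * L1 data-cache read miss.
 */
static inline u64 __maybe_unused
example_l1d_read_miss_config(void)
{
	/* Maps to ARMV6_PERFCTR_DCACHE_MISS through armv6_perf_cache_map. */
	return PERF_COUNT_HW_CACHE_L1D |
	       (PERF_COUNT_HW_CACHE_OP_READ << 8) |
	       (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
}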
118
119 static int
120 armpmu_event_set_period(struct perf_event *event,
121                         struct hw_perf_event *hwc,
122                         int idx)
123 {
124         s64 left = atomic64_read(&hwc->period_left);
125         s64 period = hwc->sample_period;
126         int ret = 0;
127
128         if (unlikely(left <= -period)) {
129                 left = period;
130                 atomic64_set(&hwc->period_left, left);
131                 hwc->last_period = period;
132                 ret = 1;
133         }
134
135         if (unlikely(left <= 0)) {
136                 left += period;
137                 atomic64_set(&hwc->period_left, left);
138                 hwc->last_period = period;
139                 ret = 1;
140         }
141
142         if (left > (s64)armpmu->max_period)
143                 left = armpmu->max_period;
144
145         atomic64_set(&hwc->prev_count, (u64)-left);
146
147         armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
148
149         perf_event_update_userpage(event);
150
151         return ret;
152 }
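/*
 * Worked example (added for illustration): with a sample_period of 1000 and
 * period_left == 1000, the counter is programmed with (u32)-1000, i.e.
 * 0xfffffc18, so the 32-bit hardware counter overflows and raises its
 * interrupt after exactly 1000 events of interest.
 */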
153
154 static u64
155 armpmu_event_update(struct perf_event *event,
156                     struct hw_perf_event *hwc,
157                     int idx)
158 {
159         int shift = 64 - 32;
160         s64 prev_raw_count, new_raw_count;
161         s64 delta;
162
163 again:
164         prev_raw_count = atomic64_read(&hwc->prev_count);
165         new_raw_count = armpmu->read_counter(idx);
166
167         if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
168                              new_raw_count) != prev_raw_count)
169                 goto again;
170
171         delta = (new_raw_count << shift) - (prev_raw_count << shift);
172         delta >>= shift;
173
174         atomic64_add(delta, &event->count);
175         atomic64_sub(delta, &hwc->period_left);
176
177         return new_raw_count;
178 }
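/*
 * Worked example (added for illustration): the shift by (64 - 32) means only
 * the low 32 bits of each snapshot matter, matching the width of the
 * hardware counters. If the previous snapshot ends in 0xfffffff0 and the new
 * read is 0x00000010, then
 *
 *	delta = ((0x10 << 32) - (0xfffffff0 << 32)) >> 32 = 0x20,
 *
 * i.e. 32 events, so a counter wrap between reads is accounted correctly.
 */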
179
180 static void
181 armpmu_disable(struct perf_event *event)
182 {
183         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
184         struct hw_perf_event *hwc = &event->hw;
185         int idx = hwc->idx;
186
187         WARN_ON(idx < 0);
188
189         clear_bit(idx, cpuc->active_mask);
190         armpmu->disable(hwc, idx);
191
192         barrier();
193
194         armpmu_event_update(event, hwc, idx);
195         cpuc->events[idx] = NULL;
196         clear_bit(idx, cpuc->used_mask);
197
198         perf_event_update_userpage(event);
199 }
200
201 static void
202 armpmu_read(struct perf_event *event)
203 {
204         struct hw_perf_event *hwc = &event->hw;
205
206         /* Don't read disabled counters! */
207         if (hwc->idx < 0)
208                 return;
209
210         armpmu_event_update(event, hwc, hwc->idx);
211 }
212
213 static void
214 armpmu_unthrottle(struct perf_event *event)
215 {
216         struct hw_perf_event *hwc = &event->hw;
217
218         /*
219          * Set the period again. Some counters can't be stopped, so when we
220          * were throttled we simply disabled the IRQ source and the counter
221          * may have been left counting. If we don't do this step then we may
222          * get an interrupt too soon or *way* too late if the overflow has
223          * happened since disabling.
224          */
225         armpmu_event_set_period(event, hwc, hwc->idx);
226         armpmu->enable(hwc, hwc->idx);
227 }
228
229 static int
230 armpmu_enable(struct perf_event *event)
231 {
232         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
233         struct hw_perf_event *hwc = &event->hw;
234         int idx;
235         int err = 0;
236
237         /* If we don't have space for the counter then finish early. */
238         idx = armpmu->get_event_idx(cpuc, hwc);
239         if (idx < 0) {
240                 err = idx;
241                 goto out;
242         }
243
244         /*
245          * If there is an event in the counter we are going to use then make
246          * sure it is disabled.
247          */
248         event->hw.idx = idx;
249         armpmu->disable(hwc, idx);
250         cpuc->events[idx] = event;
251         set_bit(idx, cpuc->active_mask);
252
253         /* Set the period for the event. */
254         armpmu_event_set_period(event, hwc, idx);
255
256         /* Enable the event. */
257         armpmu->enable(hwc, idx);
258
259         /* Propagate our changes to the userspace mapping. */
260         perf_event_update_userpage(event);
261
262 out:
263         return err;
264 }
265
266 static struct pmu pmu = {
267         .enable     = armpmu_enable,
268         .disable    = armpmu_disable,
269         .unthrottle = armpmu_unthrottle,
270         .read       = armpmu_read,
271 };
272
273 static int
274 validate_event(struct cpu_hw_events *cpuc,
275                struct perf_event *event)
276 {
277         struct hw_perf_event fake_event = event->hw;
278
279         if (event->pmu && event->pmu != &pmu)
280                 return 0;
281
282         return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
283 }
284
285 static int
286 validate_group(struct perf_event *event)
287 {
288         struct perf_event *sibling, *leader = event->group_leader;
289         struct cpu_hw_events fake_pmu;
290
291         memset(&fake_pmu, 0, sizeof(fake_pmu));
292
293         if (!validate_event(&fake_pmu, leader))
294                 return -ENOSPC;
295
296         list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
297                 if (!validate_event(&fake_pmu, sibling))
298                         return -ENOSPC;
299         }
300
301         if (!validate_event(&fake_pmu, event))
302                 return -ENOSPC;
303
304         return 0;
305 }
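/*
 * Example (added for illustration): ARMv6 below exposes only three counters
 * (the cycle counter plus two programmable ones), so a group such as
 * {cycles, instructions, branches, branch-misses} cannot be scheduled
 * atomically. The dry run against fake_pmu runs out of counters and the
 * fourth event is rejected with -ENOSPC at creation time.
 */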
306
307 static int
308 armpmu_reserve_hardware(void)
309 {
310         int i;
311         int err;
312
313         pmu_irqs = reserve_pmu();
314         if (IS_ERR(pmu_irqs)) {
315                 pr_warning("unable to reserve pmu\n");
316                 return PTR_ERR(pmu_irqs);
317         }
318
319         init_pmu();
320
321         if (pmu_irqs->num_irqs < 1) {
322                 pr_err("no irqs for PMUs defined\n");
323                 return -ENODEV;
324         }
325
326         for (i = 0; i < pmu_irqs->num_irqs; ++i) {
327                 err = request_irq(pmu_irqs->irqs[i], armpmu->handle_irq,
328                                   IRQF_DISABLED, "armpmu", NULL);
329                 if (err) {
330                         pr_warning("unable to request IRQ%d for ARM "
331                                    "perf counters\n", pmu_irqs->irqs[i]);
332                         break;
333                 }
334         }
335
336         if (err) {
337                 for (i = i - 1; i >= 0; --i)
338                         free_irq(pmu_irqs->irqs[i], NULL);
339                 release_pmu(pmu_irqs);
340                 pmu_irqs = NULL;
341         }
342
343         return err;
344 }
345
346 static void
347 armpmu_release_hardware(void)
348 {
349         int i;
350
351         for (i = pmu_irqs->num_irqs - 1; i >= 0; --i)
352                 free_irq(pmu_irqs->irqs[i], NULL);
353         armpmu->stop();
354
355         release_pmu(pmu_irqs);
356         pmu_irqs = NULL;
357 }
358
359 static atomic_t active_events = ATOMIC_INIT(0);
360 static DEFINE_MUTEX(pmu_reserve_mutex);
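/*
 * Added note: active_events and pmu_reserve_mutex implement a refcounted
 * reservation of the PMU hardware. The first event to be initialised takes
 * the mutex and claims the PMU interrupts via armpmu_reserve_hardware();
 * the last event to be destroyed drops the count to zero under the same
 * mutex (atomic_dec_and_mutex_lock()) and releases them again.
 */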
361
362 static void
363 hw_perf_event_destroy(struct perf_event *event)
364 {
365         if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
366                 armpmu_release_hardware();
367                 mutex_unlock(&pmu_reserve_mutex);
368         }
369 }
370
371 static int
372 __hw_perf_event_init(struct perf_event *event)
373 {
374         struct hw_perf_event *hwc = &event->hw;
375         int mapping, err;
376
377         /* Decode the generic type into an ARM event identifier. */
378         if (PERF_TYPE_HARDWARE == event->attr.type) {
379                 mapping = armpmu->event_map(event->attr.config);
380         } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
381                 mapping = armpmu_map_cache_event(event->attr.config);
382         } else if (PERF_TYPE_RAW == event->attr.type) {
383                 mapping = armpmu->raw_event(event->attr.config);
384         } else {
385                 pr_debug("event type %x not supported\n", event->attr.type);
386                 return -EOPNOTSUPP;
387         }
388
389         if (mapping < 0) {
390                 pr_debug("event %x:%llx not supported\n", event->attr.type,
391                          event->attr.config);
392                 return mapping;
393         }
394
395         /*
396          * Check whether we need to exclude the counter from certain modes.
397          * The ARM performance counters count in all modes all of the time, so
398          * if the user has asked for any mode exclusions then we have to fail.
399          */
400         if (event->attr.exclude_kernel || event->attr.exclude_user ||
401             event->attr.exclude_hv || event->attr.exclude_idle) {
402                 pr_debug("ARM performance counters do not support "
403                          "mode exclusion\n");
404                 return -EPERM;
405         }
406
407         /*
408          * We don't assign an index until we actually place the event onto
409          * hardware. Use -1 to signify that we haven't decided where to put it
410          * yet. For SMP systems, each core has it's own PMU so we can't do any
411          * clever allocation or constraints checking at this point.
412          */
413         hwc->idx = -1;
414
415         /*
416          * Store the event encoding into the config_base field. config and
417          * event_base are unused as the only 2 things we need to know are
418          * the event mapping and the counter to use. The counter to use is
419          * also the index and the config_base is the event type.
420          */
421         hwc->config_base            = (unsigned long)mapping;
422         hwc->config                 = 0;
423         hwc->event_base             = 0;
424
425         if (!hwc->sample_period) {
426                 hwc->sample_period  = armpmu->max_period;
427                 hwc->last_period    = hwc->sample_period;
428                 atomic64_set(&hwc->period_left, hwc->sample_period);
429         }
430
431         err = 0;
432         if (event->group_leader != event) {
433                 err = validate_group(event);
434                 if (err)
435                         return -EINVAL;
436         }
437
438         return err;
439 }
440
441 const struct pmu *
442 hw_perf_event_init(struct perf_event *event)
443 {
444         int err = 0;
445
446         if (!armpmu)
447                 return ERR_PTR(-ENODEV);
448
449         event->destroy = hw_perf_event_destroy;
450
451         if (!atomic_inc_not_zero(&active_events)) {
452                 if (atomic_read(&active_events) > perf_max_events) {
453                         atomic_dec(&active_events);
454                         return ERR_PTR(-ENOSPC);
455                 }
456
457                 mutex_lock(&pmu_reserve_mutex);
458                 if (atomic_read(&active_events) == 0) {
459                         err = armpmu_reserve_hardware();
460                 }
461
462                 if (!err)
463                         atomic_inc(&active_events);
464                 mutex_unlock(&pmu_reserve_mutex);
465         }
466
467         if (err)
468                 return ERR_PTR(err);
469
470         err = __hw_perf_event_init(event);
471         if (err)
472                 hw_perf_event_destroy(event);
473
474         return err ? ERR_PTR(err) : &pmu;
475 }
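/*
 * Usage sketch (illustration only, not part of the original patch): a
 * counting event that reaches hw_perf_event_init() above might be opened
 * from userspace roughly as follows.
 *
 *	struct perf_event_attr attr = {
 *		.type   = PERF_TYPE_HARDWARE,
 *		.config = PERF_COUNT_HW_CPU_CYCLES,
 *		.size   = sizeof(attr),
 *	};
 *	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
 *
 * The first such event reserves the PMU interrupts, and the generic config
 * is mapped to ARMV6_PERFCTR_CPU_CYCLES by armv6pmu_event_map() below.
 */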
476
477 void
478 hw_perf_enable(void)
479 {
480         /* Enable all of the perf events on hardware. */
481         int idx;
482         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
483
484         if (!armpmu)
485                 return;
486
487         for (idx = 0; idx <= armpmu->num_events; ++idx) {
488                 struct perf_event *event = cpuc->events[idx];
489
490                 if (!event)
491                         continue;
492
493                 armpmu->enable(&event->hw, idx);
494         }
495
496         armpmu->start();
497 }
498
499 void
500 hw_perf_disable(void)
501 {
502         if (armpmu)
503                 armpmu->stop();
504 }
505
506 /*
507  * ARMv6 Performance counter handling code.
508  *
509  * ARMv6 has 2 configurable performance counters and a single cycle counter.
510  * They all share a single reset bit but can be written to zero so we can use
511  * that for a reset.
512  *
513  * The counters can't be individually enabled or disabled so when we remove
514  * one event and replace it with another we could get spurious counts from the
515  * wrong event. However, we can take advantage of the fact that the
516  * performance counters can export events to the event bus, and the event bus
517  * itself can be monitored. This requires that we *don't* export the events to
518  * the event bus. The procedure for disabling a configurable counter is:
519  *      - change the counter to count the ETMEXTOUT[0] signal (0x20). This
520  *        effectively stops the counter from counting.
521  *      - disable the counter's interrupt generation (each counter has its
522  *        own interrupt enable bit).
523  * Once stopped, the counter value can be written as 0 to reset.
524  *
525  * To enable a counter:
526  *      - enable the counter's interrupt generation.
527  *      - set the new event type.
528  *
529  * Note: the dedicated cycle counter only counts cycles and can't be
530  * enabled/disabled independently of the others. When we want to disable the
531  * cycle counter, we have to just disable the interrupt reporting and start
532  * ignoring that counter. When re-enabling, we have to reset the value and
533  * enable the interrupt.
534  */
535
536 enum armv6_perf_types {
537         ARMV6_PERFCTR_ICACHE_MISS           = 0x0,
538         ARMV6_PERFCTR_IBUF_STALL            = 0x1,
539         ARMV6_PERFCTR_DDEP_STALL            = 0x2,
540         ARMV6_PERFCTR_ITLB_MISS             = 0x3,
541         ARMV6_PERFCTR_DTLB_MISS             = 0x4,
542         ARMV6_PERFCTR_BR_EXEC               = 0x5,
543         ARMV6_PERFCTR_BR_MISPREDICT         = 0x6,
544         ARMV6_PERFCTR_INSTR_EXEC            = 0x7,
545         ARMV6_PERFCTR_DCACHE_HIT            = 0x9,
546         ARMV6_PERFCTR_DCACHE_ACCESS         = 0xA,
547         ARMV6_PERFCTR_DCACHE_MISS           = 0xB,
548         ARMV6_PERFCTR_DCACHE_WBACK          = 0xC,
549         ARMV6_PERFCTR_SW_PC_CHANGE          = 0xD,
550         ARMV6_PERFCTR_MAIN_TLB_MISS         = 0xF,
551         ARMV6_PERFCTR_EXPL_D_ACCESS         = 0x10,
552         ARMV6_PERFCTR_LSU_FULL_STALL        = 0x11,
553         ARMV6_PERFCTR_WBUF_DRAINED          = 0x12,
554         ARMV6_PERFCTR_CPU_CYCLES            = 0xFF,
555         ARMV6_PERFCTR_NOP                   = 0x20,
556 };
557
558 enum armv6_counters {
559         ARMV6_CYCLE_COUNTER = 1,
560         ARMV6_COUNTER0,
561         ARMV6_COUNTER1,
562 };
563
564 /*
565  * The hardware events that we support. We do support cache operations but
566  * we have Harvard caches and no way to combine instruction and data
567  * accesses/misses in hardware.
568  */
569 static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
570         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6_PERFCTR_CPU_CYCLES,
571         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6_PERFCTR_INSTR_EXEC,
572         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
573         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
574         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
575         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6_PERFCTR_BR_MISPREDICT,
576         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
577 };
578
579 static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
580                                           [PERF_COUNT_HW_CACHE_OP_MAX]
581                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
582         [C(L1D)] = {
583                 /*
584                  * The performance counters don't differentiate between read
585                  * and write accesses/misses so this isn't strictly correct,
586                  * but it's the best we can do. Writes and reads get
587                  * combined.
588                  */
589                 [C(OP_READ)] = {
590                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
591                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
592                 },
593                 [C(OP_WRITE)] = {
594                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
595                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
596                 },
597                 [C(OP_PREFETCH)] = {
598                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
599                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
600                 },
601         },
602         [C(L1I)] = {
603                 [C(OP_READ)] = {
604                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
605                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
606                 },
607                 [C(OP_WRITE)] = {
608                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
609                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
610                 },
611                 [C(OP_PREFETCH)] = {
612                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
613                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
614                 },
615         },
616         [C(LL)] = {
617                 [C(OP_READ)] = {
618                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
619                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
620                 },
621                 [C(OP_WRITE)] = {
622                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
623                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
624                 },
625                 [C(OP_PREFETCH)] = {
626                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
627                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
628                 },
629         },
630         [C(DTLB)] = {
631                 /*
632                  * The ARM performance counters can count micro DTLB misses,
633                  * micro ITLB misses and main TLB misses. There isn't an event
634                  * for TLB misses, so use the micro misses here and if users
635                  * want the main TLB misses they can use a raw counter.
636                  */
637                 [C(OP_READ)] = {
638                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
639                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
640                 },
641                 [C(OP_WRITE)] = {
642                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
643                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
644                 },
645                 [C(OP_PREFETCH)] = {
646                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
647                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
648                 },
649         },
650         [C(ITLB)] = {
651                 [C(OP_READ)] = {
652                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
653                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
654                 },
655                 [C(OP_WRITE)] = {
656                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
657                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
658                 },
659                 [C(OP_PREFETCH)] = {
660                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
661                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
662                 },
663         },
664         [C(BPU)] = {
665                 [C(OP_READ)] = {
666                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
667                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
668                 },
669                 [C(OP_WRITE)] = {
670                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
671                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
672                 },
673                 [C(OP_PREFETCH)] = {
674                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
675                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
676                 },
677         },
678 };
679
680 enum armv6mpcore_perf_types {
681         ARMV6MPCORE_PERFCTR_ICACHE_MISS     = 0x0,
682         ARMV6MPCORE_PERFCTR_IBUF_STALL      = 0x1,
683         ARMV6MPCORE_PERFCTR_DDEP_STALL      = 0x2,
684         ARMV6MPCORE_PERFCTR_ITLB_MISS       = 0x3,
685         ARMV6MPCORE_PERFCTR_DTLB_MISS       = 0x4,
686         ARMV6MPCORE_PERFCTR_BR_EXEC         = 0x5,
687         ARMV6MPCORE_PERFCTR_BR_NOTPREDICT   = 0x6,
688         ARMV6MPCORE_PERFCTR_BR_MISPREDICT   = 0x7,
689         ARMV6MPCORE_PERFCTR_INSTR_EXEC      = 0x8,
690         ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
691         ARMV6MPCORE_PERFCTR_DCACHE_RDMISS   = 0xB,
692         ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
693         ARMV6MPCORE_PERFCTR_DCACHE_WRMISS   = 0xD,
694         ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
695         ARMV6MPCORE_PERFCTR_SW_PC_CHANGE    = 0xF,
696         ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS   = 0x10,
697         ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
698         ARMV6MPCORE_PERFCTR_LSU_FULL_STALL  = 0x12,
699         ARMV6MPCORE_PERFCTR_WBUF_DRAINED    = 0x13,
700         ARMV6MPCORE_PERFCTR_CPU_CYCLES      = 0xFF,
701 };
702
703 /*
704  * The hardware events that we support. We do support cache operations but
705  * we have Harvard caches and no way to combine instruction and data
706  * accesses/misses in hardware.
707  */
708 static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
709         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
710         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
711         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
712         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
713         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
714         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
715         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
716 };
717
718 static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
719                                         [PERF_COUNT_HW_CACHE_OP_MAX]
720                                         [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
721         [C(L1D)] = {
722                 [C(OP_READ)] = {
723                         [C(RESULT_ACCESS)]  =
724                                 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
725                         [C(RESULT_MISS)]    =
726                                 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
727                 },
728                 [C(OP_WRITE)] = {
729                         [C(RESULT_ACCESS)]  =
730                                 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
731                         [C(RESULT_MISS)]    =
732                                 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
733                 },
734                 [C(OP_PREFETCH)] = {
735                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
736                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
737                 },
738         },
739         [C(L1I)] = {
740                 [C(OP_READ)] = {
741                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
742                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
743                 },
744                 [C(OP_WRITE)] = {
745                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
746                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
747                 },
748                 [C(OP_PREFETCH)] = {
749                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
750                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
751                 },
752         },
753         [C(LL)] = {
754                 [C(OP_READ)] = {
755                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
756                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
757                 },
758                 [C(OP_WRITE)] = {
759                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
760                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
761                 },
762                 [C(OP_PREFETCH)] = {
763                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
764                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
765                 },
766         },
767         [C(DTLB)] = {
768                 /*
769                  * The ARM performance counters can count micro DTLB misses,
770                  * micro ITLB misses and main TLB misses. There isn't an event
771                  * for TLB misses, so use the micro misses here and if users
772                  * want the main TLB misses they can use a raw counter.
773                  */
774                 [C(OP_READ)] = {
775                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
776                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
777                 },
778                 [C(OP_WRITE)] = {
779                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
780                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
781                 },
782                 [C(OP_PREFETCH)] = {
783                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
784                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
785                 },
786         },
787         [C(ITLB)] = {
788                 [C(OP_READ)] = {
789                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
790                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
791                 },
792                 [C(OP_WRITE)] = {
793                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
794                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
795                 },
796                 [C(OP_PREFETCH)] = {
797                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
798                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
799                 },
800         },
801         [C(BPU)] = {
802                 [C(OP_READ)] = {
803                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
804                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
805                 },
806                 [C(OP_WRITE)] = {
807                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
808                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
809                 },
810                 [C(OP_PREFETCH)] = {
811                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
812                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
813                 },
814         },
815 };
816
817 static inline unsigned long
818 armv6_pmcr_read(void)
819 {
820         u32 val;
821         asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
822         return val;
823 }
824
825 static inline void
826 armv6_pmcr_write(unsigned long val)
827 {
828         asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
829 }
830
831 #define ARMV6_PMCR_ENABLE               (1 << 0)
832 #define ARMV6_PMCR_CTR01_RESET          (1 << 1)
833 #define ARMV6_PMCR_CCOUNT_RESET         (1 << 2)
834 #define ARMV6_PMCR_CCOUNT_DIV           (1 << 3)
835 #define ARMV6_PMCR_COUNT0_IEN           (1 << 4)
836 #define ARMV6_PMCR_COUNT1_IEN           (1 << 5)
837 #define ARMV6_PMCR_CCOUNT_IEN           (1 << 6)
838 #define ARMV6_PMCR_COUNT0_OVERFLOW      (1 << 8)
839 #define ARMV6_PMCR_COUNT1_OVERFLOW      (1 << 9)
840 #define ARMV6_PMCR_CCOUNT_OVERFLOW      (1 << 10)
841 #define ARMV6_PMCR_EVT_COUNT0_SHIFT     20
842 #define ARMV6_PMCR_EVT_COUNT0_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
843 #define ARMV6_PMCR_EVT_COUNT1_SHIFT     12
844 #define ARMV6_PMCR_EVT_COUNT1_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
845
846 #define ARMV6_PMCR_OVERFLOWED_MASK \
847         (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
848          ARMV6_PMCR_CCOUNT_OVERFLOW)
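/*
 * Illustrative sketch, not part of the original patch: programming counter 0
 * to count executed instructions with its interrupt enabled, using the field
 * definitions above. The helper is hypothetical and only demonstrates the
 * register layout; the real sequence lives in armv6pmu_enable_event() below.
 */
static inline void __maybe_unused
armv6_pmcr_example_count_instructions(void)
{
	unsigned long val = armv6_pmcr_read();

	val &= ~ARMV6_PMCR_EVT_COUNT0_MASK;
	val |= (ARMV6_PERFCTR_INSTR_EXEC << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
	       ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_ENABLE;
	armv6_pmcr_write(val);
}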
849
850 static inline int
851 armv6_pmcr_has_overflowed(unsigned long pmcr)
852 {
853         return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
854 }
855
856 static inline int
857 armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
858                                   enum armv6_counters counter)
859 {
860         int ret = 0;
861
862         if (ARMV6_CYCLE_COUNTER == counter)
863                 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
864         else if (ARMV6_COUNTER0 == counter)
865                 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
866         else if (ARMV6_COUNTER1 == counter)
867                 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
868         else
869                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
870
871         return ret;
872 }
873
874 static inline u32
875 armv6pmu_read_counter(int counter)
876 {
877         unsigned long value = 0;
878
879         if (ARMV6_CYCLE_COUNTER == counter)
880                 asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
881         else if (ARMV6_COUNTER0 == counter)
882                 asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
883         else if (ARMV6_COUNTER1 == counter)
884                 asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
885         else
886                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
887
888         return value;
889 }
890
891 static inline void
892 armv6pmu_write_counter(int counter,
893                        u32 value)
894 {
895         if (ARMV6_CYCLE_COUNTER == counter)
896                 asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
897         else if (ARMV6_COUNTER0 == counter)
898                 asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
899         else if (ARMV6_COUNTER1 == counter)
900                 asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
901         else
902                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
903 }
904
905 void
906 armv6pmu_enable_event(struct hw_perf_event *hwc,
907                       int idx)
908 {
909         unsigned long val, mask, evt, flags;
910
911         if (ARMV6_CYCLE_COUNTER == idx) {
912                 mask    = 0;
913                 evt     = ARMV6_PMCR_CCOUNT_IEN;
914         } else if (ARMV6_COUNTER0 == idx) {
915                 mask    = ARMV6_PMCR_EVT_COUNT0_MASK;
916                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
917                           ARMV6_PMCR_COUNT0_IEN;
918         } else if (ARMV6_COUNTER1 == idx) {
919                 mask    = ARMV6_PMCR_EVT_COUNT1_MASK;
920                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
921                           ARMV6_PMCR_COUNT1_IEN;
922         } else {
923                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
924                 return;
925         }
926
927         /*
928          * Mask out the current event and set the counter to count the event
929          * that we're interested in.
930          */
931         spin_lock_irqsave(&pmu_lock, flags);
932         val = armv6_pmcr_read();
933         val &= ~mask;
934         val |= evt;
935         armv6_pmcr_write(val);
936         spin_unlock_irqrestore(&pmu_lock, flags);
937 }
938
939 static irqreturn_t
940 armv6pmu_handle_irq(int irq_num,
941                     void *dev)
942 {
943         unsigned long pmcr = armv6_pmcr_read();
944         struct perf_sample_data data;
945         struct cpu_hw_events *cpuc;
946         struct pt_regs *regs;
947         int idx;
948
949         if (!armv6_pmcr_has_overflowed(pmcr))
950                 return IRQ_NONE;
951
952         regs = get_irq_regs();
953
954         /*
955          * The interrupts are cleared by writing the overflow flags back to
956          * the control register. None of the other bits have any effect when
957          * rewritten, so write the whole value back.
958          */
959         armv6_pmcr_write(pmcr);
960
961         data.addr = 0;
962
963         cpuc = &__get_cpu_var(cpu_hw_events);
964         for (idx = 0; idx <= armpmu->num_events; ++idx) {
965                 struct perf_event *event = cpuc->events[idx];
966                 struct hw_perf_event *hwc;
967
968                 if (!test_bit(idx, cpuc->active_mask))
969                         continue;
970
971                 /*
972                  * We have a single interrupt for all counters. Check that
973                  * each counter has overflowed before we process it.
974                  */
975                 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
976                         continue;
977
978                 hwc = &event->hw;
979                 armpmu_event_update(event, hwc, idx);
980                 data.period = event->hw.last_period;
981                 if (!armpmu_event_set_period(event, hwc, idx))
982                         continue;
983
984                 if (perf_event_overflow(event, 0, &data, regs))
985                         armpmu->disable(hwc, idx);
986         }
987
988         /*
989          * Handle the pending perf events.
990          *
991          * Note: this call *must* be run with interrupts enabled. For
992          * platforms that can have the PMU interrupts raised as a PMI, this
993          * will not work.
994          */
995         perf_event_do_pending();
996
997         return IRQ_HANDLED;
998 }
999
1000 static void
1001 armv6pmu_start(void)
1002 {
1003         unsigned long flags, val;
1004
1005         spin_lock_irqsave(&pmu_lock, flags);
1006         val = armv6_pmcr_read();
1007         val |= ARMV6_PMCR_ENABLE;
1008         armv6_pmcr_write(val);
1009         spin_unlock_irqrestore(&pmu_lock, flags);
1010 }
1011
1012 void
1013 armv6pmu_stop(void)
1014 {
1015         unsigned long flags, val;
1016
1017         spin_lock_irqsave(&pmu_lock, flags);
1018         val = armv6_pmcr_read();
1019         val &= ~ARMV6_PMCR_ENABLE;
1020         armv6_pmcr_write(val);
1021         spin_unlock_irqrestore(&pmu_lock, flags);
1022 }
1023
1024 static inline int
1025 armv6pmu_event_map(int config)
1026 {
1027         int mapping = armv6_perf_map[config];
1028         if (HW_OP_UNSUPPORTED == mapping)
1029                 mapping = -EOPNOTSUPP;
1030         return mapping;
1031 }
1032
1033 static inline int
1034 armv6mpcore_pmu_event_map(int config)
1035 {
1036         int mapping = armv6mpcore_perf_map[config];
1037         if (HW_OP_UNSUPPORTED == mapping)
1038                 mapping = -EOPNOTSUPP;
1039         return mapping;
1040 }
1041
1042 static u64
1043 armv6pmu_raw_event(u64 config)
1044 {
1045         return config & 0xff;
1046 }
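/*
 * Example (added for illustration): only the low byte of a raw config is
 * used, so a PERF_TYPE_RAW config of 0xf selects ARMV6_PERFCTR_MAIN_TLB_MISS,
 * an event that the generic and cache mappings above deliberately leave to
 * raw counters.
 */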
1047
1048 static int
1049 armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
1050                        struct hw_perf_event *event)
1051 {
1052         /* Always place a cycle counter into the cycle counter. */
1053         if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
1054                 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
1055                         return -EAGAIN;
1056
1057                 return ARMV6_CYCLE_COUNTER;
1058         } else {
1059                 /*
1060                  * For anything other than a cycle counter, try and use
1061                  * counter0 and counter1.
1062                  */
1063                 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
1064                         return ARMV6_COUNTER1;
1065                 }
1066
1067                 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
1068                         return ARMV6_COUNTER0;
1069                 }
1070
1071                 /* The counters are all in use. */
1072                 return -EAGAIN;
1073         }
1074 }
1075
1076 static void
1077 armv6pmu_disable_event(struct hw_perf_event *hwc,
1078                        int idx)
1079 {
1080         unsigned long val, mask, evt, flags;
1081
1082         if (ARMV6_CYCLE_COUNTER == idx) {
1083                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1084                 evt     = 0;
1085         } else if (ARMV6_COUNTER0 == idx) {
1086                 mask    = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
1087                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
1088         } else if (ARMV6_COUNTER1 == idx) {
1089                 mask    = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
1090                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
1091         } else {
1092                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1093                 return;
1094         }
1095
1096         /*
1097          * Mask out the current event and set the counter to count the number
1098          * of ETM bus signal assertion cycles. The external reporting should
1099          * be disabled and so this should never increment.
1100          */
1101         spin_lock_irqsave(&pmu_lock, flags);
1102         val = armv6_pmcr_read();
1103         val &= ~mask;
1104         val |= evt;
1105         armv6_pmcr_write(val);
1106         spin_unlock_irqrestore(&pmu_lock, flags);
1107 }
1108
1109 static void
1110 armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
1111                               int idx)
1112 {
1113         unsigned long val, mask, flags, evt = 0;
1114
1115         if (ARMV6_CYCLE_COUNTER == idx) {
1116                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1117         } else if (ARMV6_COUNTER0 == idx) {
1118                 mask    = ARMV6_PMCR_COUNT0_IEN;
1119         } else if (ARMV6_COUNTER1 == idx) {
1120                 mask    = ARMV6_PMCR_COUNT1_IEN;
1121         } else {
1122                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1123                 return;
1124         }
1125
1126         /*
1127          * Unlike UP ARMv6, we don't have a way of stopping the counters. We
1128          * simply disable the interrupt reporting.
1129          */
1130         spin_lock_irqsave(&pmu_lock, flags);
1131         val = armv6_pmcr_read();
1132         val &= ~mask;
1133         val |= evt;
1134         armv6_pmcr_write(val);
1135         spin_unlock_irqrestore(&pmu_lock, flags);
1136 }
1137
1138 static const struct arm_pmu armv6pmu = {
1139         .name                   = "v6",
1140         .handle_irq             = armv6pmu_handle_irq,
1141         .enable                 = armv6pmu_enable_event,
1142         .disable                = armv6pmu_disable_event,
1143         .event_map              = armv6pmu_event_map,
1144         .raw_event              = armv6pmu_raw_event,
1145         .read_counter           = armv6pmu_read_counter,
1146         .write_counter          = armv6pmu_write_counter,
1147         .get_event_idx          = armv6pmu_get_event_idx,
1148         .start                  = armv6pmu_start,
1149         .stop                   = armv6pmu_stop,
1150         .num_events             = 3,
1151         .max_period             = (1LLU << 32) - 1,
1152 };
1153
1154 /*
1155  * ARMv6mpcore is almost identical to single core ARMv6 with the exception
1156  * that some of the events have different enumerations and that there is no
1157  * *hack* to stop the programmable counters. To stop the counters we simply
1158  * disable the interrupt reporting and update the event. When unthrottling we
1159  * reset the period and enable the interrupt reporting.
1160  */
1161 static const struct arm_pmu armv6mpcore_pmu = {
1162         .name                   = "v6mpcore",
1163         .handle_irq             = armv6pmu_handle_irq,
1164         .enable                 = armv6pmu_enable_event,
1165         .disable                = armv6mpcore_pmu_disable_event,
1166         .event_map              = armv6mpcore_pmu_event_map,
1167         .raw_event              = armv6pmu_raw_event,
1168         .read_counter           = armv6pmu_read_counter,
1169         .write_counter          = armv6pmu_write_counter,
1170         .get_event_idx          = armv6pmu_get_event_idx,
1171         .start                  = armv6pmu_start,
1172         .stop                   = armv6pmu_stop,
1173         .num_events             = 3,
1174         .max_period             = (1LLU << 32) - 1,
1175 };
1176
1177 static int __init
1178 init_hw_perf_events(void)
1179 {
1180         unsigned long cpuid = read_cpuid_id();
1181         unsigned long implementor = (cpuid & 0xFF000000) >> 24;
1182         unsigned long part_number = (cpuid & 0xFFF0);
1183
1184         /* We only support ARM CPUs implemented by ARM at the moment. */
1185         if (0x41 == implementor) {
1186                 switch (part_number) {
1187                 case 0xB360:    /* ARM1136 */
1188                 case 0xB560:    /* ARM1156 */
1189                 case 0xB760:    /* ARM1176 */
1190                         armpmu = &armv6pmu;
1191                         memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
1192                                         sizeof(armv6_perf_cache_map));
1193                         perf_max_events = armv6pmu.num_events;
1194                         break;
1195                 case 0xB020:    /* ARM11mpcore */
1196                         armpmu = &armv6mpcore_pmu;
1197                         memcpy(armpmu_perf_cache_map,
1198                                armv6mpcore_perf_cache_map,
1199                                sizeof(armv6mpcore_perf_cache_map));
1200                         perf_max_events = armv6mpcore_pmu.num_events;
1201                         break;
1202                 default:
1203                         pr_info("no hardware support available\n");
1204                         perf_max_events = -1;
1205                 }
1206         }
1207
1208         if (armpmu)
1209                 pr_info("enabled with %s PMU driver\n",
1210                                 armpmu->name);
1211
1212         return 0;
1213 }
1214 arch_initcall(init_hw_perf_events);
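/*
 * Decoding example (illustrative; the variant and revision digits are made
 * up): an ARM1136 reads back a main ID register of roughly 0x4117b362.
 * Bits [31:24] give the implementor (0x41, ARM Ltd.) and (cpuid & 0xfff0)
 * gives the part number field (0xb360), which selects the armv6pmu backend
 * above.
 */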
1215
1216 /*
1217  * Callchain handling code.
1218  */
1219 static inline void
1220 callchain_store(struct perf_callchain_entry *entry,
1221                 u64 ip)
1222 {
1223         if (entry->nr < PERF_MAX_STACK_DEPTH)
1224                 entry->ip[entry->nr++] = ip;
1225 }
1226
1227 /*
1228  * The registers we're interested in are at the end of the variable
1229  * length saved register structure. The fp points at the end of this
1230  * structure so the address of this struct is:
1231  * (struct frame_tail *)(xxx->fp)-1
1232  *
1233  * This code has been adapted from the ARM OProfile support.
1234  */
1235 struct frame_tail {
1236         struct frame_tail   *fp;
1237         unsigned long       sp;
1238         unsigned long       lr;
1239 } __attribute__((packed));
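/*
 * Layout illustration (added, derived from the definition above): the three
 * saved words sit in the 12 bytes immediately below the address held in the
 * frame pointer register, which is why the walk starts at
 * (struct frame_tail *)regs->ARM_fp - 1:
 *
 *	ARM_fp -  4:	saved lr	(reported as the callchain entry)
 *	ARM_fp -  8:	saved sp
 *	ARM_fp - 12:	saved fp	(the next frame tail to follow)
 */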
1240
1241 /*
1242  * Get the return address for a single stackframe and return a pointer to the
1243  * next frame tail.
1244  */
1245 static struct frame_tail *
1246 user_backtrace(struct frame_tail *tail,
1247                struct perf_callchain_entry *entry)
1248 {
1249         struct frame_tail buftail;
1250
1251         /* Also check accessibility of one struct frame_tail beyond */
1252         if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
1253                 return NULL;
1254         if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
1255                 return NULL;
1256
1257         callchain_store(entry, buftail.lr);
1258
1259         /*
1260          * Frame pointers should strictly progress back up the stack
1261          * (towards higher addresses).
1262          */
1263         if (tail >= buftail.fp)
1264                 return NULL;
1265
1266         return buftail.fp - 1;
1267 }
1268
1269 static void
1270 perf_callchain_user(struct pt_regs *regs,
1271                     struct perf_callchain_entry *entry)
1272 {
1273         struct frame_tail *tail;
1274
1275         callchain_store(entry, PERF_CONTEXT_USER);
1276
1277         if (!user_mode(regs))
1278                 regs = task_pt_regs(current);
1279
1280         tail = (struct frame_tail *)regs->ARM_fp - 1;
1281
1282         while (tail && !((unsigned long)tail & 0x3))
1283                 tail = user_backtrace(tail, entry);
1284 }
1285
1286 /*
1287  * Gets called by walk_stackframe() for every stackframe. This will be called
1288  * whilst unwinding the stackframe and is like a subroutine return so we use
1289  * the PC.
1290  */
1291 static int
1292 callchain_trace(struct stackframe *fr,
1293                 void *data)
1294 {
1295         struct perf_callchain_entry *entry = data;
1296         callchain_store(entry, fr->pc);
1297         return 0;
1298 }
1299
1300 static void
1301 perf_callchain_kernel(struct pt_regs *regs,
1302                       struct perf_callchain_entry *entry)
1303 {
1304         struct stackframe fr;
1305
1306         callchain_store(entry, PERF_CONTEXT_KERNEL);
1307         fr.fp = regs->ARM_fp;
1308         fr.sp = regs->ARM_sp;
1309         fr.lr = regs->ARM_lr;
1310         fr.pc = regs->ARM_pc;
1311         walk_stackframe(&fr, callchain_trace, entry);
1312 }
1313
1314 static void
1315 perf_do_callchain(struct pt_regs *regs,
1316                   struct perf_callchain_entry *entry)
1317 {
1318         int is_user;
1319
1320         if (!regs)
1321                 return;
1322
1323         is_user = user_mode(regs);
1324
1325         if (!current || !current->pid)
1326                 return;
1327
1328         if (is_user && current->state != TASK_RUNNING)
1329                 return;
1330
1331         if (!is_user)
1332                 perf_callchain_kernel(regs, entry);
1333
1334         if (current->mm)
1335                 perf_callchain_user(regs, entry);
1336 }
1337
1338 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
1339
1340 struct perf_callchain_entry *
1341 perf_callchain(struct pt_regs *regs)
1342 {
1343         struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
1344
1345         entry->nr = 0;
1346         perf_do_callchain(regs, entry);
1347         return entry;
1348 }