PM / Wakeup: Show wakeup sources statistics in debugfs
[linux-2.6.git] / arch / arm / kernel / perf_event.c
1 #undef DEBUG
2
3 /*
4  * ARM performance counter support.
5  *
6  * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
7  *
8  * ARMv7 support: Jean Pihet <jpihet@mvista.com>
9  * 2010 (c) MontaVista Software, LLC.
10  *
11  * This code is based on the sparc64 perf event code, which is in turn based
12  * on the x86 code. Callchain code is based on the ARM OProfile backtrace
13  * code.
14  */
15 #define pr_fmt(fmt) "hw perfevents: " fmt
16
17 #include <linux/interrupt.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/perf_event.h>
21 #include <linux/platform_device.h>
22 #include <linux/spinlock.h>
23 #include <linux/uaccess.h>
24
25 #include <asm/cputype.h>
26 #include <asm/irq.h>
27 #include <asm/irq_regs.h>
28 #include <asm/pmu.h>
29 #include <asm/stacktrace.h>
30
/*
 * Platform device obtained from reserve_pmu() in armpmu_reserve_hardware();
 * armpmu_release_hardware() sets it back to NULL.
 */
static struct platform_device *pmu_device;

/*
 * Hardware lock serializing accesses to the PMU registers. It is needed
 * for the read/modify/write sequences.
 */
DEFINE_SPINLOCK(pmu_lock);

/*
 * Size of the per-CPU event table. It must be the largest counter count
 * of all supported platforms, so increase it if a platform with more
 * counters is added.
 *
 * ARMv6 supports a maximum of 3 events, starting from index 1.
 *
 * ARMv7 supports up to 32 events:
 *  cycle counter CCNT + 31 events counters CNT0..30.
 *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
 */
#define ARMPMU_MAX_HWEVENTS		33
49
50 /* The events for a given CPU. */
51 struct cpu_hw_events {
52         /*
53          * The events that are active on the CPU for the given index. Index 0
54          * is reserved.
55          */
56         struct perf_event       *events[ARMPMU_MAX_HWEVENTS];
57
58         /*
59          * A 1 bit for an index indicates that the counter is being used for
60          * an event. A 0 means that the counter can be used.
61          */
62         unsigned long           used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
63
64         /*
65          * A 1 bit for an index indicates that the counter is actively being
66          * used.
67          */
68         unsigned long           active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
69 };
70 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
71
72 /* PMU names. */
73 static const char *arm_pmu_names[] = {
74         [ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
75         [ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
76         [ARM_PERF_PMU_ID_V6]      = "v6",
77         [ARM_PERF_PMU_ID_V6MP]    = "v6mpcore",
78         [ARM_PERF_PMU_ID_CA8]     = "ARMv7 Cortex-A8",
79         [ARM_PERF_PMU_ID_CA9]     = "ARMv7 Cortex-A9",
80 };
81
82 struct arm_pmu {
83         enum arm_perf_pmu_ids id;
84         irqreturn_t     (*handle_irq)(int irq_num, void *dev);
85         void            (*enable)(struct hw_perf_event *evt, int idx);
86         void            (*disable)(struct hw_perf_event *evt, int idx);
87         int             (*event_map)(int evt);
88         u64             (*raw_event)(u64);
89         int             (*get_event_idx)(struct cpu_hw_events *cpuc,
90                                          struct hw_perf_event *hwc);
91         u32             (*read_counter)(int idx);
92         void            (*write_counter)(int idx, u32 val);
93         void            (*start)(void);
94         void            (*stop)(void);
95         int             num_events;
96         u64             max_period;
97 };
98
99 /* Set at runtime when we know what CPU type we are. */
100 static const struct arm_pmu *armpmu;
101
102 enum arm_perf_pmu_ids
103 armpmu_get_pmu_id(void)
104 {
105         int id = -ENODEV;
106
107         if (armpmu != NULL)
108                 id = armpmu->id;
109
110         return id;
111 }
112 EXPORT_SYMBOL_GPL(armpmu_get_pmu_id);
113
114 int
115 armpmu_get_max_events(void)
116 {
117         int max_events = 0;
118
119         if (armpmu != NULL)
120                 max_events = armpmu->num_events;
121
122         return max_events;
123 }
124 EXPORT_SYMBOL_GPL(armpmu_get_max_events);
125
126 #define HW_OP_UNSUPPORTED               0xFFFF
127
128 #define C(_x) \
129         PERF_COUNT_HW_CACHE_##_x
130
131 #define CACHE_OP_UNSUPPORTED            0xFFFF
132
133 static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
134                                      [PERF_COUNT_HW_CACHE_OP_MAX]
135                                      [PERF_COUNT_HW_CACHE_RESULT_MAX];
136
137 static int
138 armpmu_map_cache_event(u64 config)
139 {
140         unsigned int cache_type, cache_op, cache_result, ret;
141
142         cache_type = (config >>  0) & 0xff;
143         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
144                 return -EINVAL;
145
146         cache_op = (config >>  8) & 0xff;
147         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
148                 return -EINVAL;
149
150         cache_result = (config >> 16) & 0xff;
151         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
152                 return -EINVAL;
153
154         ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
155
156         if (ret == CACHE_OP_UNSUPPORTED)
157                 return -ENOENT;
158
159         return ret;
160 }
161
162 static int
163 armpmu_event_set_period(struct perf_event *event,
164                         struct hw_perf_event *hwc,
165                         int idx)
166 {
167         s64 left = local64_read(&hwc->period_left);
168         s64 period = hwc->sample_period;
169         int ret = 0;
170
171         if (unlikely(left <= -period)) {
172                 left = period;
173                 local64_set(&hwc->period_left, left);
174                 hwc->last_period = period;
175                 ret = 1;
176         }
177
178         if (unlikely(left <= 0)) {
179                 left += period;
180                 local64_set(&hwc->period_left, left);
181                 hwc->last_period = period;
182                 ret = 1;
183         }
184
185         if (left > (s64)armpmu->max_period)
186                 left = armpmu->max_period;
187
188         local64_set(&hwc->prev_count, (u64)-left);
189
190         armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
191
192         perf_event_update_userpage(event);
193
194         return ret;
195 }
196
197 static u64
198 armpmu_event_update(struct perf_event *event,
199                     struct hw_perf_event *hwc,
200                     int idx)
201 {
202         int shift = 64 - 32;
203         s64 prev_raw_count, new_raw_count;
204         u64 delta;
205
206 again:
207         prev_raw_count = local64_read(&hwc->prev_count);
208         new_raw_count = armpmu->read_counter(idx);
209
210         if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
211                              new_raw_count) != prev_raw_count)
212                 goto again;
213
214         delta = (new_raw_count << shift) - (prev_raw_count << shift);
215         delta >>= shift;
216
217         local64_add(delta, &event->count);
218         local64_sub(delta, &hwc->period_left);
219
220         return new_raw_count;
221 }
222
223 static void
224 armpmu_disable(struct perf_event *event)
225 {
226         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
227         struct hw_perf_event *hwc = &event->hw;
228         int idx = hwc->idx;
229
230         WARN_ON(idx < 0);
231
232         clear_bit(idx, cpuc->active_mask);
233         armpmu->disable(hwc, idx);
234
235         barrier();
236
237         armpmu_event_update(event, hwc, idx);
238         cpuc->events[idx] = NULL;
239         clear_bit(idx, cpuc->used_mask);
240
241         perf_event_update_userpage(event);
242 }
243
244 static void
245 armpmu_read(struct perf_event *event)
246 {
247         struct hw_perf_event *hwc = &event->hw;
248
249         /* Don't read disabled counters! */
250         if (hwc->idx < 0)
251                 return;
252
253         armpmu_event_update(event, hwc, hwc->idx);
254 }
255
256 static void
257 armpmu_unthrottle(struct perf_event *event)
258 {
259         struct hw_perf_event *hwc = &event->hw;
260
261         /*
262          * Set the period again. Some counters can't be stopped, so when we
263          * were throttled we simply disabled the IRQ source and the counter
264          * may have been left counting. If we don't do this step then we may
265          * get an interrupt too soon or *way* too late if the overflow has
266          * happened since disabling.
267          */
268         armpmu_event_set_period(event, hwc, hwc->idx);
269         armpmu->enable(hwc, hwc->idx);
270 }
271
272 static int
273 armpmu_enable(struct perf_event *event)
274 {
275         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
276         struct hw_perf_event *hwc = &event->hw;
277         int idx;
278         int err = 0;
279
280         /* If we don't have a space for the counter then finish early. */
281         idx = armpmu->get_event_idx(cpuc, hwc);
282         if (idx < 0) {
283                 err = idx;
284                 goto out;
285         }
286
287         /*
288          * If there is an event in the counter we are going to use then make
289          * sure it is disabled.
290          */
291         event->hw.idx = idx;
292         armpmu->disable(hwc, idx);
293         cpuc->events[idx] = event;
294         set_bit(idx, cpuc->active_mask);
295
296         /* Set the period for the event. */
297         armpmu_event_set_period(event, hwc, idx);
298
299         /* Enable the event. */
300         armpmu->enable(hwc, idx);
301
302         /* Propagate our changes to the userspace mapping. */
303         perf_event_update_userpage(event);
304
305 out:
306         return err;
307 }
308
309 static struct pmu pmu = {
310         .enable     = armpmu_enable,
311         .disable    = armpmu_disable,
312         .unthrottle = armpmu_unthrottle,
313         .read       = armpmu_read,
314 };
315
316 static int
317 validate_event(struct cpu_hw_events *cpuc,
318                struct perf_event *event)
319 {
320         struct hw_perf_event fake_event = event->hw;
321
322         if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
323                 return 1;
324
325         return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
326 }
327
328 static int
329 validate_group(struct perf_event *event)
330 {
331         struct perf_event *sibling, *leader = event->group_leader;
332         struct cpu_hw_events fake_pmu;
333
334         memset(&fake_pmu, 0, sizeof(fake_pmu));
335
336         if (!validate_event(&fake_pmu, leader))
337                 return -ENOSPC;
338
339         list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
340                 if (!validate_event(&fake_pmu, sibling))
341                         return -ENOSPC;
342         }
343
344         if (!validate_event(&fake_pmu, event))
345                 return -ENOSPC;
346
347         return 0;
348 }
349
350 static int
351 armpmu_reserve_hardware(void)
352 {
353         int i, err = -ENODEV, irq;
354
355         pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
356         if (IS_ERR(pmu_device)) {
357                 pr_warning("unable to reserve pmu\n");
358                 return PTR_ERR(pmu_device);
359         }
360
361         init_pmu(ARM_PMU_DEVICE_CPU);
362
363         if (pmu_device->num_resources < 1) {
364                 pr_err("no irqs for PMUs defined\n");
365                 return -ENODEV;
366         }
367
368         for (i = 0; i < pmu_device->num_resources; ++i) {
369                 irq = platform_get_irq(pmu_device, i);
370                 if (irq < 0)
371                         continue;
372
373                 err = request_irq(irq, armpmu->handle_irq,
374                                   IRQF_DISABLED | IRQF_NOBALANCING,
375                                   "armpmu", NULL);
376                 if (err) {
377                         pr_warning("unable to request IRQ%d for ARM perf "
378                                 "counters\n", irq);
379                         break;
380                 }
381         }
382
383         if (err) {
384                 for (i = i - 1; i >= 0; --i) {
385                         irq = platform_get_irq(pmu_device, i);
386                         if (irq >= 0)
387                                 free_irq(irq, NULL);
388                 }
389                 release_pmu(pmu_device);
390                 pmu_device = NULL;
391         }
392
393         return err;
394 }
395
396 static void
397 armpmu_release_hardware(void)
398 {
399         int i, irq;
400
401         for (i = pmu_device->num_resources - 1; i >= 0; --i) {
402                 irq = platform_get_irq(pmu_device, i);
403                 if (irq >= 0)
404                         free_irq(irq, NULL);
405         }
406         armpmu->stop();
407
408         release_pmu(pmu_device);
409         pmu_device = NULL;
410 }
411
412 static atomic_t active_events = ATOMIC_INIT(0);
413 static DEFINE_MUTEX(pmu_reserve_mutex);
414
415 static void
416 hw_perf_event_destroy(struct perf_event *event)
417 {
418         if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
419                 armpmu_release_hardware();
420                 mutex_unlock(&pmu_reserve_mutex);
421         }
422 }
423
424 static int
425 __hw_perf_event_init(struct perf_event *event)
426 {
427         struct hw_perf_event *hwc = &event->hw;
428         int mapping, err;
429
430         /* Decode the generic type into an ARM event identifier. */
431         if (PERF_TYPE_HARDWARE == event->attr.type) {
432                 mapping = armpmu->event_map(event->attr.config);
433         } else if (PERF_TYPE_HW_CACHE == event->attr.type) {
434                 mapping = armpmu_map_cache_event(event->attr.config);
435         } else if (PERF_TYPE_RAW == event->attr.type) {
436                 mapping = armpmu->raw_event(event->attr.config);
437         } else {
438                 pr_debug("event type %x not supported\n", event->attr.type);
439                 return -EOPNOTSUPP;
440         }
441
442         if (mapping < 0) {
443                 pr_debug("event %x:%llx not supported\n", event->attr.type,
444                          event->attr.config);
445                 return mapping;
446         }
447
448         /*
449          * Check whether we need to exclude the counter from certain modes.
450          * The ARM performance counters are on all of the time so if someone
451          * has asked us for some excludes then we have to fail.
452          */
453         if (event->attr.exclude_kernel || event->attr.exclude_user ||
454             event->attr.exclude_hv || event->attr.exclude_idle) {
455                 pr_debug("ARM performance counters do not support "
456                          "mode exclusion\n");
457                 return -EPERM;
458         }
459
460         /*
461          * We don't assign an index until we actually place the event onto
462          * hardware. Use -1 to signify that we haven't decided where to put it
463          * yet. For SMP systems, each core has it's own PMU so we can't do any
464          * clever allocation or constraints checking at this point.
465          */
466         hwc->idx = -1;
467
468         /*
469          * Store the event encoding into the config_base field. config and
470          * event_base are unused as the only 2 things we need to know are
471          * the event mapping and the counter to use. The counter to use is
472          * also the indx and the config_base is the event type.
473          */
474         hwc->config_base            = (unsigned long)mapping;
475         hwc->config                 = 0;
476         hwc->event_base             = 0;
477
478         if (!hwc->sample_period) {
479                 hwc->sample_period  = armpmu->max_period;
480                 hwc->last_period    = hwc->sample_period;
481                 local64_set(&hwc->period_left, hwc->sample_period);
482         }
483
484         err = 0;
485         if (event->group_leader != event) {
486                 err = validate_group(event);
487                 if (err)
488                         return -EINVAL;
489         }
490
491         return err;
492 }
493
494 const struct pmu *
495 hw_perf_event_init(struct perf_event *event)
496 {
497         int err = 0;
498
499         if (!armpmu)
500                 return ERR_PTR(-ENODEV);
501
502         event->destroy = hw_perf_event_destroy;
503
504         if (!atomic_inc_not_zero(&active_events)) {
505                 if (atomic_read(&active_events) > perf_max_events) {
506                         atomic_dec(&active_events);
507                         return ERR_PTR(-ENOSPC);
508                 }
509
510                 mutex_lock(&pmu_reserve_mutex);
511                 if (atomic_read(&active_events) == 0) {
512                         err = armpmu_reserve_hardware();
513                 }
514
515                 if (!err)
516                         atomic_inc(&active_events);
517                 mutex_unlock(&pmu_reserve_mutex);
518         }
519
520         if (err)
521                 return ERR_PTR(err);
522
523         err = __hw_perf_event_init(event);
524         if (err)
525                 hw_perf_event_destroy(event);
526
527         return err ? ERR_PTR(err) : &pmu;
528 }
529
530 void
531 hw_perf_enable(void)
532 {
533         /* Enable all of the perf events on hardware. */
534         int idx;
535         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
536
537         if (!armpmu)
538                 return;
539
540         for (idx = 0; idx <= armpmu->num_events; ++idx) {
541                 struct perf_event *event = cpuc->events[idx];
542
543                 if (!event)
544                         continue;
545
546                 armpmu->enable(&event->hw, idx);
547         }
548
549         armpmu->start();
550 }
551
552 void
553 hw_perf_disable(void)
554 {
555         if (armpmu)
556                 armpmu->stop();
557 }
558
559 /*
560  * ARMv6 Performance counter handling code.
561  *
562  * ARMv6 has 2 configurable performance counters and a single cycle counter.
563  * They all share a single reset bit but can be written to zero so we can use
564  * that for a reset.
565  *
566  * The counters can't be individually enabled or disabled so when we remove
567  * one event and replace it with another we could get spurious counts from the
568  * wrong event. However, we can take advantage of the fact that the
569  * performance counters can export events to the event bus, and the event bus
570  * itself can be monitored. This requires that we *don't* export the events to
571  * the event bus. The procedure for disabling a configurable counter is:
572  *      - change the counter to count the ETMEXTOUT[0] signal (0x20). This
573  *        effectively stops the counter from counting.
574  *      - disable the counter's interrupt generation (each counter has its
575  *        own interrupt enable bit).
576  * Once stopped, the counter value can be written as 0 to reset.
577  *
578  * To enable a counter:
579  *      - enable the counter's interrupt generation.
580  *      - set the new event type.
581  *
582  * Note: the dedicated cycle counter only counts cycles and can't be
583  * enabled/disabled independently of the others. When we want to disable the
584  * cycle counter, we have to just disable the interrupt reporting and start
585  * ignoring that counter. When re-enabling, we have to reset the value and
586  * enable the interrupt.
587  */
588
/* Event numbers understood by the ARMv6 performance counters. */
enum armv6_perf_types {
	ARMV6_PERFCTR_ICACHE_MISS	= 0x0,
	ARMV6_PERFCTR_IBUF_STALL	= 0x1,
	ARMV6_PERFCTR_DDEP_STALL	= 0x2,
	ARMV6_PERFCTR_ITLB_MISS		= 0x3,
	ARMV6_PERFCTR_DTLB_MISS		= 0x4,
	ARMV6_PERFCTR_BR_EXEC		= 0x5,
	ARMV6_PERFCTR_BR_MISPREDICT	= 0x6,
	ARMV6_PERFCTR_INSTR_EXEC	= 0x7,
	ARMV6_PERFCTR_DCACHE_HIT	= 0x9,
	ARMV6_PERFCTR_DCACHE_ACCESS	= 0xA,
	ARMV6_PERFCTR_DCACHE_MISS	= 0xB,
	ARMV6_PERFCTR_DCACHE_WBACK	= 0xC,
	ARMV6_PERFCTR_SW_PC_CHANGE	= 0xD,
	ARMV6_PERFCTR_MAIN_TLB_MISS	= 0xF,
	ARMV6_PERFCTR_EXPL_D_ACCESS	= 0x10,
	ARMV6_PERFCTR_LSU_FULL_STALL	= 0x11,
	ARMV6_PERFCTR_WBUF_DRAINED	= 0x12,
	/* 0x20 is the ETMEXTOUT[0] signal, which effectively stops a counter. */
	ARMV6_PERFCTR_NOP		= 0x20,
	ARMV6_PERFCTR_CPU_CYCLES	= 0xFF,
};
610
/* Counter indices for ARMv6; they start at 1 because index 0 is reserved. */
enum armv6_counters {
	ARMV6_CYCLE_COUNTER	= 1,
	ARMV6_COUNTER0		= 2,
	ARMV6_COUNTER1		= 3,
};
616
617 /*
618  * The hardware events that we support. We do support cache operations but
619  * we have harvard caches and no way to combine instruction and data
620  * accesses/misses in hardware.
621  */
622 static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
623         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6_PERFCTR_CPU_CYCLES,
624         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6_PERFCTR_INSTR_EXEC,
625         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
626         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
627         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
628         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6_PERFCTR_BR_MISPREDICT,
629         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
630 };
631
632 static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
633                                           [PERF_COUNT_HW_CACHE_OP_MAX]
634                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
635         [C(L1D)] = {
636                 /*
637                  * The performance counters don't differentiate between read
638                  * and write accesses/misses so this isn't strictly correct,
639                  * but it's the best we can do. Writes and reads get
640                  * combined.
641                  */
642                 [C(OP_READ)] = {
643                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
644                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
645                 },
646                 [C(OP_WRITE)] = {
647                         [C(RESULT_ACCESS)]      = ARMV6_PERFCTR_DCACHE_ACCESS,
648                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DCACHE_MISS,
649                 },
650                 [C(OP_PREFETCH)] = {
651                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
652                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
653                 },
654         },
655         [C(L1I)] = {
656                 [C(OP_READ)] = {
657                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
658                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
659                 },
660                 [C(OP_WRITE)] = {
661                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
662                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ICACHE_MISS,
663                 },
664                 [C(OP_PREFETCH)] = {
665                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
666                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
667                 },
668         },
669         [C(LL)] = {
670                 [C(OP_READ)] = {
671                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
672                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
673                 },
674                 [C(OP_WRITE)] = {
675                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
676                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
677                 },
678                 [C(OP_PREFETCH)] = {
679                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
680                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
681                 },
682         },
683         [C(DTLB)] = {
684                 /*
685                  * The ARM performance counters can count micro DTLB misses,
686                  * micro ITLB misses and main TLB misses. There isn't an event
687                  * for TLB misses, so use the micro misses here and if users
688                  * want the main TLB misses they can use a raw counter.
689                  */
690                 [C(OP_READ)] = {
691                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
692                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
693                 },
694                 [C(OP_WRITE)] = {
695                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
696                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_DTLB_MISS,
697                 },
698                 [C(OP_PREFETCH)] = {
699                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
700                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
701                 },
702         },
703         [C(ITLB)] = {
704                 [C(OP_READ)] = {
705                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
706                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
707                 },
708                 [C(OP_WRITE)] = {
709                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
710                         [C(RESULT_MISS)]        = ARMV6_PERFCTR_ITLB_MISS,
711                 },
712                 [C(OP_PREFETCH)] = {
713                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
714                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
715                 },
716         },
717         [C(BPU)] = {
718                 [C(OP_READ)] = {
719                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
720                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
721                 },
722                 [C(OP_WRITE)] = {
723                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
724                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
725                 },
726                 [C(OP_PREFETCH)] = {
727                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
728                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
729                 },
730         },
731 };
732
/* Event numbers understood by the ARMv6 MPCore performance counters. */
enum armv6mpcore_perf_types {
	ARMV6MPCORE_PERFCTR_ICACHE_MISS		= 0x0,
	ARMV6MPCORE_PERFCTR_IBUF_STALL		= 0x1,
	ARMV6MPCORE_PERFCTR_DDEP_STALL		= 0x2,
	ARMV6MPCORE_PERFCTR_ITLB_MISS		= 0x3,
	ARMV6MPCORE_PERFCTR_DTLB_MISS		= 0x4,
	ARMV6MPCORE_PERFCTR_BR_EXEC		= 0x5,
	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT	= 0x6,
	ARMV6MPCORE_PERFCTR_BR_MISPREDICT	= 0x7,
	ARMV6MPCORE_PERFCTR_INSTR_EXEC		= 0x8,
	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS	= 0xA,
	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS	= 0xB,
	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS	= 0xC,
	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS	= 0xD,
	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION	= 0xE,
	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE	= 0xF,
	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS	= 0x10,
	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS	= 0x11,
	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL	= 0x12,
	ARMV6MPCORE_PERFCTR_WBUF_DRAINED	= 0x13,
	ARMV6MPCORE_PERFCTR_CPU_CYCLES		= 0xFF,
};
755
756 /*
757  * The hardware events that we support. We do support cache operations but
758  * we have harvard caches and no way to combine instruction and data
759  * accesses/misses in hardware.
760  */
761 static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
762         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
763         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
764         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
765         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
766         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
767         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
768         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
769 };
770
771 static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
772                                         [PERF_COUNT_HW_CACHE_OP_MAX]
773                                         [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
774         [C(L1D)] = {
775                 [C(OP_READ)] = {
776                         [C(RESULT_ACCESS)]  =
777                                 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
778                         [C(RESULT_MISS)]    =
779                                 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
780                 },
781                 [C(OP_WRITE)] = {
782                         [C(RESULT_ACCESS)]  =
783                                 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
784                         [C(RESULT_MISS)]    =
785                                 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
786                 },
787                 [C(OP_PREFETCH)] = {
788                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
789                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
790                 },
791         },
792         [C(L1I)] = {
793                 [C(OP_READ)] = {
794                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
795                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
796                 },
797                 [C(OP_WRITE)] = {
798                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
799                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
800                 },
801                 [C(OP_PREFETCH)] = {
802                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
803                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
804                 },
805         },
806         [C(LL)] = {
807                 [C(OP_READ)] = {
808                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
809                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
810                 },
811                 [C(OP_WRITE)] = {
812                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
813                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
814                 },
815                 [C(OP_PREFETCH)] = {
816                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
817                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
818                 },
819         },
820         [C(DTLB)] = {
821                 /*
822                  * The ARM performance counters can count micro DTLB misses,
823                  * micro ITLB misses and main TLB misses. There isn't an event
824                  * for TLB misses, so use the micro misses here and if users
825                  * want the main TLB misses they can use a raw counter.
826                  */
827                 [C(OP_READ)] = {
828                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
829                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
830                 },
831                 [C(OP_WRITE)] = {
832                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
833                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_DTLB_MISS,
834                 },
835                 [C(OP_PREFETCH)] = {
836                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
837                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
838                 },
839         },
840         [C(ITLB)] = {
841                 [C(OP_READ)] = {
842                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
843                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
844                 },
845                 [C(OP_WRITE)] = {
846                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
847                         [C(RESULT_MISS)]    = ARMV6MPCORE_PERFCTR_ITLB_MISS,
848                 },
849                 [C(OP_PREFETCH)] = {
850                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
851                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
852                 },
853         },
854         [C(BPU)] = {
855                 [C(OP_READ)] = {
856                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
857                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
858                 },
859                 [C(OP_WRITE)] = {
860                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
861                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
862                 },
863                 [C(OP_PREFETCH)] = {
864                         [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
865                         [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
866                 },
867         },
868 };
869
870 static inline unsigned long
871 armv6_pmcr_read(void)
872 {
873         u32 val;
874         asm volatile("mrc   p15, 0, %0, c15, c12, 0" : "=r"(val));
875         return val;
876 }
877
878 static inline void
879 armv6_pmcr_write(unsigned long val)
880 {
881         asm volatile("mcr   p15, 0, %0, c15, c12, 0" : : "r"(val));
882 }
883
/* PMCR: global enable, counter reset and cycle counter divider bits. */
#define ARMV6_PMCR_ENABLE               (1 << 0)
#define ARMV6_PMCR_CTR01_RESET          (1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET         (1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV           (1 << 3)

/* PMCR: per-counter overflow interrupt enables. */
#define ARMV6_PMCR_COUNT0_IEN           (1 << 4)
#define ARMV6_PMCR_COUNT1_IEN           (1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN           (1 << 6)

/* PMCR: per-counter overflow flags. */
#define ARMV6_PMCR_COUNT0_OVERFLOW      (1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW      (1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW      (1 << 10)

/* PMCR: 8-bit event selection fields for the two event counters. */
#define ARMV6_PMCR_EVT_COUNT0_SHIFT     20
#define ARMV6_PMCR_EVT_COUNT0_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT     12
#define ARMV6_PMCR_EVT_COUNT1_MASK      (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)

/* All three overflow flags combined. */
#define ARMV6_PMCR_OVERFLOWED_MASK      \
        (ARMV6_PMCR_COUNT0_OVERFLOW |   \
         ARMV6_PMCR_COUNT1_OVERFLOW |   \
         ARMV6_PMCR_CCOUNT_OVERFLOW)
902
903 static inline int
904 armv6_pmcr_has_overflowed(unsigned long pmcr)
905 {
906         return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
907 }
908
909 static inline int
910 armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
911                                   enum armv6_counters counter)
912 {
913         int ret = 0;
914
915         if (ARMV6_CYCLE_COUNTER == counter)
916                 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
917         else if (ARMV6_COUNTER0 == counter)
918                 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
919         else if (ARMV6_COUNTER1 == counter)
920                 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
921         else
922                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
923
924         return ret;
925 }
926
927 static inline u32
928 armv6pmu_read_counter(int counter)
929 {
930         unsigned long value = 0;
931
932         if (ARMV6_CYCLE_COUNTER == counter)
933                 asm volatile("mrc   p15, 0, %0, c15, c12, 1" : "=r"(value));
934         else if (ARMV6_COUNTER0 == counter)
935                 asm volatile("mrc   p15, 0, %0, c15, c12, 2" : "=r"(value));
936         else if (ARMV6_COUNTER1 == counter)
937                 asm volatile("mrc   p15, 0, %0, c15, c12, 3" : "=r"(value));
938         else
939                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
940
941         return value;
942 }
943
944 static inline void
945 armv6pmu_write_counter(int counter,
946                        u32 value)
947 {
948         if (ARMV6_CYCLE_COUNTER == counter)
949                 asm volatile("mcr   p15, 0, %0, c15, c12, 1" : : "r"(value));
950         else if (ARMV6_COUNTER0 == counter)
951                 asm volatile("mcr   p15, 0, %0, c15, c12, 2" : : "r"(value));
952         else if (ARMV6_COUNTER1 == counter)
953                 asm volatile("mcr   p15, 0, %0, c15, c12, 3" : : "r"(value));
954         else
955                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
956 }
957
958 void
959 armv6pmu_enable_event(struct hw_perf_event *hwc,
960                       int idx)
961 {
962         unsigned long val, mask, evt, flags;
963
964         if (ARMV6_CYCLE_COUNTER == idx) {
965                 mask    = 0;
966                 evt     = ARMV6_PMCR_CCOUNT_IEN;
967         } else if (ARMV6_COUNTER0 == idx) {
968                 mask    = ARMV6_PMCR_EVT_COUNT0_MASK;
969                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
970                           ARMV6_PMCR_COUNT0_IEN;
971         } else if (ARMV6_COUNTER1 == idx) {
972                 mask    = ARMV6_PMCR_EVT_COUNT1_MASK;
973                 evt     = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
974                           ARMV6_PMCR_COUNT1_IEN;
975         } else {
976                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
977                 return;
978         }
979
980         /*
981          * Mask out the current event and set the counter to count the event
982          * that we're interested in.
983          */
984         spin_lock_irqsave(&pmu_lock, flags);
985         val = armv6_pmcr_read();
986         val &= ~mask;
987         val |= evt;
988         armv6_pmcr_write(val);
989         spin_unlock_irqrestore(&pmu_lock, flags);
990 }
991
992 static irqreturn_t
993 armv6pmu_handle_irq(int irq_num,
994                     void *dev)
995 {
996         unsigned long pmcr = armv6_pmcr_read();
997         struct perf_sample_data data;
998         struct cpu_hw_events *cpuc;
999         struct pt_regs *regs;
1000         int idx;
1001
1002         if (!armv6_pmcr_has_overflowed(pmcr))
1003                 return IRQ_NONE;
1004
1005         regs = get_irq_regs();
1006
1007         /*
1008          * The interrupts are cleared by writing the overflow flags back to
1009          * the control register. All of the other bits don't have any effect
1010          * if they are rewritten, so write the whole value back.
1011          */
1012         armv6_pmcr_write(pmcr);
1013
1014         perf_sample_data_init(&data, 0);
1015
1016         cpuc = &__get_cpu_var(cpu_hw_events);
1017         for (idx = 0; idx <= armpmu->num_events; ++idx) {
1018                 struct perf_event *event = cpuc->events[idx];
1019                 struct hw_perf_event *hwc;
1020
1021                 if (!test_bit(idx, cpuc->active_mask))
1022                         continue;
1023
1024                 /*
1025                  * We have a single interrupt for all counters. Check that
1026                  * each counter has overflowed before we process it.
1027                  */
1028                 if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
1029                         continue;
1030
1031                 hwc = &event->hw;
1032                 armpmu_event_update(event, hwc, idx);
1033                 data.period = event->hw.last_period;
1034                 if (!armpmu_event_set_period(event, hwc, idx))
1035                         continue;
1036
1037                 if (perf_event_overflow(event, 0, &data, regs))
1038                         armpmu->disable(hwc, idx);
1039         }
1040
1041         /*
1042          * Handle the pending perf events.
1043          *
1044          * Note: this call *must* be run with interrupts disabled. For
1045          * platforms that can have the PMU interrupts raised as an NMI, this
1046          * will not work.
1047          */
1048         perf_event_do_pending();
1049
1050         return IRQ_HANDLED;
1051 }
1052
1053 static void
1054 armv6pmu_start(void)
1055 {
1056         unsigned long flags, val;
1057
1058         spin_lock_irqsave(&pmu_lock, flags);
1059         val = armv6_pmcr_read();
1060         val |= ARMV6_PMCR_ENABLE;
1061         armv6_pmcr_write(val);
1062         spin_unlock_irqrestore(&pmu_lock, flags);
1063 }
1064
1065 void
1066 armv6pmu_stop(void)
1067 {
1068         unsigned long flags, val;
1069
1070         spin_lock_irqsave(&pmu_lock, flags);
1071         val = armv6_pmcr_read();
1072         val &= ~ARMV6_PMCR_ENABLE;
1073         armv6_pmcr_write(val);
1074         spin_unlock_irqrestore(&pmu_lock, flags);
1075 }
1076
1077 static inline int
1078 armv6pmu_event_map(int config)
1079 {
1080         int mapping = armv6_perf_map[config];
1081         if (HW_OP_UNSUPPORTED == mapping)
1082                 mapping = -EOPNOTSUPP;
1083         return mapping;
1084 }
1085
1086 static inline int
1087 armv6mpcore_pmu_event_map(int config)
1088 {
1089         int mapping = armv6mpcore_perf_map[config];
1090         if (HW_OP_UNSUPPORTED == mapping)
1091                 mapping = -EOPNOTSUPP;
1092         return mapping;
1093 }
1094
1095 static u64
1096 armv6pmu_raw_event(u64 config)
1097 {
1098         return config & 0xff;
1099 }
1100
1101 static int
1102 armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
1103                        struct hw_perf_event *event)
1104 {
1105         /* Always place a cycle counter into the cycle counter. */
1106         if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
1107                 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
1108                         return -EAGAIN;
1109
1110                 return ARMV6_CYCLE_COUNTER;
1111         } else {
1112                 /*
1113                  * For anything other than a cycle counter, try and use
1114                  * counter0 and counter1.
1115                  */
1116                 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
1117                         return ARMV6_COUNTER1;
1118                 }
1119
1120                 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
1121                         return ARMV6_COUNTER0;
1122                 }
1123
1124                 /* The counters are all in use. */
1125                 return -EAGAIN;
1126         }
1127 }
1128
1129 static void
1130 armv6pmu_disable_event(struct hw_perf_event *hwc,
1131                        int idx)
1132 {
1133         unsigned long val, mask, evt, flags;
1134
1135         if (ARMV6_CYCLE_COUNTER == idx) {
1136                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1137                 evt     = 0;
1138         } else if (ARMV6_COUNTER0 == idx) {
1139                 mask    = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
1140                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
1141         } else if (ARMV6_COUNTER1 == idx) {
1142                 mask    = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
1143                 evt     = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
1144         } else {
1145                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1146                 return;
1147         }
1148
1149         /*
1150          * Mask out the current event and set the counter to count the number
1151          * of ETM bus signal assertion cycles. The external reporting should
1152          * be disabled and so this should never increment.
1153          */
1154         spin_lock_irqsave(&pmu_lock, flags);
1155         val = armv6_pmcr_read();
1156         val &= ~mask;
1157         val |= evt;
1158         armv6_pmcr_write(val);
1159         spin_unlock_irqrestore(&pmu_lock, flags);
1160 }
1161
1162 static void
1163 armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
1164                               int idx)
1165 {
1166         unsigned long val, mask, flags, evt = 0;
1167
1168         if (ARMV6_CYCLE_COUNTER == idx) {
1169                 mask    = ARMV6_PMCR_CCOUNT_IEN;
1170         } else if (ARMV6_COUNTER0 == idx) {
1171                 mask    = ARMV6_PMCR_COUNT0_IEN;
1172         } else if (ARMV6_COUNTER1 == idx) {
1173                 mask    = ARMV6_PMCR_COUNT1_IEN;
1174         } else {
1175                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
1176                 return;
1177         }
1178
1179         /*
1180          * Unlike UP ARMv6, we don't have a way of stopping the counters. We
1181          * simply disable the interrupt reporting.
1182          */
1183         spin_lock_irqsave(&pmu_lock, flags);
1184         val = armv6_pmcr_read();
1185         val &= ~mask;
1186         val |= evt;
1187         armv6_pmcr_write(val);
1188         spin_unlock_irqrestore(&pmu_lock, flags);
1189 }
1190
1191 static const struct arm_pmu armv6pmu = {
1192         .id                     = ARM_PERF_PMU_ID_V6,
1193         .handle_irq             = armv6pmu_handle_irq,
1194         .enable                 = armv6pmu_enable_event,
1195         .disable                = armv6pmu_disable_event,
1196         .event_map              = armv6pmu_event_map,
1197         .raw_event              = armv6pmu_raw_event,
1198         .read_counter           = armv6pmu_read_counter,
1199         .write_counter          = armv6pmu_write_counter,
1200         .get_event_idx          = armv6pmu_get_event_idx,
1201         .start                  = armv6pmu_start,
1202         .stop                   = armv6pmu_stop,
1203         .num_events             = 3,
1204         .max_period             = (1LLU << 32) - 1,
1205 };
1206
1207 /*
1208  * ARMv6mpcore is almost identical to single core ARMv6 with the exception
1209  * that some of the events have different enumerations and that there is no
1210  * *hack* to stop the programmable counters. To stop the counters we simply
1211  * disable the interrupt reporting and update the event. When unthrottling we
1212  * reset the period and enable the interrupt reporting.
1213  */
1214 static const struct arm_pmu armv6mpcore_pmu = {
1215         .id                     = ARM_PERF_PMU_ID_V6MP,
1216         .handle_irq             = armv6pmu_handle_irq,
1217         .enable                 = armv6pmu_enable_event,
1218         .disable                = armv6mpcore_pmu_disable_event,
1219         .event_map              = armv6mpcore_pmu_event_map,
1220         .raw_event              = armv6pmu_raw_event,
1221         .read_counter           = armv6pmu_read_counter,
1222         .write_counter          = armv6pmu_write_counter,
1223         .get_event_idx          = armv6pmu_get_event_idx,
1224         .start                  = armv6pmu_start,
1225         .stop                   = armv6pmu_stop,
1226         .num_events             = 3,
1227         .max_period             = (1LLU << 32) - 1,
1228 };
1229
/*
 * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
 *
 * Copied from ARMv6 code, with the low level code inspired
 *  by the ARMv7 Oprofile code.
 *
 * Cortex-A8 has up to 4 configurable performance counters and
 *  a single cycle counter.
 * Cortex-A9 has up to 6 configurable performance counters and
 *  a single cycle counter.
 *
 * All counters can be enabled/disabled and IRQ masked separately. The cycle
 *  counter and the configurable performance counters (as a group) can be
 *  reset separately.
 */
1244
/* Event numbers shared by the ARMv7 cores handled in this file. */
enum armv7_perf_types {
        ARMV7_PERFCTR_PMNC_SW_INCR = 0x00,
        ARMV7_PERFCTR_IFETCH_MISS = 0x01,
        ARMV7_PERFCTR_ITLB_MISS = 0x02,
        ARMV7_PERFCTR_DCACHE_REFILL = 0x03,
        ARMV7_PERFCTR_DCACHE_ACCESS = 0x04,
        ARMV7_PERFCTR_DTLB_REFILL = 0x05,
        ARMV7_PERFCTR_DREAD = 0x06,
        ARMV7_PERFCTR_DWRITE = 0x07,

        ARMV7_PERFCTR_EXC_TAKEN = 0x09,
        ARMV7_PERFCTR_EXC_EXECUTED = 0x0A,
        ARMV7_PERFCTR_CID_WRITE = 0x0B,

        /*
         * ARMV7_PERFCTR_PC_WRITE is used as the equivalent of
         * HW_BRANCH_INSTRUCTIONS. It counts:
         *  - all branch instructions,
         *  - instructions that explicitly write the PC,
         *  - exception generating instructions.
         */
        ARMV7_PERFCTR_PC_WRITE = 0x0C,
        ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D,
        ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F,
        ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
        ARMV7_PERFCTR_CLOCK_CYCLES = 0x11,

        ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12,

        ARMV7_PERFCTR_CPU_CYCLES = 0xFF
};
1275
/* Event numbers that are specific to the Cortex-A8. */
enum armv7_a8_perf_types {
        ARMV7_PERFCTR_INSTR_EXECUTED = 0x08,

        ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E,

        ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40,
        ARMV7_PERFCTR_L2_STORE_MERGED = 0x41,
        ARMV7_PERFCTR_L2_STORE_BUFF = 0x42,
        ARMV7_PERFCTR_L2_ACCESS = 0x43,
        ARMV7_PERFCTR_L2_CACH_MISS = 0x44,
        ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45,
        ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46,
        ARMV7_PERFCTR_MEMORY_REPLAY = 0x47,
        ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48,
        ARMV7_PERFCTR_L1_DATA_MISS = 0x49,
        ARMV7_PERFCTR_L1_INST_MISS = 0x4A,
        ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B,
        ARMV7_PERFCTR_L1_NEON_DATA = 0x4C,
        ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D,
        ARMV7_PERFCTR_L2_NEON = 0x4E,
        ARMV7_PERFCTR_L2_NEON_HIT = 0x4F,
        ARMV7_PERFCTR_L1_INST = 0x50,
        ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51,
        ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52,
        ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53,
        ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54,
        ARMV7_PERFCTR_OP_EXECUTED = 0x55,
        ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56,
        ARMV7_PERFCTR_CYCLES_INST = 0x57,
        ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58,
        ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59,
        ARMV7_PERFCTR_NEON_CYCLES = 0x5A,

        ARMV7_PERFCTR_PMU0_EVENTS = 0x70,
        ARMV7_PERFCTR_PMU1_EVENTS = 0x71,
        ARMV7_PERFCTR_PMU_EVENTS = 0x72,
};
1314
/* Event numbers that are specific to the Cortex-A9. */
enum armv7_a9_perf_types {
        ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40,
        ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41,
        ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42,

        ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50,
        ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51,

        ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60,
        ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61,
        ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62,
        ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63,
        ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64,
        ARMV7_PERFCTR_DATA_EVICTION = 0x65,
        ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66,
        ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67,
        ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68,

        ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E,

        ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70,
        ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71,
        ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72,
        ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73,
        ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74,

        ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80,
        ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81,
        ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82,
        ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83,
        ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84,
        ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85,
        ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86,

        ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A,
        ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B,

        ARMV7_PERFCTR_ISB_INST = 0x90,
        ARMV7_PERFCTR_DSB_INST = 0x91,
        ARMV7_PERFCTR_DMB_INST = 0x92,
        ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93,

        ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0,
        ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1,
        ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2,
        ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3,
        ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4,
        ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5
};
1365
1366 /*
1367  * Cortex-A8 HW events mapping
1368  *
1369  * The hardware events that we support. We do support cache operations but
1370  * we have harvard caches and no way to combine instruction and data
1371  * accesses/misses in hardware.
1372  */
1373 static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
1374         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV7_PERFCTR_CPU_CYCLES,
1375         [PERF_COUNT_HW_INSTRUCTIONS]        = ARMV7_PERFCTR_INSTR_EXECUTED,
1376         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
1377         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
1378         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1379         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1380         [PERF_COUNT_HW_BUS_CYCLES]          = ARMV7_PERFCTR_CLOCK_CYCLES,
1381 };
1382
1383 static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1384                                           [PERF_COUNT_HW_CACHE_OP_MAX]
1385                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1386         [C(L1D)] = {
1387                 /*
1388                  * The performance counters don't differentiate between read
1389                  * and write accesses/misses so this isn't strictly correct,
1390                  * but it's the best we can do. Writes and reads get
1391                  * combined.
1392                  */
1393                 [C(OP_READ)] = {
1394                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1395                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1396                 },
1397                 [C(OP_WRITE)] = {
1398                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1399                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1400                 },
1401                 [C(OP_PREFETCH)] = {
1402                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1403                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1404                 },
1405         },
1406         [C(L1I)] = {
1407                 [C(OP_READ)] = {
1408                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L1_INST,
1409                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L1_INST_MISS,
1410                 },
1411                 [C(OP_WRITE)] = {
1412                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L1_INST,
1413                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L1_INST_MISS,
1414                 },
1415                 [C(OP_PREFETCH)] = {
1416                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1417                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1418                 },
1419         },
1420         [C(LL)] = {
1421                 [C(OP_READ)] = {
1422                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L2_ACCESS,
1423                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L2_CACH_MISS,
1424                 },
1425                 [C(OP_WRITE)] = {
1426                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_L2_ACCESS,
1427                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_L2_CACH_MISS,
1428                 },
1429                 [C(OP_PREFETCH)] = {
1430                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1431                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1432                 },
1433         },
1434         [C(DTLB)] = {
1435                 /*
1436                  * Only ITLB misses and DTLB refills are supported.
1437                  * If users want the DTLB refills misses a raw counter
1438                  * must be used.
1439                  */
1440                 [C(OP_READ)] = {
1441                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1442                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1443                 },
1444                 [C(OP_WRITE)] = {
1445                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1446                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1447                 },
1448                 [C(OP_PREFETCH)] = {
1449                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1450                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1451                 },
1452         },
1453         [C(ITLB)] = {
1454                 [C(OP_READ)] = {
1455                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1456                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1457                 },
1458                 [C(OP_WRITE)] = {
1459                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1460                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1461                 },
1462                 [C(OP_PREFETCH)] = {
1463                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1464                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1465                 },
1466         },
1467         [C(BPU)] = {
1468                 [C(OP_READ)] = {
1469                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1470                         [C(RESULT_MISS)]
1471                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1472                 },
1473                 [C(OP_WRITE)] = {
1474                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1475                         [C(RESULT_MISS)]
1476                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1477                 },
1478                 [C(OP_PREFETCH)] = {
1479                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1480                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1481                 },
1482         },
1483 };
1484
1485 /*
1486  * Cortex-A9 HW events mapping
1487  */
1488 static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
1489         [PERF_COUNT_HW_CPU_CYCLES]          = ARMV7_PERFCTR_CPU_CYCLES,
1490         [PERF_COUNT_HW_INSTRUCTIONS]        =
1491                                         ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
1492         [PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
1493         [PERF_COUNT_HW_CACHE_MISSES]        = ARMV7_PERFCTR_COHERENT_LINE_MISS,
1494         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
1495         [PERF_COUNT_HW_BRANCH_MISSES]       = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1496         [PERF_COUNT_HW_BUS_CYCLES]          = ARMV7_PERFCTR_CLOCK_CYCLES,
1497 };
1498
1499 static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
1500                                           [PERF_COUNT_HW_CACHE_OP_MAX]
1501                                           [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1502         [C(L1D)] = {
1503                 /*
1504                  * The performance counters don't differentiate between read
1505                  * and write accesses/misses so this isn't strictly correct,
1506                  * but it's the best we can do. Writes and reads get
1507                  * combined.
1508                  */
1509                 [C(OP_READ)] = {
1510                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1511                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1512                 },
1513                 [C(OP_WRITE)] = {
1514                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_DCACHE_ACCESS,
1515                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DCACHE_REFILL,
1516                 },
1517                 [C(OP_PREFETCH)] = {
1518                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1519                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1520                 },
1521         },
1522         [C(L1I)] = {
1523                 [C(OP_READ)] = {
1524                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1525                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_IFETCH_MISS,
1526                 },
1527                 [C(OP_WRITE)] = {
1528                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1529                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_IFETCH_MISS,
1530                 },
1531                 [C(OP_PREFETCH)] = {
1532                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1533                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1534                 },
1535         },
1536         [C(LL)] = {
1537                 [C(OP_READ)] = {
1538                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1539                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1540                 },
1541                 [C(OP_WRITE)] = {
1542                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1543                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1544                 },
1545                 [C(OP_PREFETCH)] = {
1546                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1547                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1548                 },
1549         },
1550         [C(DTLB)] = {
1551                 /*
1552                  * Only ITLB misses and DTLB refills are supported.
1553                  * If users want the DTLB refills misses a raw counter
1554                  * must be used.
1555                  */
1556                 [C(OP_READ)] = {
1557                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1558                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1559                 },
1560                 [C(OP_WRITE)] = {
1561                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1562                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_DTLB_REFILL,
1563                 },
1564                 [C(OP_PREFETCH)] = {
1565                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1566                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1567                 },
1568         },
1569         [C(ITLB)] = {
1570                 [C(OP_READ)] = {
1571                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1572                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1573                 },
1574                 [C(OP_WRITE)] = {
1575                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1576                         [C(RESULT_MISS)]        = ARMV7_PERFCTR_ITLB_MISS,
1577                 },
1578                 [C(OP_PREFETCH)] = {
1579                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1580                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1581                 },
1582         },
1583         [C(BPU)] = {
1584                 [C(OP_READ)] = {
1585                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1586                         [C(RESULT_MISS)]
1587                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1588                 },
1589                 [C(OP_WRITE)] = {
1590                         [C(RESULT_ACCESS)]      = ARMV7_PERFCTR_PC_WRITE,
1591                         [C(RESULT_MISS)]
1592                                         = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
1593                 },
1594                 [C(OP_PREFETCH)] = {
1595                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
1596                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
1597                 },
1598         },
1599 };
1600
/*
 * Perf Events counters
 *
 * Software indices used for the ARMv7 counters.
 */
enum armv7_counters {
        ARMV7_CYCLE_COUNTER             = 1,    /* Cycle counter */
        ARMV7_COUNTER0                  = 2,    /* First event counter */
};

/*
 * The cycle counter is ARMV7_CYCLE_COUNTER.
 * The first event counter is ARMV7_COUNTER0.
 * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
 *
 * NOTE(review): armv7_reset_read_pmnc() returns a count that already
 * includes the cycle counter. If num_events is set from that value, this
 * bound is one past the highest index armv7pmu_get_event_idx() hands out.
 * Confirm before tightening.
 */
#define ARMV7_COUNTER_LAST      (ARMV7_COUNTER0 + armpmu->num_events - 1)

/*
 * ARMv7 low level PMNC access
 */

/*
 * Per-CPU PMNC: config reg
 */
#define ARMV7_PMNC_E            (1 << 0) /* Enable all counters */
#define ARMV7_PMNC_P            (1 << 1) /* Reset all counters */
#define ARMV7_PMNC_C            (1 << 2) /* Cycle counter reset */
#define ARMV7_PMNC_D            (1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV7_PMNC_X            (1 << 4) /* Export to ETM */
#define ARMV7_PMNC_DP           (1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV7_PMNC_N_SHIFT      11       /* Number of counters supported */
#define ARMV7_PMNC_N_MASK       0x1f
#define ARMV7_PMNC_MASK         0x3f     /* Mask for writable bits */

/*
 * Available counters
 */
#define ARMV7_CNT0              0       /* First event counter */
#define ARMV7_CCNT              31      /* Cycle counter */

/* Perf Event to low level counters mapping */
#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0)

/*
 * The per-counter bit macros below shift an unsigned 1 so that bit 31
 * (the cycle counter) never lands in the sign bit of a signed int, and
 * parenthesize idx so that any expression can be passed as the argument.
 */

/*
 * CNTENS: counters enable reg
 */
#define ARMV7_CNTENS_P(idx)     (1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENS_C          (1U << ARMV7_CCNT)

/*
 * CNTENC: counters disable reg
 */
#define ARMV7_CNTENC_P(idx)     (1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENC_C          (1U << ARMV7_CCNT)

/*
 * INTENS: counters overflow interrupt enable reg
 */
#define ARMV7_INTENS_P(idx)     (1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENS_C          (1U << ARMV7_CCNT)

/*
 * INTENC: counters overflow interrupt disable reg
 */
#define ARMV7_INTENC_P(idx)     (1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENC_C          (1U << ARMV7_CCNT)

/*
 * EVTSEL: Event selection reg
 */
#define ARMV7_EVTSEL_MASK       0xff            /* Mask for writable bits */

/*
 * SELECT: Counter selection reg
 */
#define ARMV7_SELECT_MASK       0x1f            /* Mask for writable bits */

/*
 * FLAG: counters overflow flag status reg
 */
#define ARMV7_FLAG_P(idx)       (1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_FLAG_C            (1U << ARMV7_CCNT)
#define ARMV7_FLAG_MASK         0xffffffff      /* Mask for writable bits */
#define ARMV7_OVERFLOWED_MASK   ARMV7_FLAG_MASK
1683
1684 static inline unsigned long armv7_pmnc_read(void)
1685 {
1686         u32 val;
1687         asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
1688         return val;
1689 }
1690
1691 static inline void armv7_pmnc_write(unsigned long val)
1692 {
1693         val &= ARMV7_PMNC_MASK;
1694         asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
1695 }
1696
1697 static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
1698 {
1699         return pmnc & ARMV7_OVERFLOWED_MASK;
1700 }
1701
1702 static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
1703                                         enum armv7_counters counter)
1704 {
1705         int ret;
1706
1707         if (counter == ARMV7_CYCLE_COUNTER)
1708                 ret = pmnc & ARMV7_FLAG_C;
1709         else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
1710                 ret = pmnc & ARMV7_FLAG_P(counter);
1711         else
1712                 pr_err("CPU%u checking wrong counter %d overflow status\n",
1713                         smp_processor_id(), counter);
1714
1715         return ret;
1716 }
1717
1718 static inline int armv7_pmnc_select_counter(unsigned int idx)
1719 {
1720         u32 val;
1721
1722         if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
1723                 pr_err("CPU%u selecting wrong PMNC counter"
1724                         " %d\n", smp_processor_id(), idx);
1725                 return -1;
1726         }
1727
1728         val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
1729         asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
1730
1731         return idx;
1732 }
1733
1734 static inline u32 armv7pmu_read_counter(int idx)
1735 {
1736         unsigned long value = 0;
1737
1738         if (idx == ARMV7_CYCLE_COUNTER)
1739                 asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
1740         else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1741                 if (armv7_pmnc_select_counter(idx) == idx)
1742                         asm volatile("mrc p15, 0, %0, c9, c13, 2"
1743                                      : "=r" (value));
1744         } else
1745                 pr_err("CPU%u reading wrong counter %d\n",
1746                         smp_processor_id(), idx);
1747
1748         return value;
1749 }
1750
1751 static inline void armv7pmu_write_counter(int idx, u32 value)
1752 {
1753         if (idx == ARMV7_CYCLE_COUNTER)
1754                 asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
1755         else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
1756                 if (armv7_pmnc_select_counter(idx) == idx)
1757                         asm volatile("mcr p15, 0, %0, c9, c13, 2"
1758                                      : : "r" (value));
1759         } else
1760                 pr_err("CPU%u writing wrong counter %d\n",
1761                         smp_processor_id(), idx);
1762 }
1763
1764 static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
1765 {
1766         if (armv7_pmnc_select_counter(idx) == idx) {
1767                 val &= ARMV7_EVTSEL_MASK;
1768                 asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
1769         }
1770 }
1771
1772 static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
1773 {
1774         u32 val;
1775
1776         if ((idx != ARMV7_CYCLE_COUNTER) &&
1777             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1778                 pr_err("CPU%u enabling wrong PMNC counter"
1779                         " %d\n", smp_processor_id(), idx);
1780                 return -1;
1781         }
1782
1783         if (idx == ARMV7_CYCLE_COUNTER)
1784                 val = ARMV7_CNTENS_C;
1785         else
1786                 val = ARMV7_CNTENS_P(idx);
1787
1788         asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
1789
1790         return idx;
1791 }
1792
1793 static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
1794 {
1795         u32 val;
1796
1797
1798         if ((idx != ARMV7_CYCLE_COUNTER) &&
1799             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1800                 pr_err("CPU%u disabling wrong PMNC counter"
1801                         " %d\n", smp_processor_id(), idx);
1802                 return -1;
1803         }
1804
1805         if (idx == ARMV7_CYCLE_COUNTER)
1806                 val = ARMV7_CNTENC_C;
1807         else
1808                 val = ARMV7_CNTENC_P(idx);
1809
1810         asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
1811
1812         return idx;
1813 }
1814
1815 static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
1816 {
1817         u32 val;
1818
1819         if ((idx != ARMV7_CYCLE_COUNTER) &&
1820             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1821                 pr_err("CPU%u enabling wrong PMNC counter"
1822                         " interrupt enable %d\n", smp_processor_id(), idx);
1823                 return -1;
1824         }
1825
1826         if (idx == ARMV7_CYCLE_COUNTER)
1827                 val = ARMV7_INTENS_C;
1828         else
1829                 val = ARMV7_INTENS_P(idx);
1830
1831         asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
1832
1833         return idx;
1834 }
1835
1836 static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
1837 {
1838         u32 val;
1839
1840         if ((idx != ARMV7_CYCLE_COUNTER) &&
1841             ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
1842                 pr_err("CPU%u disabling wrong PMNC counter"
1843                         " interrupt enable %d\n", smp_processor_id(), idx);
1844                 return -1;
1845         }
1846
1847         if (idx == ARMV7_CYCLE_COUNTER)
1848                 val = ARMV7_INTENC_C;
1849         else
1850                 val = ARMV7_INTENC_P(idx);
1851
1852         asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
1853
1854         return idx;
1855 }
1856
1857 static inline u32 armv7_pmnc_getreset_flags(void)
1858 {
1859         u32 val;
1860
1861         /* Read */
1862         asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
1863
1864         /* Write to clear flags */
1865         val &= ARMV7_FLAG_MASK;
1866         asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
1867
1868         return val;
1869 }
1870
#ifdef DEBUG
/*
 * Debug helper: print the PMU control registers, the cycle counter and,
 * for each event counter visited by the loop, its count and event
 * selection values.
 */
static void armv7_pmnc_dump_regs(void)
{
        u32 reg;
        unsigned int idx;

        printk(KERN_INFO "PMNC registers dump:\n");

        asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (reg));
        printk(KERN_INFO "PMNC  =0x%08x\n", reg);

        asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (reg));
        printk(KERN_INFO "CNTENS=0x%08x\n", reg);

        asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (reg));
        printk(KERN_INFO "INTENS=0x%08x\n", reg);

        asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (reg));
        printk(KERN_INFO "FLAGS =0x%08x\n", reg);

        asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (reg));
        printk(KERN_INFO "SELECT=0x%08x\n", reg);

        asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (reg));
        printk(KERN_INFO "CCNT  =0x%08x\n", reg);

        for (idx = ARMV7_COUNTER0; idx < ARMV7_COUNTER_LAST; idx++) {
                /* Low level counter number shown in the log lines. */
                unsigned int hw_cnt = idx - ARMV7_EVENT_CNT_TO_CNTx;

                armv7_pmnc_select_counter(idx);

                asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (reg));
                printk(KERN_INFO "CNT[%d] count =0x%08x\n", hw_cnt, reg);

                asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (reg));
                printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", hw_cnt, reg);
        }
}
#endif
1908
1909 void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
1910 {
1911         unsigned long flags;
1912
1913         /*
1914          * Enable counter and interrupt, and set the counter to count
1915          * the event that we're interested in.
1916          */
1917         spin_lock_irqsave(&pmu_lock, flags);
1918
1919         /*
1920          * Disable counter
1921          */
1922         armv7_pmnc_disable_counter(idx);
1923
1924         /*
1925          * Set event (if destined for PMNx counters)
1926          * We don't need to set the event if it's a cycle count
1927          */
1928         if (idx != ARMV7_CYCLE_COUNTER)
1929                 armv7_pmnc_write_evtsel(idx, hwc->config_base);
1930
1931         /*
1932          * Enable interrupt for this counter
1933          */
1934         armv7_pmnc_enable_intens(idx);
1935
1936         /*
1937          * Enable counter
1938          */
1939         armv7_pmnc_enable_counter(idx);
1940
1941         spin_unlock_irqrestore(&pmu_lock, flags);
1942 }
1943
1944 static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
1945 {
1946         unsigned long flags;
1947
1948         /*
1949          * Disable counter and interrupt
1950          */
1951         spin_lock_irqsave(&pmu_lock, flags);
1952
1953         /*
1954          * Disable counter
1955          */
1956         armv7_pmnc_disable_counter(idx);
1957
1958         /*
1959          * Disable interrupt for this counter
1960          */
1961         armv7_pmnc_disable_intens(idx);
1962
1963         spin_unlock_irqrestore(&pmu_lock, flags);
1964 }
1965
1966 static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
1967 {
1968         unsigned long pmnc;
1969         struct perf_sample_data data;
1970         struct cpu_hw_events *cpuc;
1971         struct pt_regs *regs;
1972         int idx;
1973
1974         /*
1975          * Get and reset the IRQ flags
1976          */
1977         pmnc = armv7_pmnc_getreset_flags();
1978
1979         /*
1980          * Did an overflow occur?
1981          */
1982         if (!armv7_pmnc_has_overflowed(pmnc))
1983                 return IRQ_NONE;
1984
1985         /*
1986          * Handle the counter(s) overflow(s)
1987          */
1988         regs = get_irq_regs();
1989
1990         perf_sample_data_init(&data, 0);
1991
1992         cpuc = &__get_cpu_var(cpu_hw_events);
1993         for (idx = 0; idx <= armpmu->num_events; ++idx) {
1994                 struct perf_event *event = cpuc->events[idx];
1995                 struct hw_perf_event *hwc;
1996
1997                 if (!test_bit(idx, cpuc->active_mask))
1998                         continue;
1999
2000                 /*
2001                  * We have a single interrupt for all counters. Check that
2002                  * each counter has overflowed before we process it.
2003                  */
2004                 if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
2005                         continue;
2006
2007                 hwc = &event->hw;
2008                 armpmu_event_update(event, hwc, idx);
2009                 data.period = event->hw.last_period;
2010                 if (!armpmu_event_set_period(event, hwc, idx))
2011                         continue;
2012
2013                 if (perf_event_overflow(event, 0, &data, regs))
2014                         armpmu->disable(hwc, idx);
2015         }
2016
2017         /*
2018          * Handle the pending perf events.
2019          *
2020          * Note: this call *must* be run with interrupts disabled. For
2021          * platforms that can have the PMU interrupts raised as an NMI, this
2022          * will not work.
2023          */
2024         perf_event_do_pending();
2025
2026         return IRQ_HANDLED;
2027 }
2028
2029 static void armv7pmu_start(void)
2030 {
2031         unsigned long flags;
2032
2033         spin_lock_irqsave(&pmu_lock, flags);
2034         /* Enable all counters */
2035         armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
2036         spin_unlock_irqrestore(&pmu_lock, flags);
2037 }
2038
2039 static void armv7pmu_stop(void)
2040 {
2041         unsigned long flags;
2042
2043         spin_lock_irqsave(&pmu_lock, flags);
2044         /* Disable all counters */
2045         armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
2046         spin_unlock_irqrestore(&pmu_lock, flags);
2047 }
2048
2049 static inline int armv7_a8_pmu_event_map(int config)
2050 {
2051         int mapping = armv7_a8_perf_map[config];
2052         if (HW_OP_UNSUPPORTED == mapping)
2053                 mapping = -EOPNOTSUPP;
2054         return mapping;
2055 }
2056
2057 static inline int armv7_a9_pmu_event_map(int config)
2058 {
2059         int mapping = armv7_a9_perf_map[config];
2060         if (HW_OP_UNSUPPORTED == mapping)
2061                 mapping = -EOPNOTSUPP;
2062         return mapping;
2063 }
2064
2065 static u64 armv7pmu_raw_event(u64 config)
2066 {
2067         return config & 0xff;
2068 }
2069
2070 static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
2071                                   struct hw_perf_event *event)
2072 {
2073         int idx;
2074
2075         /* Always place a cycle counter into the cycle counter. */
2076         if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
2077                 if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
2078                         return -EAGAIN;
2079
2080                 return ARMV7_CYCLE_COUNTER;
2081         } else {
2082                 /*
2083                  * For anything other than a cycle counter, try and use
2084                  * the events counters
2085                  */
2086                 for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
2087                         if (!test_and_set_bit(idx, cpuc->used_mask))
2088                                 return idx;
2089                 }
2090
2091                 /* The counters are all in use. */
2092                 return -EAGAIN;
2093         }
2094 }
2095
2096 static struct arm_pmu armv7pmu = {
2097         .handle_irq             = armv7pmu_handle_irq,
2098         .enable                 = armv7pmu_enable_event,
2099         .disable                = armv7pmu_disable_event,
2100         .raw_event              = armv7pmu_raw_event,
2101         .read_counter           = armv7pmu_read_counter,
2102         .write_counter          = armv7pmu_write_counter,
2103         .get_event_idx          = armv7pmu_get_event_idx,
2104         .start                  = armv7pmu_start,
2105         .stop                   = armv7pmu_stop,
2106         .max_period             = (1LLU << 32) - 1,
2107 };
2108
2109 static u32 __init armv7_reset_read_pmnc(void)
2110 {
2111         u32 nb_cnt;
2112
2113         /* Initialize & Reset PMNC: C and P bits */
2114         armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
2115
2116         /* Read the nb of CNTx counters supported from PMNC */
2117         nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;
2118
2119         /* Add the CPU cycles counter and return */
2120         return nb_cnt + 1;
2121 }
2122
2123 /*
2124  * ARMv5 [xscale] Performance counter handling code.
2125  *
2126  * Based on xscale OProfile code.
2127  *
2128  * There are two variants of the xscale PMU that we support:
2129  *      - xscale1pmu: 2 event counters and a cycle counter
2130  *      - xscale2pmu: 4 event counters and a cycle counter
2131  * The two variants share event definitions, but have different
2132  * PMU structures.
2133  */
2134
/*
 * XScale event numbers. The values 0x0e, 0x0f and 0x13 are not assigned
 * in this list.
 */
enum xscale_perf_types {
        /* 0x00 - 0x0d */
        XSCALE_PERFCTR_ICACHE_MISS              = 0x00,
        XSCALE_PERFCTR_ICACHE_NO_DELIVER        = 0x01,
        XSCALE_PERFCTR_DATA_STALL               = 0x02,
        XSCALE_PERFCTR_ITLB_MISS                = 0x03,
        XSCALE_PERFCTR_DTLB_MISS                = 0x04,
        XSCALE_PERFCTR_BRANCH                   = 0x05,
        XSCALE_PERFCTR_BRANCH_MISS              = 0x06,
        XSCALE_PERFCTR_INSTRUCTION              = 0x07,
        XSCALE_PERFCTR_DCACHE_FULL_STALL        = 0x08,
        XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
        XSCALE_PERFCTR_DCACHE_ACCESS            = 0x0a,
        XSCALE_PERFCTR_DCACHE_MISS              = 0x0b,
        XSCALE_PERFCTR_DCACHE_WRITE_BACK        = 0x0c,
        XSCALE_PERFCTR_PC_CHANGED               = 0x0d,

        /* 0x10 - 0x16 */
        XSCALE_PERFCTR_BCU_REQUEST              = 0x10,
        XSCALE_PERFCTR_BCU_FULL                 = 0x11,
        XSCALE_PERFCTR_BCU_DRAIN                = 0x12,
        XSCALE_PERFCTR_BCU_ECC_NO_ELOG          = 0x14,
        XSCALE_PERFCTR_BCU_1_BIT_ERR            = 0x15,
        XSCALE_PERFCTR_RMW                      = 0x16,

        /* XSCALE_PERFCTR_CCNT is not hardware defined */
        XSCALE_PERFCTR_CCNT                     = 0xfe,
        XSCALE_PERFCTR_UNUSED                   = 0xff,
};
2160
/*
 * Counter indices for the XScale PMUs: the cycle counter is 1 and the
 * event counters follow consecutively.
 */
enum xscale_counters {
        XSCALE_CYCLE_COUNTER    = 1,
        XSCALE_COUNTER0         = 2,
        XSCALE_COUNTER1         = 3,
        XSCALE_COUNTER2         = 4,
        XSCALE_COUNTER3         = 5,
};
2168
2169 static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
2170         [PERF_COUNT_HW_CPU_CYCLES]          = XSCALE_PERFCTR_CCNT,
2171         [PERF_COUNT_HW_INSTRUCTIONS]        = XSCALE_PERFCTR_INSTRUCTION,
2172         [PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
2173         [PERF_COUNT_HW_CACHE_MISSES]        = HW_OP_UNSUPPORTED,
2174         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
2175         [PERF_COUNT_HW_BRANCH_MISSES]       = XSCALE_PERFCTR_BRANCH_MISS,
2176         [PERF_COUNT_HW_BUS_CYCLES]          = HW_OP_UNSUPPORTED,
2177 };
2178
2179 static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
2180                                            [PERF_COUNT_HW_CACHE_OP_MAX]
2181                                            [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
2182         [C(L1D)] = {
2183                 [C(OP_READ)] = {
2184                         [C(RESULT_ACCESS)]      = XSCALE_PERFCTR_DCACHE_ACCESS,
2185                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DCACHE_MISS,
2186                 },
2187                 [C(OP_WRITE)] = {
2188                         [C(RESULT_ACCESS)]      = XSCALE_PERFCTR_DCACHE_ACCESS,
2189                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DCACHE_MISS,
2190                 },
2191                 [C(OP_PREFETCH)] = {
2192                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2193                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2194                 },
2195         },
2196         [C(L1I)] = {
2197                 [C(OP_READ)] = {
2198                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2199                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ICACHE_MISS,
2200                 },
2201                 [C(OP_WRITE)] = {
2202                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2203                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ICACHE_MISS,
2204                 },
2205                 [C(OP_PREFETCH)] = {
2206                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2207                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2208                 },
2209         },
2210         [C(LL)] = {
2211                 [C(OP_READ)] = {
2212                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2213                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2214                 },
2215                 [C(OP_WRITE)] = {
2216                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2217                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2218                 },
2219                 [C(OP_PREFETCH)] = {
2220                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2221                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2222                 },
2223         },
2224         [C(DTLB)] = {
2225                 [C(OP_READ)] = {
2226                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2227                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DTLB_MISS,
2228                 },
2229                 [C(OP_WRITE)] = {
2230                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2231                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_DTLB_MISS,
2232                 },
2233                 [C(OP_PREFETCH)] = {
2234                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2235                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2236                 },
2237         },
2238         [C(ITLB)] = {
2239                 [C(OP_READ)] = {
2240                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2241                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ITLB_MISS,
2242                 },
2243                 [C(OP_WRITE)] = {
2244                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2245                         [C(RESULT_MISS)]        = XSCALE_PERFCTR_ITLB_MISS,
2246                 },
2247                 [C(OP_PREFETCH)] = {
2248                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2249                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2250                 },
2251         },
2252         [C(BPU)] = {
2253                 [C(OP_READ)] = {
2254                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2255                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2256                 },
2257                 [C(OP_WRITE)] = {
2258                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2259                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2260                 },
2261                 [C(OP_PREFETCH)] = {
2262                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
2263                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
2264                 },
2265         },
2266 };
2267
/*
 * XScale PMNC control bits.
 *
 * XSCALE_PMU_RESET combines both reset bits. It is built from the
 * XSCALE_-prefixed names defined here, so the macro can be expanded
 * without relying on identifiers that are not defined alongside it.
 */
#define XSCALE_PMU_ENABLE       0x001
#define XSCALE_PMN_RESET        0x002
#define XSCALE_CCNT_RESET       0x004
#define XSCALE_PMU_RESET        (XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
#define XSCALE_PMU_CNT64        0x008
2273
2274 static inline int
2275 xscalepmu_event_map(int config)
2276 {
2277         int mapping = xscale_perf_map[config];
2278         if (HW_OP_UNSUPPORTED == mapping)
2279                 mapping = -EOPNOTSUPP;
2280         return mapping;
2281 }
2282
2283 static u64
2284 xscalepmu_raw_event(u64 config)
2285 {
2286         return config & 0xff;
2287 }
2288
/*
 * xscale1 PMNC layout: overflow flags, interrupt enable bits and the two
 * 8-bit event selection fields.
 */

/* Overflow flags */
#define XSCALE1_COUNT0_OVERFLOW 0x100
#define XSCALE1_COUNT1_OVERFLOW 0x200
#define XSCALE1_CCOUNT_OVERFLOW 0x400
#define XSCALE1_OVERFLOWED_MASK (XSCALE1_COUNT0_OVERFLOW | \
                                 XSCALE1_COUNT1_OVERFLOW | \
                                 XSCALE1_CCOUNT_OVERFLOW)

/* Interrupt enable bits */
#define XSCALE1_COUNT0_INT_EN   0x010
#define XSCALE1_COUNT1_INT_EN   0x020
#define XSCALE1_CCOUNT_INT_EN   0x040

/* Event selection fields */
#define XSCALE1_COUNT0_EVT_SHFT 12
#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT 20
#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
2300
2301 static inline u32
2302 xscale1pmu_read_pmnc(void)
2303 {
2304         u32 val;
2305         asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
2306         return val;
2307 }
2308
2309 static inline void
2310 xscale1pmu_write_pmnc(u32 val)
2311 {
2312         /* upper 4bits and 7, 11 are write-as-0 */
2313         val &= 0xffff77f;
2314         asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
2315 }
2316
2317 static inline int
2318 xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
2319                                         enum xscale_counters counter)
2320 {
2321         int ret = 0;
2322
2323         switch (counter) {
2324         case XSCALE_CYCLE_COUNTER:
2325                 ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
2326                 break;
2327         case XSCALE_COUNTER0:
2328                 ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
2329                 break;
2330         case XSCALE_COUNTER1:
2331                 ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
2332                 break;
2333         default:
2334                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2335         }
2336
2337         return ret;
2338 }
2339
2340 static irqreturn_t
2341 xscale1pmu_handle_irq(int irq_num, void *dev)
2342 {
2343         unsigned long pmnc;
2344         struct perf_sample_data data;
2345         struct cpu_hw_events *cpuc;
2346         struct pt_regs *regs;
2347         int idx;
2348
2349         /*
2350          * NOTE: there's an A stepping erratum that states if an overflow
2351          *       bit already exists and another occurs, the previous
2352          *       Overflow bit gets cleared. There's no workaround.
2353          *       Fixed in B stepping or later.
2354          */
2355         pmnc = xscale1pmu_read_pmnc();
2356
2357         /*
2358          * Write the value back to clear the overflow flags. Overflow
2359          * flags remain in pmnc for use below. We also disable the PMU
2360          * while we process the interrupt.
2361          */
2362         xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2363
2364         if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
2365                 return IRQ_NONE;
2366
2367         regs = get_irq_regs();
2368
2369         perf_sample_data_init(&data, 0);
2370
2371         cpuc = &__get_cpu_var(cpu_hw_events);
2372         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2373                 struct perf_event *event = cpuc->events[idx];
2374                 struct hw_perf_event *hwc;
2375
2376                 if (!test_bit(idx, cpuc->active_mask))
2377                         continue;
2378
2379                 if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
2380                         continue;
2381
2382                 hwc = &event->hw;
2383                 armpmu_event_update(event, hwc, idx);
2384                 data.period = event->hw.last_period;
2385                 if (!armpmu_event_set_period(event, hwc, idx))
2386                         continue;
2387
2388                 if (perf_event_overflow(event, 0, &data, regs))
2389                         armpmu->disable(hwc, idx);
2390         }
2391
2392         perf_event_do_pending();
2393
2394         /*
2395          * Re-enable the PMU.
2396          */
2397         pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2398         xscale1pmu_write_pmnc(pmnc);
2399
2400         return IRQ_HANDLED;
2401 }
2402
2403 static void
2404 xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
2405 {
2406         unsigned long val, mask, evt, flags;
2407
2408         switch (idx) {
2409         case XSCALE_CYCLE_COUNTER:
2410                 mask = 0;
2411                 evt = XSCALE1_CCOUNT_INT_EN;
2412                 break;
2413         case XSCALE_COUNTER0:
2414                 mask = XSCALE1_COUNT0_EVT_MASK;
2415                 evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
2416                         XSCALE1_COUNT0_INT_EN;
2417                 break;
2418         case XSCALE_COUNTER1:
2419                 mask = XSCALE1_COUNT1_EVT_MASK;
2420                 evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
2421                         XSCALE1_COUNT1_INT_EN;
2422                 break;
2423         default:
2424                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2425                 return;
2426         }
2427
2428         spin_lock_irqsave(&pmu_lock, flags);
2429         val = xscale1pmu_read_pmnc();
2430         val &= ~mask;
2431         val |= evt;
2432         xscale1pmu_write_pmnc(val);
2433         spin_unlock_irqrestore(&pmu_lock, flags);
2434 }
2435
2436 static void
2437 xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
2438 {
2439         unsigned long val, mask, evt, flags;
2440
2441         switch (idx) {
2442         case XSCALE_CYCLE_COUNTER:
2443                 mask = XSCALE1_CCOUNT_INT_EN;
2444                 evt = 0;
2445                 break;
2446         case XSCALE_COUNTER0:
2447                 mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
2448                 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
2449                 break;
2450         case XSCALE_COUNTER1:
2451                 mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
2452                 evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
2453                 break;
2454         default:
2455                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2456                 return;
2457         }
2458
2459         spin_lock_irqsave(&pmu_lock, flags);
2460         val = xscale1pmu_read_pmnc();
2461         val &= ~mask;
2462         val |= evt;
2463         xscale1pmu_write_pmnc(val);
2464         spin_unlock_irqrestore(&pmu_lock, flags);
2465 }
2466
2467 static int
2468 xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
2469                         struct hw_perf_event *event)
2470 {
2471         if (XSCALE_PERFCTR_CCNT == event->config_base) {
2472                 if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
2473                         return -EAGAIN;
2474
2475                 return XSCALE_CYCLE_COUNTER;
2476         } else {
2477                 if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) {
2478                         return XSCALE_COUNTER1;
2479                 }
2480
2481                 if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) {
2482                         return XSCALE_COUNTER0;
2483                 }
2484
2485                 return -EAGAIN;
2486         }
2487 }
2488
2489 static void
2490 xscale1pmu_start(void)
2491 {
2492         unsigned long flags, val;
2493
2494         spin_lock_irqsave(&pmu_lock, flags);
2495         val = xscale1pmu_read_pmnc();
2496         val |= XSCALE_PMU_ENABLE;
2497         xscale1pmu_write_pmnc(val);
2498         spin_unlock_irqrestore(&pmu_lock, flags);
2499 }
2500
2501 static void
2502 xscale1pmu_stop(void)
2503 {
2504         unsigned long flags, val;
2505
2506         spin_lock_irqsave(&pmu_lock, flags);
2507         val = xscale1pmu_read_pmnc();
2508         val &= ~XSCALE_PMU_ENABLE;
2509         xscale1pmu_write_pmnc(val);
2510         spin_unlock_irqrestore(&pmu_lock, flags);
2511 }
2512
2513 static inline u32
2514 xscale1pmu_read_counter(int counter)
2515 {
2516         u32 val = 0;
2517
2518         switch (counter) {
2519         case XSCALE_CYCLE_COUNTER:
2520                 asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
2521                 break;
2522         case XSCALE_COUNTER0:
2523                 asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
2524                 break;
2525         case XSCALE_COUNTER1:
2526                 asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
2527                 break;
2528         }
2529
2530         return val;
2531 }
2532
2533 static inline void
2534 xscale1pmu_write_counter(int counter, u32 val)
2535 {
2536         switch (counter) {
2537         case XSCALE_CYCLE_COUNTER:
2538                 asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
2539                 break;
2540         case XSCALE_COUNTER0:
2541                 asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
2542                 break;
2543         case XSCALE_COUNTER1:
2544                 asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
2545                 break;
2546         }
2547 }
2548
/*
 * PMU descriptor for XScale1: binds the xscale1pmu_* register accessors and
 * the event tables shared by both XScale PMU variants.
 */
static const struct arm_pmu xscale1pmu = {
        .id             = ARM_PERF_PMU_ID_XSCALE1,
        .handle_irq     = xscale1pmu_handle_irq,
        .enable         = xscale1pmu_enable_event,
        .disable        = xscale1pmu_disable_event,
        .event_map      = xscalepmu_event_map,
        .raw_event      = xscalepmu_raw_event,
        .read_counter   = xscale1pmu_read_counter,
        .write_counter  = xscale1pmu_write_counter,
        .get_event_idx  = xscale1pmu_get_event_idx,
        .start          = xscale1pmu_start,
        .stop           = xscale1pmu_stop,
        /* Cycle counter plus COUNTER0 and COUNTER1. */
        .num_events     = 3,
        /* Counters are read and written as u32 values. */
        .max_period     = (1LLU << 32) - 1,
};
2564
/*
 * XScale2 PMU bit definitions.
 *
 * *_OVERFLOW:  per-counter bits tested against the overflow flag register
 *              value; XSCALE2_OVERFLOWED_MASK covers all five of them.
 * *_INT_EN:    per-counter bits set/cleared in the interrupt enable register.
 * *_EVT_SHFT / *_EVT_MASK: position and extent of each event counter's
 *              8-bit field in the event select register.
 */
#define XSCALE2_OVERFLOWED_MASK 0x01f
#define XSCALE2_CCOUNT_OVERFLOW 0x001
#define XSCALE2_COUNT0_OVERFLOW 0x002
#define XSCALE2_COUNT1_OVERFLOW 0x004
#define XSCALE2_COUNT2_OVERFLOW 0x008
#define XSCALE2_COUNT3_OVERFLOW 0x010
#define XSCALE2_CCOUNT_INT_EN   0x001
#define XSCALE2_COUNT0_INT_EN   0x002
#define XSCALE2_COUNT1_INT_EN   0x004
#define XSCALE2_COUNT2_INT_EN   0x008
#define XSCALE2_COUNT3_INT_EN   0x010
#define XSCALE2_COUNT0_EVT_SHFT 0
#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT 8
#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT 16
#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT 24
#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
2584
2585 static inline u32
2586 xscale2pmu_read_pmnc(void)
2587 {
2588         u32 val;
2589         asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
2590         /* bits 1-2 and 4-23 are read-unpredictable */
2591         return val & 0xff000009;
2592 }
2593
2594 static inline void
2595 xscale2pmu_write_pmnc(u32 val)
2596 {
2597         /* bits 4-23 are write-as-0, 24-31 are write ignored */
2598         val &= 0xf;
2599         asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
2600 }
2601
2602 static inline u32
2603 xscale2pmu_read_overflow_flags(void)
2604 {
2605         u32 val;
2606         asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
2607         return val;
2608 }
2609
2610 static inline void
2611 xscale2pmu_write_overflow_flags(u32 val)
2612 {
2613         asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
2614 }
2615
2616 static inline u32
2617 xscale2pmu_read_event_select(void)
2618 {
2619         u32 val;
2620         asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
2621         return val;
2622 }
2623
2624 static inline void
2625 xscale2pmu_write_event_select(u32 val)
2626 {
2627         asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
2628 }
2629
2630 static inline u32
2631 xscale2pmu_read_int_enable(void)
2632 {
2633         u32 val;
2634         asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
2635         return val;
2636 }
2637
2638 static void
2639 xscale2pmu_write_int_enable(u32 val)
2640 {
2641         asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
2642 }
2643
2644 static inline int
2645 xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
2646                                         enum xscale_counters counter)
2647 {
2648         int ret = 0;
2649
2650         switch (counter) {
2651         case XSCALE_CYCLE_COUNTER:
2652                 ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
2653                 break;
2654         case XSCALE_COUNTER0:
2655                 ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
2656                 break;
2657         case XSCALE_COUNTER1:
2658                 ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
2659                 break;
2660         case XSCALE_COUNTER2:
2661                 ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
2662                 break;
2663         case XSCALE_COUNTER3:
2664                 ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
2665                 break;
2666         default:
2667                 WARN_ONCE(1, "invalid counter number (%d)\n", counter);
2668         }
2669
2670         return ret;
2671 }
2672
2673 static irqreturn_t
2674 xscale2pmu_handle_irq(int irq_num, void *dev)
2675 {
2676         unsigned long pmnc, of_flags;
2677         struct perf_sample_data data;
2678         struct cpu_hw_events *cpuc;
2679         struct pt_regs *regs;
2680         int idx;
2681
2682         /* Disable the PMU. */
2683         pmnc = xscale2pmu_read_pmnc();
2684         xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
2685
2686         /* Check the overflow flag register. */
2687         of_flags = xscale2pmu_read_overflow_flags();
2688         if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
2689                 return IRQ_NONE;
2690
2691         /* Clear the overflow bits. */
2692         xscale2pmu_write_overflow_flags(of_flags);
2693
2694         regs = get_irq_regs();
2695
2696         perf_sample_data_init(&data, 0);
2697
2698         cpuc = &__get_cpu_var(cpu_hw_events);
2699         for (idx = 0; idx <= armpmu->num_events; ++idx) {
2700                 struct perf_event *event = cpuc->events[idx];
2701                 struct hw_perf_event *hwc;
2702
2703                 if (!test_bit(idx, cpuc->active_mask))
2704                         continue;
2705
2706                 if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
2707                         continue;
2708
2709                 hwc = &event->hw;
2710                 armpmu_event_update(event, hwc, idx);
2711                 data.period = event->hw.last_period;
2712                 if (!armpmu_event_set_period(event, hwc, idx))
2713                         continue;
2714
2715                 if (perf_event_overflow(event, 0, &data, regs))
2716                         armpmu->disable(hwc, idx);
2717         }
2718
2719         perf_event_do_pending();
2720
2721         /*
2722          * Re-enable the PMU.
2723          */
2724         pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
2725         xscale2pmu_write_pmnc(pmnc);
2726
2727         return IRQ_HANDLED;
2728 }
2729
2730 static void
2731 xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
2732 {
2733         unsigned long flags, ien, evtsel;
2734
2735         ien = xscale2pmu_read_int_enable();
2736         evtsel = xscale2pmu_read_event_select();
2737
2738         switch (idx) {
2739         case XSCALE_CYCLE_COUNTER:
2740                 ien |= XSCALE2_CCOUNT_INT_EN;
2741                 break;
2742         case XSCALE_COUNTER0:
2743                 ien |= XSCALE2_COUNT0_INT_EN;
2744                 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2745                 evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
2746                 break;
2747         case XSCALE_COUNTER1:
2748                 ien |= XSCALE2_COUNT1_INT_EN;
2749                 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2750                 evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
2751                 break;
2752         case XSCALE_COUNTER2:
2753                 ien |= XSCALE2_COUNT2_INT_EN;
2754                 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2755                 evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
2756                 break;
2757         case XSCALE_COUNTER3:
2758                 ien |= XSCALE2_COUNT3_INT_EN;
2759                 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2760                 evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
2761                 break;
2762         default:
2763                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2764                 return;
2765         }
2766
2767         spin_lock_irqsave(&pmu_lock, flags);
2768         xscale2pmu_write_event_select(evtsel);
2769         xscale2pmu_write_int_enable(ien);
2770         spin_unlock_irqrestore(&pmu_lock, flags);
2771 }
2772
2773 static void
2774 xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
2775 {
2776         unsigned long flags, ien, evtsel;
2777
2778         ien = xscale2pmu_read_int_enable();
2779         evtsel = xscale2pmu_read_event_select();
2780
2781         switch (idx) {
2782         case XSCALE_CYCLE_COUNTER:
2783                 ien &= ~XSCALE2_CCOUNT_INT_EN;
2784                 break;
2785         case XSCALE_COUNTER0:
2786                 ien &= ~XSCALE2_COUNT0_INT_EN;
2787                 evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
2788                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
2789                 break;
2790         case XSCALE_COUNTER1:
2791                 ien &= ~XSCALE2_COUNT1_INT_EN;
2792                 evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
2793                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
2794                 break;
2795         case XSCALE_COUNTER2:
2796                 ien &= ~XSCALE2_COUNT2_INT_EN;
2797                 evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
2798                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
2799                 break;
2800         case XSCALE_COUNTER3:
2801                 ien &= ~XSCALE2_COUNT3_INT_EN;
2802                 evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
2803                 evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
2804                 break;
2805         default:
2806                 WARN_ONCE(1, "invalid counter number (%d)\n", idx);
2807                 return;
2808         }
2809
2810         spin_lock_irqsave(&pmu_lock, flags);
2811         xscale2pmu_write_event_select(evtsel);
2812         xscale2pmu_write_int_enable(ien);
2813         spin_unlock_irqrestore(&pmu_lock, flags);
2814 }
2815
2816 static int
2817 xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
2818                         struct hw_perf_event *event)
2819 {
2820         int idx = xscale1pmu_get_event_idx(cpuc, event);
2821         if (idx >= 0)
2822                 goto out;
2823
2824         if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
2825                 idx = XSCALE_COUNTER3;
2826         else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
2827                 idx = XSCALE_COUNTER2;
2828 out:
2829         return idx;
2830 }
2831
2832 static void
2833 xscale2pmu_start(void)
2834 {
2835         unsigned long flags, val;
2836
2837         spin_lock_irqsave(&pmu_lock, flags);
2838         val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
2839         val |= XSCALE_PMU_ENABLE;
2840         xscale2pmu_write_pmnc(val);
2841         spin_unlock_irqrestore(&pmu_lock, flags);
2842 }
2843
2844 static void
2845 xscale2pmu_stop(void)
2846 {
2847         unsigned long flags, val;
2848
2849         spin_lock_irqsave(&pmu_lock, flags);
2850         val = xscale2pmu_read_pmnc();
2851         val &= ~XSCALE_PMU_ENABLE;
2852         xscale2pmu_write_pmnc(val);
2853         spin_unlock_irqrestore(&pmu_lock, flags);
2854 }
2855
2856 static inline u32
2857 xscale2pmu_read_counter(int counter)
2858 {
2859         u32 val = 0;
2860
2861         switch (counter) {
2862         case XSCALE_CYCLE_COUNTER:
2863                 asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
2864                 break;
2865         case XSCALE_COUNTER0:
2866                 asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
2867                 break;
2868         case XSCALE_COUNTER1:
2869                 asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
2870                 break;
2871         case XSCALE_COUNTER2:
2872                 asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
2873                 break;
2874         case XSCALE_COUNTER3:
2875                 asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
2876                 break;
2877         }
2878
2879         return val;
2880 }
2881
2882 static inline void
2883 xscale2pmu_write_counter(int counter, u32 val)
2884 {
2885         switch (counter) {
2886         case XSCALE_CYCLE_COUNTER:
2887                 asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
2888                 break;
2889         case XSCALE_COUNTER0:
2890                 asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
2891                 break;
2892         case XSCALE_COUNTER1:
2893                 asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
2894                 break;
2895         case XSCALE_COUNTER2:
2896                 asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
2897                 break;
2898         case XSCALE_COUNTER3:
2899                 asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
2900                 break;
2901         }
2902 }
2903
/*
 * PMU descriptor for XScale2: binds the xscale2pmu_* register accessors and
 * the event tables shared by both XScale PMU variants.
 */
static const struct arm_pmu xscale2pmu = {
        .id             = ARM_PERF_PMU_ID_XSCALE2,
        .handle_irq     = xscale2pmu_handle_irq,
        .enable         = xscale2pmu_enable_event,
        .disable        = xscale2pmu_disable_event,
        .event_map      = xscalepmu_event_map,
        .raw_event      = xscalepmu_raw_event,
        .read_counter   = xscale2pmu_read_counter,
        .write_counter  = xscale2pmu_write_counter,
        .get_event_idx  = xscale2pmu_get_event_idx,
        .start          = xscale2pmu_start,
        .stop           = xscale2pmu_stop,
        /* Cycle counter plus COUNTER0 through COUNTER3. */
        .num_events     = 5,
        /* Counters are read and written as u32 values. */
        .max_period     = (1LLU << 32) - 1,
};
2919
2920 static int __init
2921 init_hw_perf_events(void)
2922 {
2923         unsigned long cpuid = read_cpuid_id();
2924         unsigned long implementor = (cpuid & 0xFF000000) >> 24;
2925         unsigned long part_number = (cpuid & 0xFFF0);
2926
2927         /* ARM Ltd CPUs. */
2928         if (0x41 == implementor) {
2929                 switch (part_number) {
2930                 case 0xB360:    /* ARM1136 */
2931                 case 0xB560:    /* ARM1156 */
2932                 case 0xB760:    /* ARM1176 */
2933                         armpmu = &armv6pmu;
2934                         memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
2935                                         sizeof(armv6_perf_cache_map));
2936                         perf_max_events = armv6pmu.num_events;
2937                         break;
2938                 case 0xB020:    /* ARM11mpcore */
2939                         armpmu = &armv6mpcore_pmu;
2940                         memcpy(armpmu_perf_cache_map,
2941                                armv6mpcore_perf_cache_map,
2942                                sizeof(armv6mpcore_perf_cache_map));
2943                         perf_max_events = armv6mpcore_pmu.num_events;
2944                         break;
2945                 case 0xC080:    /* Cortex-A8 */
2946                         armv7pmu.id = ARM_PERF_PMU_ID_CA8;
2947                         memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
2948                                 sizeof(armv7_a8_perf_cache_map));
2949                         armv7pmu.event_map = armv7_a8_pmu_event_map;
2950                         armpmu = &armv7pmu;
2951
2952                         /* Reset PMNC and read the nb of CNTx counters
2953                             supported */
2954                         armv7pmu.num_events = armv7_reset_read_pmnc();
2955                         perf_max_events = armv7pmu.num_events;
2956                         break;
2957                 case 0xC090:    /* Cortex-A9 */
2958                         armv7pmu.id = ARM_PERF_PMU_ID_CA9;
2959                         memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
2960                                 sizeof(armv7_a9_perf_cache_map));
2961                         armv7pmu.event_map = armv7_a9_pmu_event_map;
2962                         armpmu = &armv7pmu;
2963
2964                         /* Reset PMNC and read the nb of CNTx counters
2965                             supported */
2966                         armv7pmu.num_events = armv7_reset_read_pmnc();
2967                         perf_max_events = armv7pmu.num_events;
2968                         break;
2969                 }
2970         /* Intel CPUs [xscale]. */
2971         } else if (0x69 == implementor) {
2972                 part_number = (cpuid >> 13) & 0x7;
2973                 switch (part_number) {
2974                 case 1:
2975                         armpmu = &xscale1pmu;
2976                         memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2977                                         sizeof(xscale_perf_cache_map));
2978                         perf_max_events = xscale1pmu.num_events;
2979                         break;
2980                 case 2:
2981                         armpmu = &xscale2pmu;
2982                         memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
2983                                         sizeof(xscale_perf_cache_map));
2984                         perf_max_events = xscale2pmu.num_events;
2985                         break;
2986                 }
2987         }
2988
2989         if (armpmu) {
2990                 pr_info("enabled with %s PMU driver, %d counters available\n",
2991                                 arm_pmu_names[armpmu->id], armpmu->num_events);
2992         } else {
2993                 pr_info("no hardware support available\n");
2994                 perf_max_events = -1;
2995         }
2996
2997         return 0;
2998 }
2999 arch_initcall(init_hw_perf_events);
3000
3001 /*
3002  * Callchain handling code.
3003  */
3004 static inline void
3005 callchain_store(struct perf_callchain_entry *entry,
3006                 u64 ip)
3007 {
3008         if (entry->nr < PERF_MAX_STACK_DEPTH)
3009                 entry->ip[entry->nr++] = ip;
3010 }
3011
/*
 * The registers we're interested in are at the end of the variable
 * length saved register structure. The fp points at the end of this
 * structure so the address of this struct is:
 * (struct frame_tail *)(xxx->fp)-1
 *
 * This code has been adapted from the ARM OProfile support.
 */
struct frame_tail {
        /* Saved frame pointer; the next frame tail sits just below it. */
        struct frame_tail   *fp;
        unsigned long       sp;
        /* Saved link register, recorded as this frame's return address. */
        unsigned long       lr;
} __attribute__((packed));
3025
3026 /*
3027  * Get the return address for a single stackframe and return a pointer to the
3028  * next frame tail.
3029  */
3030 static struct frame_tail *
3031 user_backtrace(struct frame_tail *tail,
3032                struct perf_callchain_entry *entry)
3033 {
3034         struct frame_tail buftail;
3035
3036         /* Also check accessibility of one struct frame_tail beyond */
3037         if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
3038                 return NULL;
3039         if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
3040                 return NULL;
3041
3042         callchain_store(entry, buftail.lr);
3043
3044         /*
3045          * Frame pointers should strictly progress back up the stack
3046          * (towards higher addresses).
3047          */
3048         if (tail >= buftail.fp)
3049                 return NULL;
3050
3051         return buftail.fp - 1;
3052 }
3053
3054 static void
3055 perf_callchain_user(struct pt_regs *regs,
3056                     struct perf_callchain_entry *entry)
3057 {
3058         struct frame_tail *tail;
3059
3060         callchain_store(entry, PERF_CONTEXT_USER);
3061
3062         if (!user_mode(regs))
3063                 regs = task_pt_regs(current);
3064
3065         tail = (struct frame_tail *)regs->ARM_fp - 1;
3066
3067         while (tail && !((unsigned long)tail & 0x3))
3068                 tail = user_backtrace(tail, entry);
3069 }
3070
3071 /*
3072  * Gets called by walk_stackframe() for every stackframe. This will be called
3073  * whist unwinding the stackframe and is like a subroutine return so we use
3074  * the PC.
3075  */
3076 static int
3077 callchain_trace(struct stackframe *fr,
3078                 void *data)
3079 {
3080         struct perf_callchain_entry *entry = data;
3081         callchain_store(entry, fr->pc);
3082         return 0;
3083 }
3084
3085 static void
3086 perf_callchain_kernel(struct pt_regs *regs,
3087                       struct perf_callchain_entry *entry)
3088 {
3089         struct stackframe fr;
3090
3091         callchain_store(entry, PERF_CONTEXT_KERNEL);
3092         fr.fp = regs->ARM_fp;
3093         fr.sp = regs->ARM_sp;
3094         fr.lr = regs->ARM_lr;
3095         fr.pc = regs->ARM_pc;
3096         walk_stackframe(&fr, callchain_trace, entry);
3097 }
3098
3099 static void
3100 perf_do_callchain(struct pt_regs *regs,
3101                   struct perf_callchain_entry *entry)
3102 {
3103         int is_user;
3104
3105         if (!regs)
3106                 return;
3107
3108         is_user = user_mode(regs);
3109
3110         if (!current || !current->pid)
3111                 return;
3112
3113         if (is_user && current->state != TASK_RUNNING)
3114                 return;
3115
3116         if (!is_user)
3117                 perf_callchain_kernel(regs, entry);
3118
3119         if (current->mm)
3120                 perf_callchain_user(regs, entry);
3121 }
3122
3123 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
3124
3125 struct perf_callchain_entry *
3126 perf_callchain(struct pt_regs *regs)
3127 {
3128         struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
3129
3130         entry->nr = 0;
3131         perf_do_callchain(regs, entry);
3132         return entry;
3133 }