98bea2401a9a958ccf790a87c57e6898244b592a
[linux-3.10.git] / drivers / misc / tegra-profiler / armv7_pmu.c
1 /*
2  * drivers/misc/tegra-profiler/armv7_pmu.c
3  *
4  * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  */
16
17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
19 #include <linux/err.h>
20 #include <linux/bitmap.h>
21 #include <linux/slab.h>
22 #include <asm/cputype.h>
23 #include <asm/cpu.h>
24
25 #include <linux/tegra_profiler.h>
26
27 #include "arm_pmu.h"
28 #include "armv7_pmu.h"
29 #include "armv7_events.h"
30 #include "quadd.h"
31 #include "debug.h"
32
33 static DEFINE_PER_CPU(struct quadd_pmu_ctx, pmu_ctx);
34
/* CPU cores this driver can profile; stored in quadd_pmu_ctx::arch.type. */
enum {
	QUADD_ARM_CPU_TYPE_UNKNOWN,
	QUADD_ARM_CPU_TYPE_CORTEX_A5,
	QUADD_ARM_CPU_TYPE_CORTEX_A8,
	QUADD_ARM_CPU_TYPE_CORTEX_A9,
	QUADD_ARM_CPU_TYPE_CORTEX_A15,
};
42
/* Per-cpu runtime state of the sampling session. */
struct quadd_pmu_info {
	/* which hardware counters this driver claimed on start */
	DECLARE_BITMAP(used_cntrs, QUADD_MAX_PMU_COUNTERS);
	/* last raw value read from each counter (for delta computation) */
	u32 prev_vals[QUADD_MAX_PMU_COUNTERS];
	/* non-zero if the PMU was already running (e.g. owned by perf)
	 * when pmu_start() ran; then we must not reprogram PMNC */
	int is_already_active;
};
48
/* Cross-cpu aggregate of free counters, filled by __get_free_counters(). */
struct quadd_cntrs_info {
	int pcntrs;	/* min number of free event counters over all cpus */
	int ccntr;	/* non-zero if the cycle counter is free on all cpus */

	spinlock_t lock;	/* serializes updates from on_each_cpu() */
};
55
56 static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info);
57
/*
 * Cortex-A9: generic quadd event id -> ARMv7 event number.
 * Note: A9 L1 dcache refill does not distinguish read/write, so both
 * generic ids map to the same hw event (see armv7_pmu_adjust_value()).
 * L2 events are not available via this PMU on A9.
 */
static unsigned quadd_armv7_a9_events_map[QUADD_EVENT_TYPE_MAX] = {
	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
		QUADD_ARMV7_A9_HW_EVENT_INST_OUT_OF_RENAME_STAGE,
	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV7_HW_EVENT_PC_WRITE,
	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
		QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_TYPE_BUS_CYCLES] =
		QUADD_ARMV7_HW_EVENT_CLOCK_CYCLES,

	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
		QUADD_ARMV7_HW_EVENT_IFETCH_MISS,

	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV7_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV7_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
		QUADD_ARMV7_UNSUPPORTED_EVENT,
};
82
/*
 * Cortex-A15: generic quadd event id -> ARMv7 event number.
 * Unlike A9, the A15 PMU has distinct read/write L1/L2 dcache refill
 * events; L2 icache misses remain unsupported.
 */
static unsigned quadd_armv7_a15_events_map[QUADD_EVENT_TYPE_MAX] = {
	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
				QUADD_ARMV7_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
				QUADD_ARMV7_A15_HW_EVENT_SPEC_PC_WRITE,
	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
				QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_TYPE_BUS_CYCLES] = QUADD_ARMV7_HW_EVENT_BUS_CYCLES,

	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
				QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_REFILL,
	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
				QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_REFILL,
	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
				QUADD_ARMV7_HW_EVENT_IFETCH_MISS,

	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
				QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_REFILL,
	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
				QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_REFILL,
	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
				QUADD_ARMV7_UNSUPPORTED_EVENT,
};
106
static inline u32
armv7_pmu_pmnc_read(void)
{
	u32 val;

	/* Read Performance MoNitor Control (PMNC) register */
	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
	return val;
}
116
static inline void
armv7_pmu_pmnc_write(u32 val)
{
	/* barrier: make sure prior counter programming lands before PMNC */
	isb();

	/* Write Performance MoNitor Control (PMNC) register;
	 * reserved bits are masked off */
	asm volatile("mcr p15, 0, %0, c9, c12, 0" : :
		     "r"(val & QUADD_ARMV7_PMNC_MASK));
}
126
static inline u32
armv7_pmu_cntens_read(void)
{
	u32 val;

	/* Read CouNT ENable Set (CNTENS) register: a set bit means the
	 * corresponding counter is currently enabled */
	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(val));
	return val;
}
136
static inline void
armv7_pmu_cntens_write(u32 val)
{
	/* Write CouNT ENable Set (CNTENS) register: writing 1 bits
	 * enables the corresponding counters, 0 bits are ignored */
	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
}
143
static inline void
armv7_pmu_cntenc_write(u32 val)
{
	/* Write CouNT ENable Clear (CNTENC) register: writing 1 bits
	 * disables the corresponding counters, 0 bits are ignored */
	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
}
150
static inline void
armv7_pmu_pmnxsel_write(u32 val)
{
	/* Write Performance Counter SELection (PMNXSEL) register;
	 * subsequent PMCNT/EVTSEL accesses address the selected counter */
	asm volatile("mcr p15, 0, %0, c9, c12, 5" : :
		     "r" (val & QUADD_ARMV7_SELECT_MASK));
	/* barrier: the selection must take effect before any dependent
	 * counter register access */
	isb();
}
159
static inline u32
armv7_pmu_ccnt_read(void)
{
	u32 val;

	/* Read Cycle CouNT (CCNT) register */
	asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r"(val));
	return val;
}
169
static inline void
armv7_pmu_ccnt_write(u32 val)
{
	/* Write Cycle CouNT (CCNT) register */
	asm volatile ("mcr p15, 0, %0, c9, c13, 0" : : "r"(val));
}
176
static inline u32
armv7_pmu_pmcnt_read(void)
{
	u32 val;

	/* Read Performance Monitor CouNT (PMCNTx) register of the
	 * counter previously chosen via PMNXSEL */
	asm volatile ("mrc p15, 0, %0, c9, c13, 2" : "=r"(val));
	return val;
}
186
static inline void
armv7_pmu_pmcnt_write(u32 val)
{
	/* Write Performance Monitor CouNT (PMCNTx) register of the
	 * counter previously chosen via PMNXSEL */
	asm volatile ("mcr p15, 0, %0, c9, c13, 2" : : "r"(val));
}
193
static inline void
armv7_pmu_evtsel_write(u32 event)
{
	/* Write Event SELection (EVTSEL) register: binds the given hw
	 * event number to the counter chosen via PMNXSEL */
	asm volatile("mcr p15, 0, %0, c9, c13, 1" : :
		     "r" (event & QUADD_ARMV7_EVTSEL_MASK));
}
201
static inline u32
armv7_pmu_intens_read(void)
{
	u32 val;

	/* Read INTerrupt ENable Set (INTENS) register */
	asm volatile ("mrc p15, 0, %0, c9, c14, 1" : "=r"(val));
	return val;
}
211
static inline void
armv7_pmu_intens_write(u32 val)
{
	/* Write INTerrupt ENable Set (INTENS) register: 1 bits enable
	 * overflow interrupts for the corresponding counters */
	asm volatile ("mcr p15, 0, %0, c9, c14, 1" : : "r"(val));
}
218
static inline void
armv7_pmu_intenc_write(u32 val)
{
	/* Write INTerrupt ENable Clear (INTENC) register: 1 bits disable
	 * overflow interrupts for the corresponding counters */
	asm volatile ("mcr p15, 0, %0, c9, c14, 2" : : "r"(val));
}
225
/* Enable one counter; idx is a bit position in CNTENS (31 == cycle counter). */
static void enable_counter(int idx)
{
	armv7_pmu_cntens_write(1UL << idx);
}
230
/* Disable one counter; idx is a bit position in CNTENC. */
static void disable_counter(int idx)
{
	armv7_pmu_cntenc_write(1UL << idx);
}
235
/* Select which event counter subsequent PMCNT/EVTSEL accesses address. */
static void select_counter(unsigned int counter)
{
	armv7_pmu_pmnxsel_write(counter);
}
240
/*
 * Return 1 if the PMU on the current cpu is globally enabled (PMNC.E)
 * AND at least one counter we care about (our counters mask or CCNT)
 * is enabled; 0 otherwise.  Must run with preemption disabled
 * (uses __get_cpu_var).
 */
static int is_pmu_enabled(void)
{
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 pmnc = armv7_pmu_pmnc_read();

	if (pmnc & QUADD_ARMV7_PMNC_E) {
		u32 cnten = armv7_pmu_cntens_read();

		cnten &= local_pmu_ctx->counters_mask | QUADD_ARMV7_CCNT;
		return cnten ? 1 : 0;
	}

	return 0;
}
255
/*
 * Read a counter by index: the cycle counter via CCNT, any other
 * counter via PMNXSEL+PMCNT (note: changes the current counter
 * selection as a side effect).
 */
static u32 read_counter(int idx)
{
	u32 val;

	if (idx == QUADD_ARMV7_CCNT_BIT) {
		val = armv7_pmu_ccnt_read();
	} else {
		select_counter(idx);
		val = armv7_pmu_pmcnt_read();
	}

	return val;
}
269
/*
 * Write a counter by index: the cycle counter via CCNT, any other
 * counter via PMNXSEL+PMCNT (changes the current counter selection).
 */
static void write_counter(int idx, u32 value)
{
	if (idx == QUADD_ARMV7_CCNT_BIT) {
		armv7_pmu_ccnt_write(value);
	} else {
		select_counter(idx);
		armv7_pmu_pmcnt_write(value);
	}
}
279
/*
 * Fill @bitmap with the counters (of this cpu's counters_mask, plus
 * CCNT) that are currently NOT enabled, i.e. free for us to claim.
 * If @ccntr is non-NULL, store there whether the cycle counter is free.
 * Returns the number of free event counters (cycle counter excluded).
 * Must run with preemption disabled (uses __get_cpu_var).
 */
static int
get_free_counters(unsigned long *bitmap, int nbits, int *ccntr)
{
	int cc;
	u32 cntens;

	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

	cntens = armv7_pmu_cntens_read();
	/* invert: 1 bits now mean "free", restricted to counters we own */
	cntens = ~cntens & (local_pmu_ctx->counters_mask | QUADD_ARMV7_CCNT);

	bitmap_zero(bitmap, nbits);
	/* NOTE(review): copying a u32 through unsigned long * assumes
	 * little-endian layout — fine on ARM, not portable in general */
	bitmap_copy(bitmap, (unsigned long *)&cntens,
		    BITS_PER_BYTE * sizeof(u32));

	cc = (cntens & QUADD_ARMV7_CCNT) ? 1 : 0;

	if (ccntr)
		*ccntr = cc;

	return bitmap_weight(bitmap, BITS_PER_BYTE * sizeof(u32)) - cc;
}
302
/*
 * Post-process a raw counter value for reporting.
 * Cortex A8/A9: the L1 dcache refill counter does not differentiate
 * between read and write misses, so when both read and write miss
 * events were requested (l1_cache_rw set), the shared count is divided
 * by two to approximate each half.
 */
static u32 armv7_pmu_adjust_value(u32 value, int event_id)
{
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

	if (local_pmu_ctx->l1_cache_rw &&
	    (local_pmu_ctx->arch.type == QUADD_ARM_CPU_TYPE_CORTEX_A8 ||
	    local_pmu_ctx->arch.type == QUADD_ARM_CPU_TYPE_CORTEX_A9) &&
	    (event_id == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES ||
	    event_id == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)) {
		return value / 2;
	}
	return value;
}
320
/* Disable the overflow interrupt of a single counter. */
static void __maybe_unused
disable_interrupt(int idx)
{
	armv7_pmu_intenc_write(1UL << idx);
}
326
/* Disable overflow interrupts for the cycle counter and all event
 * counters in this cpu's counters mask (this driver polls, it does not
 * use PMU interrupts). */
static void
disable_all_interrupts(void)
{
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 val = QUADD_ARMV7_CCNT | local_pmu_ctx->counters_mask;

	armv7_pmu_intenc_write(val);
}
335
/* Clear pending overflow flags (FLAG register, c9/c12/3) for the cycle
 * counter and all event counters in this cpu's counters mask. */
static void
armv7_pmnc_reset_overflow_flags(void)
{
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 val = QUADD_ARMV7_CCNT | local_pmu_ctx->counters_mask;

	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
}
344
/* Bind hw event number @event to event counter @idx. */
static void
select_event(unsigned int idx, unsigned int event)
{
	select_counter(idx);
	armv7_pmu_evtsel_write(event);
}
351
/* Globally stop the PMU (clear PMNC.E if set) and disable the cycle
 * counter plus every event counter in this cpu's counters mask. */
static void disable_all_counters(void)
{
	u32 val;
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

	/* Disable all counters */
	val = armv7_pmu_pmnc_read();
	if (val & QUADD_ARMV7_PMNC_E)
		armv7_pmu_pmnc_write(val & ~QUADD_ARMV7_PMNC_E);

	armv7_pmu_cntenc_write(QUADD_ARMV7_CCNT | local_pmu_ctx->counters_mask);
}
364
/* Globally start the PMU: set PMNC.E (enable) and PMNC.X (export of
 * events to an external monitor). */
static void enable_all_counters(void)
{
	u32 val;

	/* Enable all counters */
	val = armv7_pmu_pmnc_read();
	val |= QUADD_ARMV7_PMNC_E | QUADD_ARMV7_PMNC_X;
	armv7_pmu_pmnc_write(val);
}
374
/* Zero all counters via PMNC: P resets the event counters, C resets
 * the cycle counter. */
static void reset_all_counters(void)
{
	u32 val;

	val = armv7_pmu_pmnc_read();
	val |= QUADD_ARMV7_PMNC_P | QUADD_ARMV7_PMNC_C;
	armv7_pmu_pmnc_write(val);
}
383
/* Bring the local PMU to a known state: no stale overflow flags, no
 * overflow interrupts. */
static void quadd_init_pmu(void)
{
	armv7_pmnc_reset_overflow_flags();
	disable_all_interrupts();
}
389
/* "enable" hook of quadd_event_source_interface; reservation is only
 * logged — actual hardware setup happens in pmu_start(). */
static int pmu_enable(void)
{
	pr_info("pmu was reserved\n");
	return 0;
}
395
/*
 * Per-cpu teardown, run via on_each_cpu() from pmu_disable().
 * If we owned the PMU outright, disable and reset everything;
 * if another user was active, only release the counters we claimed.
 */
static void __pmu_disable(void *arg)
{
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);

	if (!pi->is_already_active) {
		pr_info("[%d] reset all counters\n",
			smp_processor_id());

		disable_all_counters();
		reset_all_counters();
	} else {
		int idx;

		for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
			pr_info("[%d] reset counter: %d\n",
				smp_processor_id(), idx);

			disable_counter(idx);
			write_counter(idx, 0);
		}
	}
}
418
/* "disable" hook: release the PMU on every online cpu. */
static void pmu_disable(void)
{
	on_each_cpu(__pmu_disable, NULL, 1);
	pr_info("pmu was released\n");
}
424
/*
 * Program and start the local cpu's counters: give each requested event
 * a free hardware counter (the cycle counter for CPU_CYCLES, an event
 * counter otherwise), zero it and enable it.  Runs on each cpu with
 * preemption disabled (uses __get_cpu_var).  If the PMU was already
 * running (e.g. owned by perf), only still-free counters are taken and
 * the global PMNC state is left alone.
 */
static void pmu_start(void)
{
	int idx = 0, pcntrs, ccntr;
	u32 event;
	DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	bitmap_zero(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS);

	if (is_pmu_enabled()) {
		pi->is_already_active = 1;
	} else {
		disable_all_counters();
		quadd_init_pmu();

		pi->is_already_active = 0;
	}

	pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		int index;

		/* reset the saved previous value for this event slot */
		*prevp++ = 0;

		event = ei->hw_value;

		if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
			if (!ccntr) {
				pr_err_once("Error: cpu cycles counter is already occupied\n");
				return;
			}
			index = QUADD_ARMV7_CCNT_BIT;
		} else {
			if (!pcntrs--) {
				pr_err_once("Error: too many performance events\n");
				return;
			}

			index = find_next_bit(free_bitmap,
					      QUADD_MAX_PMU_COUNTERS, idx);
			if (index >= QUADD_MAX_PMU_COUNTERS) {
				pr_err_once("Error: too many events\n");
				return;
			}
			/* next search starts past the bit just taken */
			idx = index + 1;
			select_event(index, event);
		}
		set_bit(index, pi->used_cntrs);

		write_counter(index, 0);
		enable_counter(index);
	}

	if (!pi->is_already_active) {
		reset_all_counters();
		enable_all_counters();
	}

	qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
}
489
/*
 * Stop counting on the local cpu.  Mirrors __pmu_disable(): full reset
 * if we owned the PMU, otherwise only release the counters we claimed.
 */
static void pmu_stop(void)
{
	int idx;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);

	if (!pi->is_already_active) {
		disable_all_counters();
		reset_all_counters();
	} else {
		for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
			disable_counter(idx);
			write_counter(idx, 0);
		}
	}

	qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
}
507
/*
 * Read up to @max_events samples from the local cpu's counters into
 * @events.  For each configured event, reports the current raw value
 * (adjusted by armv7_pmu_adjust_value()) and the previously read value,
 * then stores the new value as "previous" for the next call.
 * Returns the number of entries filled, or 0 if counters were never
 * set up.  Runs with preemption disabled (uses __get_cpu_var).
 */
static int __maybe_unused
pmu_read(struct event_data *events, int max_events)
{
	u32 val;
	int idx = 0, i = 0;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	if (bitmap_empty(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS)) {
		pr_err_once("Error: counters were not initialized\n");
		return 0;
	}

	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		int index;

		if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
			if (!test_bit(QUADD_ARMV7_CCNT_BIT, pi->used_cntrs)) {
				pr_err_once("Error: ccntr is not used\n");
				return 0;
			}
			index = QUADD_ARMV7_CCNT_BIT;
		} else {
			/* walk used_cntrs in the same order pmu_start()
			 * assigned them */
			index = find_next_bit(pi->used_cntrs,
					      QUADD_MAX_PMU_COUNTERS, idx);
			idx = index + 1;

			if (index >= QUADD_MAX_PMU_COUNTERS) {
				pr_err_once("Error: perf counter is not used\n");
				return 0;
			}
		}

		val = read_counter(index);
		val = armv7_pmu_adjust_value(val, ei->quadd_event_id);

		events->event_source = QUADD_EVENT_SOURCE_PMU;
		events->event_id = ei->quadd_event_id;

		events->val = val;
		events->prev_val = *prevp;

		*prevp = val;

		qm_debug_read_counter(events->event_id, events->prev_val,
				      events->val);

		if (++i >= max_events)
			break;

		events++;
		prevp++;
	}

	return i;
}
566
/*
 * Debug stand-in for pmu_read() (selected via QUADD_USE_EMULATE_COUNTERS):
 * produces a deterministic sawtooth (100..200 step 5) instead of real
 * counter values.  Returns the number of entries filled.
 */
static int __maybe_unused
pmu_read_emulate(struct event_data *events, int max_events)
{
	int i = 0;
	static u32 val = 100;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		if (val > 200)
			val = 100;

		/* NOTE(review): event_id is filled from the previous value,
		 * not from ei->quadd_event_id as in pmu_read() — looks like
		 * an oversight in this debug path; verify before relying
		 * on event_id here */
		events->event_id = *prevp;
		events->val = val;

		*prevp = val;
		val += 5;

		if (++i >= max_events)
			break;

		events++;
		prevp++;
	}

	return i;
}
596
/*
 * Per-cpu worker for set_events(), run via on_each_cpu(): folds this
 * cpu's free-counter situation into the shared quadd_cntrs_info (@arg),
 * keeping the minimum free event-counter count and clearing ccntr if
 * the cycle counter is busy anywhere.
 */
static void __get_free_counters(void *arg)
{
	int pcntrs, ccntr;
	DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
	struct quadd_cntrs_info *ci = arg;

	pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

	spin_lock(&ci->lock);

	ci->pcntrs = min_t(int, pcntrs, ci->pcntrs);

	if (!ccntr)
		ci->ccntr = 0;

	pr_info("[%d] pcntrs/ccntr: %d/%d, free_bitmap: %#lx\n",
		smp_processor_id(), pcntrs, ccntr, free_bitmap[0]);

	spin_unlock(&ci->lock);
}
617
618 static void free_events(struct list_head *head)
619 {
620         struct quadd_pmu_event_info *entry, *next;
621
622         list_for_each_entry_safe(entry, next, head, list) {
623                 list_del(&entry->list);
624                 kfree(entry);
625         }
626 }
627
628 static int set_events(int cpuid, int *events, int size)
629 {
630         int free_pcntrs, err;
631         int i, nr_l1_r = 0, nr_l1_w = 0;
632         struct quadd_cntrs_info free_ci;
633
634         struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
635
636         local_pmu_ctx->l1_cache_rw = 0;
637
638         free_events(&local_pmu_ctx->used_events);
639
640         if (!events || !size)
641                 return 0;
642
643         if (!local_pmu_ctx->current_map) {
644                 pr_err("Invalid current_map\n");
645                 return -ENODEV;
646         }
647
648         spin_lock_init(&free_ci.lock);
649         free_ci.pcntrs = QUADD_MAX_PMU_COUNTERS;
650         free_ci.ccntr = 1;
651
652         on_each_cpu(__get_free_counters, &free_ci, 1);
653
654         free_pcntrs = free_ci.pcntrs;
655         pr_info("free counters: pcntrs/ccntr: %d/%d\n",
656                 free_pcntrs, free_ci.ccntr);
657
658         for (i = 0; i < size; i++) {
659                 struct quadd_pmu_event_info *ei;
660
661                 if (events[i] > QUADD_EVENT_TYPE_MAX) {
662                         pr_err("Error event: %d\n", events[i]);
663                         err = -EINVAL;
664                         goto out_free;
665                 }
666
667                 ei = kzalloc(sizeof(*ei), GFP_KERNEL);
668                 if (!ei) {
669                         err = -ENOMEM;
670                         goto out_free;
671                 }
672
673                 INIT_LIST_HEAD(&ei->list);
674                 list_add_tail(&ei->list, &local_pmu_ctx->used_events);
675
676                 if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
677                         ei->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT;
678                         if (!free_ci.ccntr) {
679                                 pr_err("Error: cpu cycles counter is already occupied\n");
680                                 err = -EBUSY;
681                                 goto out_free;
682                         }
683                 } else {
684                         if (!free_pcntrs--) {
685                                 pr_err("Error: too many performance events\n");
686                                 err = -ENOSPC;
687                                 goto out_free;
688                         }
689
690                         ei->hw_value = local_pmu_ctx->current_map[events[i]];
691                 }
692
693                 ei->quadd_event_id = events[i];
694
695                 if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
696                         nr_l1_r++;
697                 else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
698                         nr_l1_w++;
699
700                 pr_info("Event has been added: id/pmu value: %s/%#x\n",
701                         quadd_get_event_str(events[i]),
702                         ei->hw_value);
703         }
704
705         if (nr_l1_r > 0 && nr_l1_w > 0)
706                 local_pmu_ctx->l1_cache_rw = 1;
707
708         return 0;
709
710 out_free:
711         free_events(&local_pmu_ctx->used_events);
712         return err;
713 }
714
715 static int get_supported_events(int cpuid, int *events, int max_events)
716 {
717         int i, nr_events = 0;
718         struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
719
720         if (!local_pmu_ctx->current_map)
721                 return 0;
722
723         max_events = min_t(int, QUADD_EVENT_TYPE_MAX, max_events);
724
725         for (i = 0; i < max_events; i++) {
726                 unsigned int event = local_pmu_ctx->current_map[i];
727
728                 if (event != QUADD_ARMV7_UNSUPPORTED_EVENT)
729                         events[nr_events++] = i;
730         }
731
732         return nr_events;
733 }
734
735 static int get_current_events(int cpuid, int *events, int max_events)
736 {
737         int i = 0;
738         struct quadd_pmu_event_info *ei;
739         struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
740
741         list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
742                 events[i++] = ei->quadd_event_id;
743
744                 if (i >= max_events)
745                         break;
746         }
747
748         return i;
749 }
750
751 static struct quadd_arch_info *get_arch(int cpuid)
752 {
753         struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
754
755         return local_pmu_ctx->current_map ? &local_pmu_ctx->arch : NULL;
756 }
757
/* Event-source vtable exported to the profiler core; the read hook is
 * the emulated variant when QUADD_USE_EMULATE_COUNTERS is defined. */
static struct quadd_event_source_interface pmu_armv7_int = {
	.enable			= pmu_enable,
	.disable		= pmu_disable,

	.start			= pmu_start,
	.stop			= pmu_stop,

#ifndef QUADD_USE_EMULATE_COUNTERS
	.read			= pmu_read,
#else
	.read			= pmu_read_emulate,
#endif
	.set_events		= set_events,
	.get_supported_events	= get_supported_events,
	.get_current_events	= get_current_events,
	.get_arch		= get_arch,
};
775
/*
 * Detect the core type of @cpu from its MIDR (cached in cpu_data) and
 * install the matching event map and counters mask into its pmu_ctx.
 * Returns 0 on success (or if no cpuid is available), non-zero for an
 * unsupported implementer/part.
 */
static int quadd_armv7_pmu_init_for_cpu(int cpu)
{
	int err = 0;
	unsigned long cpuid, cpu_implementer, part_number;

	struct cpuinfo_arm *local_cpu_data = &per_cpu(cpu_data, cpu);
	struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpu);

	/* defaults for the "unknown cpu" case */
	local_pmu_ctx->arch.type = QUADD_ARM_CPU_TYPE_UNKNOWN;
	local_pmu_ctx->arch.ver = 0;
	local_pmu_ctx->current_map = NULL;
	/* NUL-termination is guaranteed at the end of this function */
	strncpy(local_pmu_ctx->arch.name, "Unknown",
		sizeof(local_pmu_ctx->arch.name));

	INIT_LIST_HEAD(&local_pmu_ctx->used_events);

	cpuid = local_cpu_data->cpuid;

	/* cpuid == 0: no MIDR cached for this cpu yet; not an error */
	if (!cpuid)
		return 0;

	cpu_implementer = cpuid >> 24;
	part_number = cpuid & 0xFFF0;

	if (cpu_implementer == ARM_CPU_IMP_ARM) {
		switch (part_number) {
		case ARM_CPU_PART_CORTEX_A9:
			local_pmu_ctx->arch.type = QUADD_ARM_CPU_TYPE_CORTEX_A9;
			strncpy(local_pmu_ctx->arch.name, "Cortex A9",
				sizeof(local_pmu_ctx->arch.name));

			local_pmu_ctx->counters_mask =
				QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9;
			local_pmu_ctx->current_map = quadd_armv7_a9_events_map;
			break;

		case ARM_CPU_PART_CORTEX_A15:
			local_pmu_ctx->arch.type =
				QUADD_ARM_CPU_TYPE_CORTEX_A15;

			strncpy(local_pmu_ctx->arch.name, "Cortex A15",
				sizeof(local_pmu_ctx->arch.name));

			local_pmu_ctx->counters_mask =
				QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15;
			local_pmu_ctx->current_map = quadd_armv7_a15_events_map;
			break;

		default:
			local_pmu_ctx->arch.type = QUADD_ARM_CPU_TYPE_UNKNOWN;
			local_pmu_ctx->current_map = NULL;
			err = 1;
			break;
		}
	} else {
		err = 1;
	}

	/* strncpy above may leave the name unterminated when truncated */
	local_pmu_ctx->arch.name[sizeof(local_pmu_ctx->arch.name) - 1] = '\0';
	pr_info("[%d] arch: %s, type: %d, ver: %d\n",
		cpu, local_pmu_ctx->arch.name, local_pmu_ctx->arch.type,
		local_pmu_ctx->arch.ver);

	return err;
}
841
842 struct quadd_event_source_interface *quadd_armv7_pmu_init(void)
843 {
844         struct quadd_event_source_interface *pmu = NULL;
845         int cpu, err, initialized = 1;
846
847         for_each_possible_cpu(cpu) {
848                 err = quadd_armv7_pmu_init_for_cpu(cpu);
849                 if (err) {
850                         initialized = 0;
851                         break;
852                 }
853         }
854
855         if (initialized == 1)
856                 pmu = &pmu_armv7_int;
857         else
858                 pr_err("error: incorrect PMUVer\n");
859
860         return pmu;
861 }
862
863 void quadd_armv7_pmu_deinit(void)
864 {
865         int cpu;
866
867         for_each_possible_cpu(cpu) {
868                 struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpu);
869
870                 if (local_pmu_ctx->current_map)
871                         free_events(&local_pmu_ctx->used_events);
872         }
873 }