misc: tegra-profiler: support raw hardware events
[linux-3.10.git] / drivers / misc / tegra-profiler / armv8_pmu.c
1 /*
2  * drivers/misc/tegra-profiler/armv8_pmu.c
3  *
4  * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  */
16
17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
19 #include <linux/printk.h>
20 #include <linux/types.h>
21 #include <linux/string.h>
22
23 #include <linux/version.h>
24 #include <linux/err.h>
25 #include <linux/bitmap.h>
26 #include <linux/slab.h>
27
28 #include <asm/cputype.h>
29 #include <asm/cpu.h>
30
31 #include "arm_pmu.h"
32 #include "armv8_pmu.h"
33 #include "armv8_events.h"
34 #include "quadd.h"
35 #include "debug.h"
36
/* Per-CPU bookkeeping for the profiler's use of the PMU. */
struct quadd_pmu_info {
	/* counters (incl. the cycle-counter bit) claimed by pmu_start() */
	DECLARE_BITMAP(used_cntrs, QUADD_MAX_PMU_COUNTERS);
	/* last raw counter values; pmu_read() reports these as prev_val */
	u32 prev_vals[QUADD_MAX_PMU_COUNTERS];
	/* non-zero if the PMU was already enabled when pmu_start() ran */
	int is_already_active;
};
42
/* Aggregated free-counter info, filled in across CPUs via IPI. */
struct quadd_cntrs_info {
	int pcntrs;	/* min number of free programmable counters */
	int ccntr;	/* non-zero while the cycle counter is free on all CPUs */

	spinlock_t lock;	/* serializes updates from __get_free_counters() */
};
49
/* Per-CPU counter usage / previous-sample state */
static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info);

/* Per-CPU PMU description: event map, masks, arch info, used events */
static DEFINE_PER_CPU(struct quadd_pmu_ctx, pmu_ctx);
53
/*
 * Generic ARM PMUv3 map from QUADD_EVENT_HW_* ids to ARMv8 event
 * numbers. QUADD_ARMV8_UNSUPPORTED_EVENT marks ids this map lacks.
 * Note: generic cores expose only a combined L1D/L2 refill event, so
 * read and write misses map to the same counter event here.
 */
static unsigned
quadd_armv8_pmuv3_arm_events_map[QUADD_EVENT_HW_MAX] = {
	[QUADD_EVENT_HW_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_HW_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_HW_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_HW_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_HW_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_HW_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_HW_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
	[QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
	[QUADD_EVENT_HW_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};
79
/*
 * Cortex-A57 event map: the A57 provides separate load/store refill
 * events, so L1/L2 data-cache read and write misses are distinct.
 */
static unsigned
quadd_armv8_pmuv3_a57_events_map[QUADD_EVENT_HW_MAX] = {
	[QUADD_EVENT_HW_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_HW_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_HW_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_HW_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_HW_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_LD,
	[QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_ST,
	[QUADD_EVENT_HW_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_HW_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_LD,
	[QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_ST,
	[QUADD_EVENT_HW_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};
105
/*
 * NVIDIA Denver event map: no L2 cache miss events are exposed, so
 * all L2 ids are marked unsupported.
 */
static unsigned
quadd_armv8_pmuv3_denver_events_map[QUADD_EVENT_HW_MAX] = {
	[QUADD_EVENT_HW_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_HW_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_HW_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_HW_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_HW_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_HW_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_HW_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_HW_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};
131
132 /*********************************************************************/
133
/* Read PMCR_EL0 (Performance Monitors Control Register). */
static inline u32
armv8_pmu_pmcr_read(void)
{
	u32 val;

	/* Read Performance Monitors Control Register */
	asm volatile("mrs %0, pmcr_el0" : "=r" (val));
	return val;
}

/* Write PMCR_EL0; only architecturally writable bits are passed on. */
static inline void
armv8_pmu_pmcr_write(u32 val)
{
	/* barrier: make sure preceding instructions are complete before
	 * reprogramming the control register
	 */
	isb();

	/* Write Performance Monitors Control Register */
	asm volatile("msr pmcr_el0, %0" : :
		     "r" (val & QUADD_ARMV8_PMCR_WR_MASK));
}
153
/* Read PMCEID0_EL0: bitmap of common events this PMU implements. */
static inline u32
armv8_pmu_pmceid_read(void)
{
	u32 val;

	/* Read Performance Monitors Common Event Identification Register */
	asm volatile("mrs %0, pmceid0_el0" : "=r" (val));
	return val;
}

/* Read PMCNTENSET_EL0: which counters are currently enabled. */
static inline u32
armv8_pmu_pmcntenset_read(void)
{
	u32 val;

	/* Read Performance Monitors Count Enable Set Register */
	asm volatile("mrs %0, pmcntenset_el0" : "=r" (val));
	return val;
}

/* Write PMCNTENSET_EL0: set bits enable the corresponding counters. */
static inline void
armv8_pmu_pmcntenset_write(u32 val)
{
	/* Write Performance Monitors Count Enable Set Register */
	asm volatile("msr pmcntenset_el0, %0" : : "r" (val));
}

/* Write PMCNTENCLR_EL0: set bits disable the corresponding counters. */
static inline void
armv8_pmu_pmcntenclr_write(u32 val)
{
	/* Write Performance Monitors Count Enable Clear Register */
	asm volatile("msr pmcntenclr_el0, %0" : : "r" (val));
}

/* Write PMSELR_EL0: select the counter for PMXEV* indirect accesses. */
static inline void
armv8_pmu_pmselr_write(u32 val)
{
	/* Write Performance Monitors Event Counter Selection Register */
	asm volatile("msr pmselr_el0, %0" : :
		     "r" (val & QUADD_ARMV8_SELECT_MASK));
	/* barrier: selection must take effect before PMXEV* accesses */
	isb();
}
196
/* Read PMCCNTR_EL0: the 64-bit cycle counter. */
static inline u64
armv8_pmu_pmccntr_read(void)
{
	u64 val;

	/* Read Performance Monitors Cycle Count Register */
	asm volatile("mrs %0, pmccntr_el0" : "=r" (val));
	return val;
}

/* Write PMCCNTR_EL0: set the cycle counter value. */
static inline void
armv8_pmu_pmccntr_write(u64 val)
{
	/* Write Performance Monitors Cycle Count Register */
	asm volatile("msr pmccntr_el0, %0" : : "r" (val));
}

/* Read PMXEVCNTR_EL0: value of the counter selected via PMSELR. */
static inline u32
armv8_pmu_pmxevcntr_read(void)
{
	/* Read Performance Monitors Selected Event Count Register */
	u32 val;

	asm volatile("mrs %0, pmxevcntr_el0" : "=r" (val));
	return val;
}

/* Write PMXEVCNTR_EL0: set the counter selected via PMSELR. */
static inline void
armv8_pmu_pmxevcntr_write(u32 val)
{
	/* Write Performance Monitors Selected Event Count Register */
	asm volatile("msr pmxevcntr_el0, %0" : : "r" (val));
}

/* Write PMXEVTYPER_EL0: program the event for the selected counter. */
static inline void
armv8_pmu_pmxevtyper_write(u32 event)
{
	/* Write Performance Monitors Selected Event Type Register */
	asm volatile("msr pmxevtyper_el0, %0" : :
		     "r" (event & QUADD_ARMV8_EVTSEL_MASK));
}
238
/* Read PMINTENSET_EL1: which counters have overflow IRQs enabled. */
static inline u32 __maybe_unused
armv8_pmu_pmintenset_read(void)
{
	u32 val;

	/* Read Performance Monitors Interrupt Enable Set Register */
	asm volatile("mrs %0, pmintenset_el1" : "=r" (val));
	return val;
}

/* Write PMINTENSET_EL1: set bits enable overflow IRQs. */
static inline void __maybe_unused
armv8_pmu_pmintenset_write(u32 val)
{
	/* Write Performance Monitors Interrupt Enable Set Register */
	asm volatile("msr pmintenset_el1, %0" : : "r" (val));
}

/* Write PMINTENCLR_EL1: set bits disable overflow IRQs. */
static inline void __maybe_unused
armv8_pmu_pmintenclr_write(u32 val)
{
	/* Write Performance Monitors Interrupt Enable Clear Register */
	asm volatile("msr pmintenclr_el1, %0" : : "r" (val));
}

/* Read PMOVSCLR_EL0: pending overflow flags. */
static inline u32 __maybe_unused
armv8_pmu_pmovsclr_read(void)
{
	u32 val;

	/* Read Performance Monitors Overflow Flag Status Register */
	asm volatile("mrs %0, pmovsclr_el0" : "=r" (val));
	return val;
}
272
273 static inline void
274 armv8_pmu_pmovsclr_write(int idx)
275 {
276         /* Write Performance Monitors Overflow Flag Status Register */
277         asm volatile("msr pmovsclr_el0, %0" : : "r" (BIT(idx)));
278 }
279
/* Read ID_AFR0_EL1: implementation-defined auxiliary features
 * (used below to query the NVIDIA PMU extension version).
 */
static inline u32
armv8_id_afr0_el1_read(void)
{
	u32 val;

	/* Read Auxiliary Feature Register 0 */
	asm volatile("mrs %0, id_afr0_el1" : "=r" (val));
	return val;
}
289
/* Enable PMU counter @idx via PMCNTENSET. */
static void enable_counter(int idx)
{
	armv8_pmu_pmcntenset_write(BIT(idx));
}

/* Disable PMU counter @idx via PMCNTENCLR. */
static void disable_counter(int idx)
{
	armv8_pmu_pmcntenclr_write(BIT(idx));
}

/* Select event counter @counter for subsequent PMXEV* accesses. */
static void select_counter(unsigned int counter)
{
	armv8_pmu_pmselr_write(counter);
}
304
305 static int is_pmu_enabled(void)
306 {
307         struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
308         u32 pmcr = armv8_pmu_pmcr_read();
309
310         if (pmcr & QUADD_ARMV8_PMCR_E) {
311                 u32 pmcnten = armv8_pmu_pmcntenset_read();
312
313                 pmcnten &= local_pmu_ctx->counters_mask | QUADD_ARMV8_CCNT;
314                 return pmcnten ? 1 : 0;
315         }
316
317         return 0;
318 }
319
320 static u32 read_counter(int idx)
321 {
322         u32 val;
323
324         if (idx == QUADD_ARMV8_CCNT_BIT) {
325                 val = armv8_pmu_pmccntr_read();
326         } else {
327                 select_counter(idx);
328                 val = armv8_pmu_pmxevcntr_read();
329         }
330
331         return val;
332 }
333
334 static void write_counter(int idx, u32 value)
335 {
336         if (idx == QUADD_ARMV8_CCNT_BIT) {
337                 armv8_pmu_pmccntr_write(value);
338         } else {
339                 select_counter(idx);
340                 armv8_pmu_pmxevcntr_write(value);
341         }
342 }
343
/*
 * get_free_counters() - find counters not currently enabled (local CPU).
 * @bitmap: output bitmap of free counters (incl. the cycle-counter bit)
 * @nbits:  size of @bitmap in bits
 * @ccntr:  optional out-param, non-zero if the cycle counter is free
 *
 * A counter is considered free when its PMCNTENSET bit is clear.
 * Returns the number of free programmable counters (cycle counter
 * excluded from the count).
 */
static int
get_free_counters(unsigned long *bitmap, int nbits, int *ccntr)
{
	int cc;
	u32 cntens;
	unsigned long cntens_bitmap;

	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

	cntens = armv8_pmu_pmcntenset_read();
	cntens = ~cntens & (local_pmu_ctx->counters_mask | QUADD_ARMV8_CCNT);

	/* widen to unsigned long so bitmap helpers can consume it */
	cntens_bitmap = cntens;

	bitmap_zero(bitmap, nbits);
	/* only the low 32 bits are meaningful (register width) */
	bitmap_copy(bitmap, &cntens_bitmap, BITS_PER_BYTE * sizeof(u32));

	cc = (cntens & QUADD_ARMV8_CCNT) ? 1 : 0;

	if (ccntr)
		*ccntr = cc;

	/* subtract the cycle counter so only programmable ones remain */
	return bitmap_weight(bitmap, BITS_PER_BYTE * sizeof(u32)) - cc;
}
368
/* Mask the overflow interrupt for counter @idx. */
static void __maybe_unused
disable_interrupt(int idx)
{
	armv8_pmu_pmintenclr_write(BIT(idx));
}
374
375 static void
376 disable_all_interrupts(void)
377 {
378         struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
379         u32 val = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;
380
381         armv8_pmu_pmintenclr_write(val);
382 }
383
/* Clear pending overflow flags for all counters owned by the driver. */
static void
reset_overflow_flags(void)
{
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	/* mask of the cycle counter plus all managed event counters */
	u32 val = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;

	armv8_pmu_pmovsclr_write(val);
}
392
/* Program counter @idx to count @event: the counter must be selected
 * via PMSELR before the type register write takes effect.
 */
static void
select_event(unsigned int idx, unsigned int event)
{
	select_counter(idx);
	armv8_pmu_pmxevtyper_write(event);
}
399
400 static void disable_all_counters(void)
401 {
402         u32 val;
403         u32 masked;
404         struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
405
406         /* Disable all counters */
407         val = armv8_pmu_pmcr_read();
408         if (val & QUADD_ARMV8_PMCR_E)
409                 armv8_pmu_pmcr_write(val & ~QUADD_ARMV8_PMCR_E);
410
411         masked = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;
412         armv8_pmu_pmcntenclr_write(masked);
413 }
414
415 static void enable_all_counters(void)
416 {
417         u32 val;
418
419         /* Enable all counters */
420         val = armv8_pmu_pmcr_read();
421         val |= QUADD_ARMV8_PMCR_E | QUADD_ARMV8_PMCR_X;
422         armv8_pmu_pmcr_write(val);
423 }
424
425 static void reset_all_counters(void)
426 {
427         u32 val;
428
429         val = armv8_pmu_pmcr_read();
430         val |= QUADD_ARMV8_PMCR_P | QUADD_ARMV8_PMCR_C;
431         armv8_pmu_pmcr_write(val);
432 }
433
/* PMU preparation: clear stale overflow flags and mask interrupts
 * (this driver polls counters, it does not use overflow IRQs).
 */
static void quadd_init_pmu(void)
{
	reset_overflow_flags();
	disable_all_interrupts();
}
439
/* Reserve the PMU event source; no hardware setup is done here
 * (that happens per-CPU in pmu_start()). Always succeeds.
 */
static int pmu_enable(void)
{
	pr_info("pmu was reserved\n");
	return 0;
}
445
/*
 * Per-CPU teardown helper, run on every CPU via on_each_cpu().
 * If this driver owned the PMU, fully disable and reset it;
 * otherwise only disable and zero the counters it had claimed,
 * leaving the other user's counters untouched.
 */
static void __pmu_disable(void *arg)
{
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);

	if (!pi->is_already_active) {
		pr_info("[%d] reset all counters\n",
			smp_processor_id());

		disable_all_counters();
		reset_all_counters();
	} else {
		int idx;

		for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
			pr_info("[%d] reset counter: %d\n",
				smp_processor_id(), idx);

			disable_counter(idx);
			write_counter(idx, 0);
		}
	}
}
468
/* Release the PMU source: tear down counters on every online CPU. */
static void pmu_disable(void)
{
	on_each_cpu(__pmu_disable, NULL, 1);
	pr_info("pmu was released\n");
}
474
/*
 * pmu_start() - claim free counters and start counting (local CPU).
 *
 * Walks the configured event list: the cpu-cycles event takes the
 * dedicated cycle counter, every other event takes the next free
 * programmable counter. If the PMU was already active (some other
 * user), only the claimed counters are touched; otherwise the whole
 * PMU is reset and globally enabled at the end.
 *
 * The allocation order here (list order -> ascending free counter
 * index) is relied upon by pmu_read() to match counters back to
 * events.
 */
static void pmu_start(void)
{
	int idx = 0, pcntrs, ccntr;
	u32 event;
	struct quadd_pmu_ctx *local_pmu_ctx;
	DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	bitmap_zero(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS);

	if (is_pmu_enabled()) {
		pi->is_already_active = 1;
	} else {
		disable_all_counters();
		quadd_init_pmu();

		pi->is_already_active = 0;
	}

	pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

	local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		int index;

		/* reset the previous-sample slot for this event */
		*prevp++ = 0;

		event = ei->hw_value;

		if (is_cpu_cycles(&ei->event)) {
			if (!ccntr) {
				pr_err_once("Error: cpu cycles counter is already occupied\n");
				return;
			}
			index = QUADD_ARMV8_CCNT_BIT;
		} else {
			/* NOTE(review): error returns here leave earlier
			 * counters enabled but the PMU not globally
			 * started -- presumably acceptable since the
			 * request was validated in set_events(); confirm.
			 */
			if (!pcntrs--) {
				pr_err_once("Error: too many performance events\n");
				return;
			}

			index = find_next_bit(free_bitmap,
					      QUADD_MAX_PMU_COUNTERS, idx);
			if (index >= QUADD_MAX_PMU_COUNTERS) {
				pr_err_once("Error: too many events\n");
				return;
			}
			/* resume the free-counter search past this one */
			idx = index + 1;
			select_event(index, event);
		}
		set_bit(index, pi->used_cntrs);

		/* start counting from zero */
		write_counter(index, 0);
		enable_counter(index);
	}

	if (!pi->is_already_active) {
		reset_all_counters();
		enable_all_counters();
	}

	qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
}
540
541 static void pmu_stop(void)
542 {
543         int idx;
544         struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
545
546         if (!pi->is_already_active) {
547                 disable_all_counters();
548                 reset_all_counters();
549         } else {
550                 for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
551                         disable_counter(idx);
552                         write_counter(idx, 0);
553                 }
554         }
555
556         qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
557 }
558
/*
 * pmu_read() - sample all configured counters on the local CPU.
 * @events:     output array of samples
 * @max_events: capacity of @events
 *
 * For each configured event, reads its counter and records both the
 * current raw value and the previously sampled one so callers can
 * compute deltas. Returns the number of samples written (0 on error).
 *
 * Counters are matched to events positionally: the list walk plus
 * find_next_bit() over used_cntrs must reproduce the allocation
 * order used in pmu_start().
 */
static int __maybe_unused
pmu_read(struct event_data *events, int max_events)
{
	u32 val;
	int idx = 0, i = 0;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	if (bitmap_empty(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS)) {
		pr_err_once("Error: counters were not initialized\n");
		return 0;
	}

	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		int index;

		if (is_cpu_cycles(&ei->event)) {
			if (!test_bit(QUADD_ARMV8_CCNT_BIT, pi->used_cntrs)) {
				pr_err_once("Error: ccntr is not used\n");
				return 0;
			}
			index = QUADD_ARMV8_CCNT_BIT;
		} else {
			index = find_next_bit(pi->used_cntrs,
					      QUADD_MAX_PMU_COUNTERS, idx);
			idx = index + 1;

			if (index >= QUADD_MAX_PMU_COUNTERS) {
				pr_err_once("Error: perf counter is not used\n");
				return 0;
			}
		}

		val = read_counter(index);

		events->event_source = QUADD_EVENT_SOURCE_PMU;
		events->event = ei->event;

		/* report current and previous raw values for delta calc */
		events->val = val;
		events->prev_val = *prevp;

		*prevp = val;

		qm_debug_read_counter(&events->event, events->prev_val,
				      events->val);

		if (++i >= max_events)
			break;

		events++;
		prevp++;
	}

	return i;
}
616
/*
 * pmu_read_emulate() - fake counter source for testing.
 *
 * Produces a deterministic ramp (100..200 in steps of 5) instead of
 * touching hardware; selected by QUADD_USE_EMULATE_COUNTERS.
 */
static int __maybe_unused
pmu_read_emulate(struct event_data *events, int max_events)
{
	int i = 0;
	/* static: the ramp persists across calls */
	static u32 val = 100;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		/* wrap the ramp back to its base value */
		if (val > 200)
			val = 100;

		events->event.id = *prevp;
		events->val = val;

		*prevp = val;
		val += 5;

		if (++i >= max_events)
			break;

		events++;
		prevp++;
	}

	return i;
}
647
/*
 * IPI callback: fold this CPU's free-counter availability into the
 * shared quadd_cntrs_info -- keep the minimum programmable-counter
 * count across CPUs, and clear ccntr if the cycle counter is taken
 * on any CPU.
 */
static void __get_free_counters(void *arg)
{
	int pcntrs, ccntr;
	DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
	struct quadd_cntrs_info *ci = arg;

	pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

	spin_lock(&ci->lock);

	ci->pcntrs = min_t(int, pcntrs, ci->pcntrs);

	if (!ccntr)
		ci->ccntr = 0;

	pr_info("[%d] pcntrs/ccntr: %d/%d, free_bitmap: %#lx\n",
		smp_processor_id(), pcntrs, ccntr, free_bitmap[0]);

	spin_unlock(&ci->lock);
}
668
669 static void free_events(struct list_head *head)
670 {
671         struct quadd_pmu_event_info *entry, *next;
672
673         list_for_each_entry_safe(entry, next, head, list) {
674                 list_del(&entry->list);
675                 kfree(entry);
676         }
677 }
678
679 static int
680 set_events(int cpuid, const struct quadd_event *events, int size)
681 {
682         int i, free_pcntrs, err;
683         struct quadd_cntrs_info free_ci;
684         struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
685
686         free_events(&local_pmu_ctx->used_events);
687
688         if (!events || !size)
689                 return 0;
690
691         if (!local_pmu_ctx->current_map) {
692                 pr_err("Invalid current_map\n");
693                 return -ENODEV;
694         }
695
696         spin_lock_init(&free_ci.lock);
697         free_ci.pcntrs = QUADD_MAX_PMU_COUNTERS;
698         free_ci.ccntr = 1;
699
700         smp_call_function_single(cpuid, __get_free_counters, &free_ci, 1);
701
702         free_pcntrs = free_ci.pcntrs;
703         pr_info("free counters: pcntrs/ccntr: %d/%d\n",
704                 free_pcntrs, free_ci.ccntr);
705
706         for (i = 0; i < size; i++) {
707                 unsigned int type, id;
708                 struct quadd_pmu_event_info *ei;
709
710                 type = events[i].type;
711                 id = events[i].id;
712
713                 if (type == QUADD_EVENT_TYPE_HARDWARE) {
714                         if (id >= QUADD_EVENT_HW_MAX) {
715                                 err = -EINVAL;
716                                 goto out_free;
717                         }
718                 } else if (type == QUADD_EVENT_TYPE_RAW) {
719                         if (id & ~local_pmu_ctx->raw_event_mask) {
720                                 err = -EINVAL;
721                                 goto out_free;
722                         }
723                 } else {
724                         err = -EINVAL;
725                         goto out_free;
726                 }
727
728                 ei = kzalloc(sizeof(*ei), GFP_KERNEL);
729                 if (!ei) {
730                         err = -ENOMEM;
731                         goto out_free;
732                 }
733
734                 INIT_LIST_HEAD(&ei->list);
735                 list_add_tail(&ei->list, &local_pmu_ctx->used_events);
736
737                 if (is_cpu_cycles(&events[i])) {
738                         ei->hw_value = QUADD_ARMV8_CPU_CYCLE_EVENT;
739                         if (!free_ci.ccntr) {
740                                 pr_err("error: cpu cycles counter is already occupied\n");
741                                 err = -EBUSY;
742                                 goto out_free;
743                         }
744                 } else {
745                         if (!free_pcntrs--) {
746                                 pr_err("error: too many performance events\n");
747                                 err = -ENOSPC;
748                                 goto out_free;
749                         }
750
751                         ei->hw_value = (type == QUADD_EVENT_TYPE_RAW) ? id :
752                                 local_pmu_ctx->current_map[id];
753                 }
754
755                 ei->event = events[i];
756
757                 pr_info("[%d] Event has been added: id: %#x (%s), hw value: %#x\n",
758                         cpuid, id, type == QUADD_EVENT_TYPE_RAW ? "raw" : "hw",
759                         ei->hw_value);
760         }
761
762         return 0;
763
764 out_free:
765         free_events(&local_pmu_ctx->used_events);
766         return err;
767 }
768
769 static int
770 get_supported_events(int cpuid, struct quadd_event *events,
771                      int max_events, unsigned int *raw_event_mask)
772 {
773         int i, nr_events = 0;
774
775         struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
776
777         if (!local_pmu_ctx->current_map)
778                 return 0;
779
780         max_events = min_t(int, QUADD_EVENT_HW_MAX, max_events);
781
782         for (i = 0; i < max_events; i++) {
783                 unsigned int event = local_pmu_ctx->current_map[i];
784
785                 if (event != QUADD_ARMV8_UNSUPPORTED_EVENT) {
786                         events[nr_events].type = QUADD_EVENT_TYPE_HARDWARE;
787                         events[nr_events].id = i;
788
789                         nr_events++;
790                 }
791         }
792
793         *raw_event_mask = local_pmu_ctx->raw_event_mask;
794
795         return nr_events;
796 }
797
798 static int
799 get_current_events(int cpuid, struct quadd_event *events, int max_events)
800 {
801         int i = 0;
802         struct quadd_pmu_event_info *ei;
803         struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
804
805         list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
806                 events[i++] = ei->event;
807
808                 if (i >= max_events)
809                         break;
810         }
811
812         return i;
813 }
814
815 static struct quadd_arch_info *get_arch(int cpuid)
816 {
817         struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
818
819         return local_pmu_ctx->current_map ? &local_pmu_ctx->arch : NULL;
820 }
821
/* Event-source operations exported to the profiler core. */
static struct quadd_event_source_interface pmu_armv8_int = {
	.enable			= pmu_enable,
	.disable		= pmu_disable,

	.start			= pmu_start,
	.stop			= pmu_stop,

	/* QUADD_USE_EMULATE_COUNTERS selects the fake counter source */
#ifndef QUADD_USE_EMULATE_COUNTERS
	.read			= pmu_read,
#else
	.read			= pmu_read_emulate,
#endif
	.set_events		= set_events,
	.get_supported_events	= get_supported_events,
	.get_current_events	= get_current_events,
	.get_arch		= get_arch,
};
839
840 static int quadd_armv8_pmu_init_for_cpu(int cpuid)
841 {
842         int idx, err = 0;
843         u32 pmcr, ext_ver, idcode = 0;
844         u64 aa64_dfr;
845         u8 implementer;
846         struct cpuinfo_arm64 *local_cpu_data = &per_cpu(cpu_data, cpuid);
847         struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
848         u32 reg_midr = local_cpu_data->reg_midr;
849
850         strncpy(local_pmu_ctx->arch.name, "Unknown",
851                         sizeof(local_pmu_ctx->arch.name));
852
853         local_pmu_ctx->arch.type = QUADD_AA64_CPU_TYPE_UNKNOWN;
854         local_pmu_ctx->arch.ver = 0;
855         local_pmu_ctx->current_map = NULL;
856
857         INIT_LIST_HEAD(&local_pmu_ctx->used_events);
858
859         if (!reg_midr)
860                 return 0;
861
862         implementer = (reg_midr >> 24) & 0xFF;
863
864         aa64_dfr = read_cpuid(ID_AA64DFR0_EL1);
865         aa64_dfr = (aa64_dfr >> 8) & 0x0f;
866
867         if (aa64_dfr != QUADD_AA64_PMUVER_PMUV3)
868                 err = 1;
869
870         if (err == 0 && (implementer == 'A' || implementer == 'N')) {
871
872                 strncpy(local_pmu_ctx->arch.name, "AA64 PmuV3",
873                                 sizeof(local_pmu_ctx->arch.name));
874
875                 idx = sizeof(local_pmu_ctx->arch.name) - 1;
876                 local_pmu_ctx->arch.name[idx] = '\0';
877
878                 local_pmu_ctx->counters_mask =
879                         QUADD_ARMV8_COUNTERS_MASK_PMUV3;
880                 local_pmu_ctx->raw_event_mask =
881                         QUADD_ARMV8_EVTSEL_MASK;
882                 local_pmu_ctx->current_map =
883                         quadd_armv8_pmuv3_arm_events_map;
884
885                 pmcr = armv8_pmu_pmcr_read();
886
887                 idcode = (pmcr >> QUADD_ARMV8_PMCR_IDCODE_SHIFT) &
888                         QUADD_ARMV8_PMCR_IDCODE_MASK;
889
890                 pr_info("imp: %#x, idcode: %#x\n", implementer, idcode);
891         }
892
893         if (err == 0) {
894                 switch (implementer) {
895                 case 'A':
896                         strncat(local_pmu_ctx->arch.name, " ARM",
897                                 sizeof(local_pmu_ctx->arch.name) -
898                                 strlen(local_pmu_ctx->arch.name));
899                         idx = sizeof(local_pmu_ctx->arch.name) - 1;
900                         local_pmu_ctx->arch.name[idx] = '\0';
901
902                         if (idcode == QUADD_AA64_CPU_IDCODE_CORTEX_A53) {
903                                 local_pmu_ctx->arch.type =
904                                         QUADD_AA64_CPU_TYPE_CORTEX_A53;
905
906                                 strncat(local_pmu_ctx->arch.name, " CORTEX-A53",
907                                         sizeof(local_pmu_ctx->arch.name) -
908                                         strlen(local_pmu_ctx->arch.name));
909
910                         } else if (idcode == QUADD_AA64_CPU_IDCODE_CORTEX_A57) {
911                                 local_pmu_ctx->arch.type =
912                                         QUADD_AA64_CPU_TYPE_CORTEX_A57;
913                                 local_pmu_ctx->current_map =
914                                         quadd_armv8_pmuv3_a57_events_map;
915
916                                 strncat(local_pmu_ctx->arch.name, " CORTEX-A57",
917                                         sizeof(local_pmu_ctx->arch.name) -
918                                         strlen(local_pmu_ctx->arch.name));
919                         } else {
920                                 local_pmu_ctx->arch.type =
921                                         QUADD_AA64_CPU_TYPE_ARM;
922                         }
923                         break;
924                 case 'N':
925                         ext_ver = armv8_id_afr0_el1_read();
926
927                         ext_ver = (ext_ver >> QUADD_ARMV8_PMU_NVEXT_SHIFT) &
928                                 QUADD_ARMV8_PMU_NVEXT_MASK;
929
930                         strncat(local_pmu_ctx->arch.name, " NVIDIA (Denver)",
931                                 sizeof(local_pmu_ctx->arch.name) -
932                                 strlen(local_pmu_ctx->arch.name));
933                         local_pmu_ctx->arch.type = QUADD_AA64_CPU_TYPE_DENVER;
934                         local_pmu_ctx->arch.ver = ext_ver;
935                         local_pmu_ctx->current_map =
936                                 quadd_armv8_pmuv3_denver_events_map;
937                         break;
938                 default:
939                         strncat(local_pmu_ctx->arch.name,
940                                 " Unknown implementor code",
941                                 sizeof(local_pmu_ctx->arch.name) -
942                                 strlen(local_pmu_ctx->arch.name));
943                         local_pmu_ctx->arch.type =
944                                 QUADD_AA64_CPU_TYPE_UNKNOWN_IMP;
945                         err = 1;
946                         break;
947                 }
948         }
949
950         local_pmu_ctx->arch.name[sizeof(local_pmu_ctx->arch.name) - 1] = '\0';
951         pr_info("[%d] arch: %s, type: %d, ver: %d\n",
952                 cpuid, local_pmu_ctx->arch.name, local_pmu_ctx->arch.type,
953                 local_pmu_ctx->arch.ver);
954
955         return err;
956 }
957
958 struct quadd_event_source_interface *quadd_armv8_pmu_init(void)
959 {
960         struct quadd_event_source_interface *pmu = NULL;
961         int cpuid;
962         int err;
963         int initialized = 1;
964
965         for_each_possible_cpu(cpuid) {
966                 err = quadd_armv8_pmu_init_for_cpu(cpuid);
967                 if (err) {
968                         initialized = 0;
969                         break;
970                 }
971         }
972
973         if (initialized == 1)
974                 pmu = &pmu_armv8_int;
975         else
976                 pr_err("error: incorrect PMUVer\n");
977
978         return pmu;
979 }
980
981 void quadd_armv8_pmu_deinit(void)
982 {
983         int cpu_id;
984
985         for_each_possible_cpu(cpu_id) {
986                 struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpu_id);
987
988                 if (local_pmu_ctx->current_map)
989                         free_events(&local_pmu_ctx->used_events);
990         }
991 }