f4645a2b52c664333de37b41ee68e1cc73a33f1a
[linux-3.10.git] / drivers / misc / tegra-profiler / armv8_pmu.c
1 /*
2  * drivers/misc/tegra-profiler/armv8_pmu.c
3  *
4  * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  */
16
17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
19 #include <linux/printk.h>
20 #include <linux/types.h>
21 #include <linux/string.h>
22
23 #include <linux/version.h>
24 #include <linux/err.h>
25 #include <linux/bitmap.h>
26 #include <linux/slab.h>
27
28 #include <asm/cputype.h>
29 #include <asm/cpu.h>
30
31 #include "arm_pmu.h"
32 #include "armv8_pmu.h"
33 #include "armv8_events.h"
34 #include "quadd.h"
35 #include "debug.h"
36
/* Per-cpu runtime state of an active profiling session. */
struct quadd_pmu_info {
	/* Counters claimed by this session (one bit per counter, incl. CCNT). */
	DECLARE_BITMAP(used_cntrs, QUADD_MAX_PMU_COUNTERS);
	/* Last value read from each counter, used to report prev/cur pairs. */
	u32 prev_vals[QUADD_MAX_PMU_COUNTERS];
	/* Nonzero if the PMU was already running when the session started. */
	int is_already_active;
};
42
/* Aggregate result of a cross-cpu free-counter query. */
struct quadd_cntrs_info {
	int pcntrs;	/* minimum free programmable counters over all cpus */
	int ccntr;	/* nonzero while the cycle counter is free on all cpus */

	spinlock_t lock;	/* serializes updates from each cpu's callback */
};
49
/* Per-cpu session state: claimed counters and previous readings. */
static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info);

/* Per-cpu PMU description: arch info, event map, configured events. */
static DEFINE_PER_CPU(struct quadd_pmu_ctx, pmu_ctx);
53
/*
 * Generic ARMv8 PMUv3 mapping from quadd event ids to hardware event
 * numbers.  Entries set to QUADD_ARMV8_UNSUPPORTED_EVENT are not
 * offered on this PMU; generic cores cannot split L1/L2 refills by
 * read/write, so both directions map to the combined refill event.
 */
static unsigned
quadd_armv8_pmuv3_arm_events_map[QUADD_EVENT_TYPE_MAX] = {
	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_TYPE_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};
79
/*
 * Cortex-A57 event map: the A57 implements per-direction (load/store)
 * L1D and L2D refill events, so read and write misses get distinct
 * hardware event numbers here, unlike the generic PMUv3 map.
 */
static unsigned
quadd_armv8_pmuv3_a57_events_map[QUADD_EVENT_TYPE_MAX] = {
	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_TYPE_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_LD,
	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_ST,
	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_LD,
	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_ST,
	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};
105
/*
 * NVIDIA Denver event map: L1 events use the generic PMUv3 numbers;
 * no L2 cache-miss events are exposed via this map.
 */
static unsigned
quadd_armv8_pmuv3_denver_events_map[QUADD_EVENT_TYPE_MAX] = {
	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_TYPE_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};
131
132 /*********************************************************************/
133
/* Read the Performance Monitors Control Register (PMCR_EL0). */
static inline u32
armv8_pmu_pmcr_read(void)
{
	u32 val;

	asm volatile("mrs %0, pmcr_el0" : "=r" (val));
	return val;
}
143
/*
 * Write the Performance Monitors Control Register (PMCR_EL0).
 * Only the architecturally writable bits are forwarded.
 */
static inline void
armv8_pmu_pmcr_write(u32 val)
{
	/* Ensure earlier PMU accesses complete before reprogramming. */
	isb();

	asm volatile("msr pmcr_el0, %0" : :
		     "r" (val & QUADD_ARMV8_PMCR_WR_MASK));
}
153
/*
 * Read the Performance Monitors Common Event Identification Register
 * (PMCEID0_EL0): a bitmap of implemented common events.
 */
static inline u32
armv8_pmu_pmceid_read(void)
{
	u32 val;

	asm volatile("mrs %0, pmceid0_el0" : "=r" (val));
	return val;
}
163
/*
 * Read the Count Enable Set Register (PMCNTENSET_EL0): a set bit
 * means the corresponding counter is currently enabled.
 */
static inline u32
armv8_pmu_pmcntenset_read(void)
{
	u32 val;

	asm volatile("mrs %0, pmcntenset_el0" : "=r" (val));
	return val;
}
173
/* Enable the counters whose bits are set in @val (PMCNTENSET_EL0). */
static inline void
armv8_pmu_pmcntenset_write(u32 val)
{
	asm volatile("msr pmcntenset_el0, %0" : : "r" (val));
}
180
/* Disable the counters whose bits are set in @val (PMCNTENCLR_EL0). */
static inline void
armv8_pmu_pmcntenclr_write(u32 val)
{
	asm volatile("msr pmcntenclr_el0, %0" : : "r" (val));
}
187
/*
 * Select an event counter via PMSELR_EL0; subsequent PMXEVCNTR/
 * PMXEVTYPER accesses target this counter.  The trailing isb()
 * guarantees the selection is visible to those accesses.
 */
static inline void
armv8_pmu_pmselr_write(u32 val)
{
	asm volatile("msr pmselr_el0, %0" : :
		     "r" (val & QUADD_ARMV8_SELECT_MASK));
	isb();
}
196
/* Read the Cycle Count Register (PMCCNTR_EL0). */
static inline u64
armv8_pmu_pmccntr_read(void)
{
	u64 val;

	asm volatile("mrs %0, pmccntr_el0" : "=r" (val));
	return val;
}
206
/* Write the Cycle Count Register (PMCCNTR_EL0). */
static inline void
armv8_pmu_pmccntr_write(u64 val)
{
	asm volatile("msr pmccntr_el0, %0" : : "r" (val));
}
213
/*
 * Read the Selected Event Count Register (PMXEVCNTR_EL0); the
 * counter must have been chosen via PMSELR_EL0 first.
 */
static inline u32
armv8_pmu_pmxevcntr_read(void)
{
	u32 val;

	asm volatile("mrs %0, pmxevcntr_el0" : "=r" (val));
	return val;
}
223
/*
 * Write the Selected Event Count Register (PMXEVCNTR_EL0); the
 * counter must have been chosen via PMSELR_EL0 first.
 */
static inline void
armv8_pmu_pmxevcntr_write(u32 val)
{
	asm volatile("msr pmxevcntr_el0, %0" : : "r" (val));
}
230
/*
 * Program the event type for the counter selected via PMSELR_EL0
 * (PMXEVTYPER_EL0); only the event-select field bits are forwarded.
 */
static inline void
armv8_pmu_pmxevtyper_write(u32 event)
{
	asm volatile("msr pmxevtyper_el0, %0" : :
		     "r" (event & QUADD_ARMV8_EVTSEL_MASK));
}
238
/* Read the Interrupt Enable Set Register (PMINTENSET_EL1). */
static inline u32 __maybe_unused
armv8_pmu_pmintenset_read(void)
{
	u32 val;

	asm volatile("mrs %0, pmintenset_el1" : "=r" (val));
	return val;
}
248
/* Enable overflow interrupts for the counters set in @val. */
static inline void __maybe_unused
armv8_pmu_pmintenset_write(u32 val)
{
	asm volatile("msr pmintenset_el1, %0" : : "r" (val));
}
255
/* Disable overflow interrupts for the counters set in @val. */
static inline void __maybe_unused
armv8_pmu_pmintenclr_write(u32 val)
{
	asm volatile("msr pmintenclr_el1, %0" : : "r" (val));
}
262
/* Read the Overflow Flag Status Register (PMOVSCLR_EL0). */
static inline u32 __maybe_unused
armv8_pmu_pmovsclr_read(void)
{
	u32 val;

	asm volatile("mrs %0, pmovsclr_el0" : "=r" (val));
	return val;
}
272
273 static inline void
274 armv8_pmu_pmovsclr_write(int idx)
275 {
276         /* Write Performance Monitors Overflow Flag Status Register */
277         asm volatile("msr pmovsclr_el0, %0" : : "r" (BIT(idx)));
278 }
279
/*
 * Read Auxiliary Feature Register 0 (ID_AFR0_EL1); used below to
 * extract the NVIDIA PMU extension version on Denver cores.
 */
static inline u32
armv8_id_afr0_el1_read(void)
{
	u32 val;

	asm volatile("mrs %0, id_afr0_el1" : "=r" (val));
	return val;
}
289
/* Start counter @idx by setting its bit in PMCNTENSET. */
static void enable_counter(int idx)
{
	armv8_pmu_pmcntenset_write(1U << idx);
}
294
/* Stop counter @idx by setting its bit in PMCNTENCLR. */
static void disable_counter(int idx)
{
	armv8_pmu_pmcntenclr_write(1U << idx);
}
299
/* Route subsequent PMXEV* accesses to @counter via PMSELR. */
static void select_counter(unsigned int counter)
{
	armv8_pmu_pmselr_write(counter);
}
304
305 static int is_pmu_enabled(void)
306 {
307         struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
308         u32 pmcr = armv8_pmu_pmcr_read();
309
310         if (pmcr & QUADD_ARMV8_PMCR_E) {
311                 u32 pmcnten = armv8_pmu_pmcntenset_read();
312
313                 pmcnten &= local_pmu_ctx->counters_mask | QUADD_ARMV8_CCNT;
314                 return pmcnten ? 1 : 0;
315         }
316
317         return 0;
318 }
319
320 static u32 read_counter(int idx)
321 {
322         u32 val;
323
324         if (idx == QUADD_ARMV8_CCNT_BIT) {
325                 val = armv8_pmu_pmccntr_read();
326         } else {
327                 select_counter(idx);
328                 val = armv8_pmu_pmxevcntr_read();
329         }
330
331         return val;
332 }
333
334 static void write_counter(int idx, u32 value)
335 {
336         if (idx == QUADD_ARMV8_CCNT_BIT) {
337                 armv8_pmu_pmccntr_write(value);
338         } else {
339                 select_counter(idx);
340                 armv8_pmu_pmxevcntr_write(value);
341         }
342 }
343
/*
 * Collect the currently-unused counters of this cpu into @bitmap.
 *
 * Returns the number of free programmable counters; if @ccntr is
 * non-NULL it is set to 1 when the cycle counter is also free.
 */
static int
get_free_counters(unsigned long *bitmap, int nbits, int *ccntr)
{
	int cc;
	u32 cntens;
	unsigned long cntens_bitmap;

	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

	/* A clear PMCNTENSET bit within our mask means "counter free". */
	cntens = armv8_pmu_pmcntenset_read();
	cntens = ~cntens & (local_pmu_ctx->counters_mask | QUADD_ARMV8_CCNT);

	/* Widen the 32-bit mask to unsigned long for the bitmap API. */
	cntens_bitmap = cntens;

	bitmap_zero(bitmap, nbits);
	bitmap_copy(bitmap, &cntens_bitmap, BITS_PER_BYTE * sizeof(u32));

	cc = (cntens & QUADD_ARMV8_CCNT) ? 1 : 0;

	if (ccntr)
		*ccntr = cc;

	/* The cycle counter is reported separately, not as a pcntr. */
	return bitmap_weight(bitmap, BITS_PER_BYTE * sizeof(u32)) - cc;
}
368
/* Mask the overflow interrupt of counter @idx. */
static void __maybe_unused
disable_interrupt(int idx)
{
	armv8_pmu_pmintenclr_write(BIT(idx));
}
374
/* Mask overflow interrupts for CCNT and all our event counters. */
static void
disable_all_interrupts(void)
{
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 val = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;

	armv8_pmu_pmintenclr_write(val);
}
383
/*
 * Clear pending overflow flags for CCNT and all our event counters.
 * NOTE(review): val is a full counter bit mask (CCNT | event
 * counters); the write helper must treat its argument as a mask,
 * not a single counter index — verify against its implementation.
 */
static void
reset_overflow_flags(void)
{
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 val = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;

	armv8_pmu_pmovsclr_write(val);
}
392
/* Bind hardware event @event to counter @idx. */
static void
select_event(unsigned int idx, unsigned int event)
{
	select_counter(idx);
	armv8_pmu_pmxevtyper_write(event);
}
399
/* Stop the PMU globally and disable CCNT plus all event counters. */
static void disable_all_counters(void)
{
	u32 val;
	u32 masked;
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

	/* Clear the global enable bit first... */
	val = armv8_pmu_pmcr_read();
	if (val & QUADD_ARMV8_PMCR_E)
		armv8_pmu_pmcr_write(val & ~QUADD_ARMV8_PMCR_E);

	/* ...then disable each counter individually. */
	masked = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;
	armv8_pmu_pmcntenclr_write(masked);
}
414
415 static void enable_all_counters(void)
416 {
417         u32 val;
418
419         /* Enable all counters */
420         val = armv8_pmu_pmcr_read();
421         val |= QUADD_ARMV8_PMCR_E | QUADD_ARMV8_PMCR_X;
422         armv8_pmu_pmcr_write(val);
423 }
424
425 static void reset_all_counters(void)
426 {
427         u32 val;
428
429         val = armv8_pmu_pmcr_read();
430         val |= QUADD_ARMV8_PMCR_P | QUADD_ARMV8_PMCR_C;
431         armv8_pmu_pmcr_write(val);
432 }
433
/* Bring the PMU to a known state: no pending overflows, no IRQs. */
static void quadd_init_pmu(void)
{
	reset_overflow_flags();
	disable_all_interrupts();
}
439
/*
 * Reserve the PMU as an event source; actual programming happens in
 * pmu_start().  Always succeeds.
 */
static int pmu_enable(void)
{
	pr_info("pmu was reserved\n");
	return 0;
}
445
/*
 * Per-cpu teardown (runs via on_each_cpu).  If we owned the PMU
 * outright, stop and clear everything; if someone else had it
 * running, only quiesce the counters we claimed.
 */
static void __pmu_disable(void *arg)
{
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);

	if (!pi->is_already_active) {
		pr_info("[%d] reset all counters\n",
			smp_processor_id());

		disable_all_counters();
		reset_all_counters();
	} else {
		int idx;

		for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
			pr_info("[%d] reset counter: %d\n",
				smp_processor_id(), idx);

			disable_counter(idx);
			write_counter(idx, 0);
		}
	}
}
468
/* Release the PMU on every cpu. */
static void pmu_disable(void)
{
	on_each_cpu(__pmu_disable, NULL, 1);
	pr_info("pmu was released\n");
}
474
/*
 * Program and start the configured events on the current cpu.
 *
 * Free hardware counters are assigned to the session's events in
 * list order; the cycle counter is handled specially.  If the PMU
 * was already running (shared with another user) only the claimed
 * counters are touched, otherwise the whole unit is reset and
 * enabled at the end.
 */
static void pmu_start(void)
{
	int idx = 0, pcntrs, ccntr;
	u32 event;
	struct quadd_pmu_ctx *local_pmu_ctx;
	DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	bitmap_zero(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS);

	if (is_pmu_enabled()) {
		pi->is_already_active = 1;
	} else {
		disable_all_counters();
		quadd_init_pmu();

		pi->is_already_active = 0;
	}

	pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

	local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		int index;

		/* Start each event's delta baseline from zero. */
		*prevp++ = 0;

		event = ei->hw_value;

		if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
			if (!ccntr) {
				pr_err_once("Error: cpu cycles counter is already occupied\n");
				return;
			}
			index = QUADD_ARMV8_CCNT_BIT;
		} else {
			if (!pcntrs--) {
				pr_err_once("Error: too many performance events\n");
				return;
			}

			/* Take the next free programmable counter. */
			index = find_next_bit(free_bitmap,
					      QUADD_MAX_PMU_COUNTERS, idx);
			if (index >= QUADD_MAX_PMU_COUNTERS) {
				pr_err_once("Error: too many events\n");
				return;
			}
			idx = index + 1;
			select_event(index, event);
		}
		set_bit(index, pi->used_cntrs);

		write_counter(index, 0);
		enable_counter(index);
	}

	/* Only kick the global enable if the PMU was ours to start. */
	if (!pi->is_already_active) {
		reset_all_counters();
		enable_all_counters();
	}

	qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
}
540
541 static void pmu_stop(void)
542 {
543         int idx;
544         struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
545
546         if (!pi->is_already_active) {
547                 disable_all_counters();
548                 reset_all_counters();
549         } else {
550                 for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
551                         disable_counter(idx);
552                         write_counter(idx, 0);
553                 }
554         }
555
556         qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
557 }
558
/*
 * Sample all configured events on the current cpu into @events
 * (at most @max_events entries).  Each sample carries the current
 * and previous raw counter values so callers can form deltas.
 *
 * Returns the number of entries filled, 0 on error.
 */
static int __maybe_unused
pmu_read(struct event_data *events, int max_events)
{
	u32 val;
	int idx = 0, i = 0;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	if (bitmap_empty(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS)) {
		pr_err_once("Error: counters were not initialized\n");
		return 0;
	}

	/* Walk events in the same order pmu_start() assigned counters. */
	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		int index;

		if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
			if (!test_bit(QUADD_ARMV8_CCNT_BIT, pi->used_cntrs)) {
				pr_err_once("Error: ccntr is not used\n");
				return 0;
			}
			index = QUADD_ARMV8_CCNT_BIT;
		} else {
			index = find_next_bit(pi->used_cntrs,
					      QUADD_MAX_PMU_COUNTERS, idx);
			idx = index + 1;

			if (index >= QUADD_MAX_PMU_COUNTERS) {
				pr_err_once("Error: perf counter is not used\n");
				return 0;
			}
		}

		val = read_counter(index);

		events->event_source = QUADD_EVENT_SOURCE_PMU;
		events->event_id = ei->quadd_event_id;

		events->val = val;
		events->prev_val = *prevp;

		/* Remember this reading for the next sample's prev_val. */
		*prevp = val;

		qm_debug_read_counter(events->event_id, events->prev_val,
				      events->val);

		if (++i >= max_events)
			break;

		events++;
		prevp++;
	}

	return i;
}
616
/*
 * Debug-only replacement for pmu_read(): emits a deterministic
 * sawtooth (100..200 step 5) instead of hardware readings.
 * Selected by QUADD_USE_EMULATE_COUNTERS.
 */
static int __maybe_unused
pmu_read_emulate(struct event_data *events, int max_events)
{
	int i = 0;
	static u32 val = 100;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		if (val > 200)
			val = 100;

		/* NOTE(review): stores the previous value as event_id;
		 * pmu_read() uses ei->quadd_event_id here — looks like
		 * a copy/paste slip in this emulation path; confirm.
		 */
		events->event_id = *prevp;
		events->val = val;

		*prevp = val;
		val += 5;

		if (++i >= max_events)
			break;

		events++;
		prevp++;
	}

	return i;
}
647
/*
 * Cross-cpu callback: fold this cpu's free-counter situation into
 * the shared quadd_cntrs_info (@arg) — keep the minimum number of
 * free programmable counters and clear ccntr if any cpu has the
 * cycle counter busy.
 */
static void __get_free_counters(void *arg)
{
	int pcntrs, ccntr;
	DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
	struct quadd_cntrs_info *ci = arg;

	pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

	spin_lock(&ci->lock);

	ci->pcntrs = min_t(int, pcntrs, ci->pcntrs);

	if (!ccntr)
		ci->ccntr = 0;

	pr_info("[%d] pcntrs/ccntr: %d/%d, free_bitmap: %#lx\n",
		smp_processor_id(), pcntrs, ccntr, free_bitmap[0]);

	spin_unlock(&ci->lock);
}
668
669 static void free_events(struct list_head *head)
670 {
671         struct quadd_pmu_event_info *entry, *next;
672
673         list_for_each_entry_safe(entry, next, head, list) {
674                 list_del(&entry->list);
675                 kfree(entry);
676         }
677 }
678
679 static int set_events(int cpuid, int *events, int size)
680 {
681         int free_pcntrs, err;
682         int i, nr_l1_r = 0, nr_l1_w = 0;
683         struct quadd_cntrs_info free_ci;
684
685         struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
686
687
688         local_pmu_ctx->l1_cache_rw = 0;
689
690         free_events(&local_pmu_ctx->used_events);
691
692         if (!events || !size)
693                 return 0;
694
695         if (!local_pmu_ctx->current_map) {
696                 pr_err("Invalid current_map\n");
697                 return -ENODEV;
698         }
699
700         spin_lock_init(&free_ci.lock);
701         free_ci.pcntrs = QUADD_MAX_PMU_COUNTERS;
702         free_ci.ccntr = 1;
703
704         smp_call_function_single(cpuid, __get_free_counters, &free_ci, 1);
705
706         free_pcntrs = free_ci.pcntrs;
707         pr_info("free counters: pcntrs/ccntr: %d/%d\n",
708                 free_pcntrs, free_ci.ccntr);
709
710         pr_info("event identification register: %#x\n",
711                 armv8_pmu_pmceid_read());
712
713         for (i = 0; i < size; i++) {
714                 struct quadd_pmu_event_info *ei;
715
716                 if (events[i] > QUADD_EVENT_TYPE_MAX) {
717                         pr_err("error event: %d\n", events[i]);
718                         err = -EINVAL;
719                         goto out_free;
720                 }
721
722                 ei = kzalloc(sizeof(*ei), GFP_KERNEL);
723                 if (!ei) {
724                         err = -ENOMEM;
725                         goto out_free;
726                 }
727
728                 INIT_LIST_HEAD(&ei->list);
729                 list_add_tail(&ei->list, &local_pmu_ctx->used_events);
730
731                 if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
732                         ei->hw_value = QUADD_ARMV8_CPU_CYCLE_EVENT;
733                         if (!free_ci.ccntr) {
734                                 pr_err("error: cpu cycles counter is already occupied\n");
735                                 err = -EBUSY;
736                                 goto out_free;
737                         }
738                 } else {
739                         if (!free_pcntrs--) {
740                                 pr_err("error: too many performance events\n");
741                                 err = -ENOSPC;
742                                 goto out_free;
743                         }
744
745                         ei->hw_value = local_pmu_ctx->current_map[events[i]];
746                 }
747
748                 ei->quadd_event_id = events[i];
749
750                 if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
751                         nr_l1_r++;
752                 else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
753                         nr_l1_w++;
754
755                 pr_info("Event has been added: id/pmu value: %s/%#x\n",
756                         quadd_get_event_str(events[i]),
757                         ei->hw_value);
758         }
759
760         if (nr_l1_r > 0 && nr_l1_w > 0)
761                 local_pmu_ctx->l1_cache_rw = 1;
762
763         return 0;
764
765 out_free:
766         free_events(&local_pmu_ctx->used_events);
767         return err;
768 }
769
770 static int get_supported_events(int cpuid, int *events, int max_events)
771 {
772         int i, nr_events = 0;
773
774         struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
775
776         if (!local_pmu_ctx->current_map)
777                 return 0;
778
779         max_events = min_t(int, QUADD_EVENT_TYPE_MAX, max_events);
780
781         for (i = 0; i < max_events; i++) {
782                 unsigned int event = local_pmu_ctx->current_map[i];
783
784                 if (event != QUADD_ARMV8_UNSUPPORTED_EVENT)
785                         events[nr_events++] = i;
786         }
787
788         return nr_events;
789 }
790
791 static int get_current_events(int cpuid, int *events, int max_events)
792 {
793         int i = 0;
794         struct quadd_pmu_event_info *ei;
795
796         struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
797
798
799         list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
800                 events[i++] = ei->quadd_event_id;
801
802                 if (i >= max_events)
803                         break;
804         }
805
806         return i;
807 }
808
809 static struct quadd_arch_info *get_arch(int cpuid)
810 {
811         struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
812
813         return local_pmu_ctx->current_map ? &local_pmu_ctx->arch : NULL;
814 }
815
/* Event-source vtable exported to the profiler core. */
static struct quadd_event_source_interface pmu_armv8_int = {
	.enable			= pmu_enable,
	.disable		= pmu_disable,

	.start			= pmu_start,
	.stop			= pmu_stop,

/* QUADD_USE_EMULATE_COUNTERS swaps in the synthetic reader. */
#ifndef QUADD_USE_EMULATE_COUNTERS
	.read			= pmu_read,
#else
	.read			= pmu_read_emulate,
#endif
	.set_events		= set_events,
	.get_supported_events	= get_supported_events,
	.get_current_events	= get_current_events,
	.get_arch		= get_arch,
};
833
834 static int quadd_armv8_pmu_init_for_cpu(int cpuid)
835 {
836         int idx, err = 0;
837         u32 pmcr, ext_ver, idcode = 0;
838         u64 aa64_dfr;
839         u8 implementer;
840         struct cpuinfo_arm64 *local_cpu_data = &per_cpu(cpu_data, cpuid);
841         struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
842         u32 reg_midr = local_cpu_data->reg_midr;
843
844         strncpy(local_pmu_ctx->arch.name, "Unknown",
845                         sizeof(local_pmu_ctx->arch.name));
846
847         local_pmu_ctx->arch.type = QUADD_AA64_CPU_TYPE_UNKNOWN;
848         local_pmu_ctx->arch.ver = 0;
849         local_pmu_ctx->current_map = NULL;
850
851         INIT_LIST_HEAD(&local_pmu_ctx->used_events);
852
853         if (!reg_midr)
854                 return 0;
855
856         implementer = (reg_midr >> 24) & 0xFF;
857
858         aa64_dfr = read_cpuid(ID_AA64DFR0_EL1);
859         aa64_dfr = (aa64_dfr >> 8) & 0x0f;
860
861         if (aa64_dfr != QUADD_AA64_PMUVER_PMUV3)
862                 err = 1;
863
864         if (err == 0 && (implementer == 'A' || implementer == 'N')) {
865
866                 strncpy(local_pmu_ctx->arch.name, "AA64 PmuV3",
867                                 sizeof(local_pmu_ctx->arch.name));
868
869                 idx = sizeof(local_pmu_ctx->arch.name) - 1;
870                 local_pmu_ctx->arch.name[idx] = '\0';
871
872                 local_pmu_ctx->counters_mask =
873                         QUADD_ARMV8_COUNTERS_MASK_PMUV3;
874                 local_pmu_ctx->current_map =
875                         quadd_armv8_pmuv3_arm_events_map;
876
877                 pmcr = armv8_pmu_pmcr_read();
878
879                 idcode = (pmcr >> QUADD_ARMV8_PMCR_IDCODE_SHIFT) &
880                         QUADD_ARMV8_PMCR_IDCODE_MASK;
881
882                 pr_info("imp: %#x, idcode: %#x\n", implementer, idcode);
883         }
884
885         if (err == 0) {
886                 switch (implementer) {
887                 case 'A':
888                         strncat(local_pmu_ctx->arch.name, " ARM",
889                                 sizeof(local_pmu_ctx->arch.name) -
890                                 strlen(local_pmu_ctx->arch.name));
891                         idx = sizeof(local_pmu_ctx->arch.name) - 1;
892                         local_pmu_ctx->arch.name[idx] = '\0';
893
894                         if (idcode == QUADD_AA64_CPU_IDCODE_CORTEX_A53) {
895                                 local_pmu_ctx->arch.type =
896                                         QUADD_AA64_CPU_TYPE_CORTEX_A53;
897
898                                 strncat(local_pmu_ctx->arch.name, " CORTEX-A53",
899                                         sizeof(local_pmu_ctx->arch.name) -
900                                         strlen(local_pmu_ctx->arch.name));
901
902                         } else if (idcode == QUADD_AA64_CPU_IDCODE_CORTEX_A57) {
903                                 local_pmu_ctx->arch.type =
904                                         QUADD_AA64_CPU_TYPE_CORTEX_A57;
905                                 local_pmu_ctx->current_map =
906                                         quadd_armv8_pmuv3_a57_events_map;
907
908                                 strncat(local_pmu_ctx->arch.name, " CORTEX-A57",
909                                         sizeof(local_pmu_ctx->arch.name) -
910                                         strlen(local_pmu_ctx->arch.name));
911                         } else {
912                                 local_pmu_ctx->arch.type =
913                                         QUADD_AA64_CPU_TYPE_ARM;
914                         }
915                         break;
916                 case 'N':
917                         ext_ver = armv8_id_afr0_el1_read();
918
919                         ext_ver = (ext_ver >> QUADD_ARMV8_PMU_NVEXT_SHIFT) &
920                                 QUADD_ARMV8_PMU_NVEXT_MASK;
921
922                         strncat(local_pmu_ctx->arch.name, " NVIDIA (Denver)",
923                                 sizeof(local_pmu_ctx->arch.name) -
924                                 strlen(local_pmu_ctx->arch.name));
925                         local_pmu_ctx->arch.type = QUADD_AA64_CPU_TYPE_DENVER;
926                         local_pmu_ctx->arch.ver = ext_ver;
927                         local_pmu_ctx->current_map =
928                                 quadd_armv8_pmuv3_denver_events_map;
929                         break;
930                 default:
931                         strncat(local_pmu_ctx->arch.name,
932                                 " Unknown implementor code",
933                                 sizeof(local_pmu_ctx->arch.name) -
934                                 strlen(local_pmu_ctx->arch.name));
935                         local_pmu_ctx->arch.type =
936                                 QUADD_AA64_CPU_TYPE_UNKNOWN_IMP;
937                         err = 1;
938                         break;
939                 }
940         }
941
942         local_pmu_ctx->arch.name[sizeof(local_pmu_ctx->arch.name) - 1] = '\0';
943         pr_info("[%d] arch: %s, type: %d, ver: %d\n",
944                 cpuid, local_pmu_ctx->arch.name, local_pmu_ctx->arch.type,
945                 local_pmu_ctx->arch.ver);
946
947         return err;
948 }
949
950 struct quadd_event_source_interface *quadd_armv8_pmu_init(void)
951 {
952         struct quadd_event_source_interface *pmu = NULL;
953         int cpuid;
954         int err;
955         int initialized = 1;
956
957         for_each_possible_cpu(cpuid) {
958                 err = quadd_armv8_pmu_init_for_cpu(cpuid);
959                 if (err) {
960                         initialized = 0;
961                         break;
962                 }
963         }
964
965         if (initialized == 1)
966                 pmu = &pmu_armv8_int;
967         else
968                 pr_err("error: incorrect PMUVer\n");
969
970         return pmu;
971 }
972
973 void quadd_armv8_pmu_deinit(void)
974 {
975         int cpu_id;
976
977         for_each_possible_cpu(cpu_id) {
978                 struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpu_id);
979
980                 if (local_pmu_ctx->current_map)
981                         free_events(&local_pmu_ctx->used_events);
982         }
983 }