/*
 * drivers/misc/tegra-profiler/armv8_pmu.c
 *
 * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>
#include <linux/types.h>
#include <linux/string.h>

#include <linux/version.h>
#include <linux/err.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/percpu.h>

#include <asm/cputype.h>
#include <asm/cpu.h>

#include "arm_pmu.h"
#include "armv8_pmu.h"
#include "armv8_events.h"
#include "quadd.h"
#include "debug.h"

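/*
 * Per-CPU profiler state: used_cntrs tracks which hardware counters
 * this driver has claimed, prev_vals holds the last value read from
 * each counter (reported as the val/prev_val pair), and
 * is_already_active records whether the PMU was counting before
 * profiling started.  quadd_cntrs_info is the reply buffer for the
 * cross-CPU free-counter query in __get_free_counters() below.
 */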
struct quadd_pmu_info {
	DECLARE_BITMAP(used_cntrs, QUADD_MAX_PMU_COUNTERS);
	u32 prev_vals[QUADD_MAX_PMU_COUNTERS];
	int is_already_active;
};

struct quadd_cntrs_info {
	int pcntrs;
	int ccntr;

	spinlock_t lock;
};

static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info);

static DEFINE_PER_CPU(struct quadd_pmu_ctx, pmu_ctx);

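/*
 * Maps from generic quadd event types to ARMv8 PMUv3 event numbers.
 * QUADD_ARMV8_UNSUPPORTED_EVENT marks events a core cannot count.
 * The generic ARM map is the default; Cortex-A57 distinguishes L1/L2
 * data-cache read and write refills (separate LD/ST events), while
 * the Denver map leaves all L2 refill events unsupported.
 */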
static unsigned
quadd_armv8_pmuv3_arm_events_map[QUADD_EVENT_TYPE_MAX] = {
	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_TYPE_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};

static unsigned
quadd_armv8_pmuv3_a57_events_map[QUADD_EVENT_TYPE_MAX] = {
	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_TYPE_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_LD,
	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_ST,
	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_LD,
	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_ST,
	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};

static unsigned
quadd_armv8_pmuv3_denver_events_map[QUADD_EVENT_TYPE_MAX] = {
	[QUADD_EVENT_TYPE_INSTRUCTIONS] =
		QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
	[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_BRANCH_MISSES] =
		QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
	[QUADD_EVENT_TYPE_BUS_CYCLES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,

	[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
	[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
		QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

	[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
	[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
		QUADD_ARMV8_UNSUPPORTED_EVENT,
};

/*********************************************************************/

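/*
 * Low-level accessors for the PMUv3 system registers, one mrs/msr
 * instruction each.  Writes to registers with reserved bits (PMCR,
 * PMSELR, PMXEVTYPER) are masked first.  The event counters are
 * accessed indirectly: software writes the counter index to
 * PMSELR_EL0 and then reads or writes the selected counter through
 * PMXEVCNTR_EL0/PMXEVTYPER_EL0, e.g.:
 *
 *	armv8_pmu_pmselr_write(idx);
 *	val = armv8_pmu_pmxevcntr_read();
 */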
static inline u32
armv8_pmu_pmcr_read(void)
{
	u32 val;

	/* Read Performance Monitors Control Register */
	asm volatile("mrs %0, pmcr_el0" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmcr_write(u32 val)
{
	asm volatile("msr pmcr_el0, %0" : :
		     "r" (val & QUADD_ARMV8_PMCR_WR_MASK));
}

static inline u32
armv8_pmu_pmceid_read(void)
{
	u32 val;

	/* Read Performance Monitors Common Event Identification Register */
	asm volatile("mrs %0, pmceid0_el0" : "=r" (val));
	return val;
}

static inline u32
armv8_pmu_pmcntenset_read(void)
{
	u32 val;

	/* Read Performance Monitors Count Enable Set Register */
	asm volatile("mrs %0, pmcntenset_el0" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmcntenset_write(u32 val)
{
	/* Write Performance Monitors Count Enable Set Register */
	asm volatile("msr pmcntenset_el0, %0" : : "r" (val));
}

static inline void
armv8_pmu_pmcntenclr_write(u32 val)
{
	/* Write Performance Monitors Count Enable Clear Register */
	asm volatile("msr pmcntenclr_el0, %0" : : "r" (val));
}

static inline void
armv8_pmu_pmselr_write(u32 val)
{
	/* Write Performance Monitors Event Counter Selection Register */
	asm volatile("msr pmselr_el0, %0" : :
		     "r" (val & QUADD_ARMV8_SELECT_MASK));
}

static inline u64
armv8_pmu_pmccntr_read(void)
{
	u64 val;

	/* Read Performance Monitors Cycle Count Register */
	asm volatile("mrs %0, pmccntr_el0" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmccntr_write(u64 val)
{
	/* Write Performance Monitors Cycle Count Register */
	asm volatile("msr pmccntr_el0, %0" : : "r" (val));
}

static inline u32
armv8_pmu_pmxevcntr_read(void)
{
	u32 val;

	/* Read Performance Monitors Selected Event Count Register */
	asm volatile("mrs %0, pmxevcntr_el0" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmxevcntr_write(u32 val)
{
	/* Write Performance Monitors Selected Event Count Register */
	asm volatile("msr pmxevcntr_el0, %0" : : "r" (val));
}

static inline void
armv8_pmu_pmxevtyper_write(u32 event)
{
	/* Write Performance Monitors Selected Event Type Register */
	asm volatile("msr pmxevtyper_el0, %0" : :
		     "r" (event & QUADD_ARMV8_EVTSEL_MASK));
}

static inline u32
armv8_pmu_pmintenset_read(void)
{
	u32 val;

	/* Read Performance Monitors Interrupt Enable Set Register */
	asm volatile("mrs %0, pmintenset_el1" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmintenset_write(u32 val)
{
	/* Write Performance Monitors Interrupt Enable Set Register */
	asm volatile("msr pmintenset_el1, %0" : : "r" (val));
}

static inline void
armv8_pmu_pmintenclr_write(u32 val)
{
	/* Write Performance Monitors Interrupt Enable Clear Register */
	asm volatile("msr pmintenclr_el1, %0" : : "r" (val));
}

static inline u32
armv8_pmu_pmovsclr_read(void)
{
	u32 val;

	/* Read Performance Monitors Overflow Flag Status Clear Register */
	asm volatile("mrs %0, pmovsclr_el0" : "=r" (val));
	return val;
}

static inline void
armv8_pmu_pmovsclr_write(u32 mask)
{
	/*
	 * Write Performance Monitors Overflow Flag Status Clear Register.
	 * The argument is a mask of counters, not a counter index: every
	 * set bit clears the corresponding overflow flag.  This matches
	 * the mask that reset_overflow_flags() passes in.
	 */
	asm volatile("msr pmovsclr_el0, %0" : : "r" (mask));
}

static inline u32
armv8_id_afr0_el1_read(void)
{
	u32 val;

	/* Read Auxiliary Feature Register 0 */
	asm volatile("mrs %0, id_afr0_el1" : "=r" (val));
	return val;
}

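/*
 * Mid-level counter helpers.  Event counters are addressed by index
 * 0..N-1; the cycle counter is the special index QUADD_ARMV8_CCNT_BIT
 * with its own 64-bit register, so read_counter()/write_counter()
 * dispatch on the index.  A sketch of the per-event sequence that
 * pmu_start() below performs with these helpers:
 *
 *	select_event(idx, event);	// PMSELR + PMXEVTYPER
 *	write_counter(idx, 0);		// start counting from zero
 *	enable_counter(idx);		// set bit idx in PMCNTENSET
 *	...
 *	val = read_counter(idx);	// PMSELR + PMXEVCNTR
 */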
static void enable_counter(int idx)
{
	armv8_pmu_pmcntenset_write(BIT(idx));
}

static void disable_counter(int idx)
{
	armv8_pmu_pmcntenclr_write(BIT(idx));
}

static void select_counter(unsigned int counter)
{
	armv8_pmu_pmselr_write(counter);
}

static int is_pmu_enabled(void)
{
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 pmcr = armv8_pmu_pmcr_read();

	if (pmcr & QUADD_ARMV8_PMCR_E) {
		u32 pmcnten = armv8_pmu_pmcntenset_read();

		pmcnten &= local_pmu_ctx->counters_mask | QUADD_ARMV8_CCNT;
		return pmcnten ? 1 : 0;
	}

	return 0;
}

static u32 read_counter(int idx)
{
	u32 val;

	if (idx == QUADD_ARMV8_CCNT_BIT) {
		val = armv8_pmu_pmccntr_read();
	} else {
		select_counter(idx);
		val = armv8_pmu_pmxevcntr_read();
	}

	return val;
}

static void write_counter(int idx, u32 value)
{
	if (idx == QUADD_ARMV8_CCNT_BIT) {
		armv8_pmu_pmccntr_write(value);
	} else {
		select_counter(idx);
		armv8_pmu_pmxevcntr_write(value);
	}
}

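/*
 * Derive the set of free counters from the inverse of PMCNTENSET: any
 * counter that is not currently enabled (by this driver or by another
 * PMU user such as perf) is treated as free.  On return, *ccntr says
 * whether the cycle counter is free, and the result is the number of
 * free event counters.
 */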
static int
get_free_counters(unsigned long *bitmap, int nbits, int *ccntr)
{
	int cc;
	u32 cntens;

	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

	cntens = armv8_pmu_pmcntenset_read();
	cntens = ~cntens & (local_pmu_ctx->counters_mask | QUADD_ARMV8_CCNT);

	bitmap_zero(bitmap, nbits);
	bitmap_copy(bitmap, (unsigned long *)&cntens,
		    BITS_PER_BYTE * sizeof(u32));

	cc = (cntens & QUADD_ARMV8_CCNT) ? 1 : 0;

	if (ccntr)
		*ccntr = cc;

	return bitmap_weight(bitmap, BITS_PER_BYTE * sizeof(u32)) - cc;
}

static void __maybe_unused
disable_interrupt(int idx)
{
	armv8_pmu_pmintenclr_write(BIT(idx));
}

static void
disable_all_interrupts(void)
{
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 val = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;

	armv8_pmu_pmintenclr_write(val);
}

static void
reset_overflow_flags(void)
{
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 val = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;

	armv8_pmu_pmovsclr_write(val);
}

static void
select_event(unsigned int idx, unsigned int event)
{
	select_counter(idx);
	armv8_pmu_pmxevtyper_write(event);
}

static void disable_all_counters(void)
{
	u32 val;
	u32 masked;
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

	/* Disable all counters */
	val = armv8_pmu_pmcr_read();
	if (val & QUADD_ARMV8_PMCR_E)
		armv8_pmu_pmcr_write(val & ~QUADD_ARMV8_PMCR_E);

	masked = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;
	armv8_pmu_pmcntenclr_write(masked);
}

static void enable_all_counters(void)
{
	u32 val;

	/* Enable all counters */
	val = armv8_pmu_pmcr_read();
	val |= QUADD_ARMV8_PMCR_E | QUADD_ARMV8_PMCR_X;
	armv8_pmu_pmcr_write(val);
}

static void reset_all_counters(void)
{
	u32 val;

	val = armv8_pmu_pmcr_read();
	val |= QUADD_ARMV8_PMCR_P | QUADD_ARMV8_PMCR_C;
	armv8_pmu_pmcr_write(val);
}

static void quadd_init_pmu(void)
{
	reset_overflow_flags();
	disable_all_interrupts();
}

static int pmu_enable(void)
{
	pr_info("pmu was reserved\n");
	return 0;
}

static void __pmu_disable(void *arg)
{
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);

	if (!pi->is_already_active) {
		pr_info("[%d] reset all counters\n",
			smp_processor_id());

		disable_all_counters();
		reset_all_counters();
	} else {
		int idx;

		for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
			pr_info("[%d] reset counter: %d\n",
				smp_processor_id(), idx);

			disable_counter(idx);
			write_counter(idx, 0);
		}
	}
}

static void pmu_disable(void)
{
	on_each_cpu(__pmu_disable, NULL, 1);
	pr_info("pmu was released\n");
}

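/*
 * pmu_start() runs on each profiled CPU.  If the PMU is already
 * counting for someone else, only the counters this driver claims are
 * touched; otherwise the whole unit is reset first.  Each requested
 * event is then bound to a counter: QUADD_EVENT_TYPE_CPU_CYCLES takes
 * the dedicated cycle counter, every other event takes the next free
 * event counter from the bitmap computed by get_free_counters().
 */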
static void pmu_start(void)
{
	int idx = 0, pcntrs, ccntr;
	u32 event;
	struct quadd_pmu_ctx *local_pmu_ctx;
	DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	bitmap_zero(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS);

	if (is_pmu_enabled()) {
		pi->is_already_active = 1;
	} else {
		disable_all_counters();
		quadd_init_pmu();

		pi->is_already_active = 0;
	}

	pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

	local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		int index;

		*prevp++ = 0;

		event = ei->hw_value;

		if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
			if (!ccntr) {
				pr_err_once("Error: cpu cycles counter is already occupied\n");
				return;
			}
			index = QUADD_ARMV8_CCNT_BIT;
		} else {
			if (!pcntrs--) {
				pr_err_once("Error: too many performance events\n");
				return;
			}

			index = find_next_bit(free_bitmap,
					      QUADD_MAX_PMU_COUNTERS, idx);
			if (index >= QUADD_MAX_PMU_COUNTERS) {
				pr_err_once("Error: too many events\n");
				return;
			}
			idx = index + 1;
			select_event(index, event);
		}
		set_bit(index, pi->used_cntrs);

		write_counter(index, 0);
		enable_counter(index);
	}

	if (!pi->is_already_active) {
		reset_all_counters();
		enable_all_counters();
	}

	qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
}

static void pmu_stop(void)
{
	int idx;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);

	if (!pi->is_already_active) {
		disable_all_counters();
		reset_all_counters();
	} else {
		for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
			disable_counter(idx);
			write_counter(idx, 0);
		}
	}

	qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
}

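/*
 * pmu_read() reports raw 32-bit counter values as val/prev_val pairs;
 * computing deltas is left to the caller.  Events are visited in the
 * order in which pmu_start() assigned counters, so the same bitmap
 * scan reproduces each event's counter index.
 */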
static int __maybe_unused
pmu_read(struct event_data *events, int max_events)
{
	u32 val;
	int idx = 0, i = 0;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	if (bitmap_empty(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS)) {
		pr_err_once("Error: counters were not initialized\n");
		return 0;
	}

	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		int index;

		if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
			if (!test_bit(QUADD_ARMV8_CCNT_BIT, pi->used_cntrs)) {
				pr_err_once("Error: ccntr is not used\n");
				return 0;
			}
			index = QUADD_ARMV8_CCNT_BIT;
		} else {
			index = find_next_bit(pi->used_cntrs,
					      QUADD_MAX_PMU_COUNTERS, idx);
			idx = index + 1;

			if (index >= QUADD_MAX_PMU_COUNTERS) {
				pr_err_once("Error: perf counter is not used\n");
				return 0;
			}
		}

		val = read_counter(index);

		events->event_source = QUADD_EVENT_SOURCE_PMU;
		events->event_id = ei->quadd_event_id;

		events->val = val;
		events->prev_val = *prevp;

		*prevp = val;

		qm_debug_read_counter(events->event_id, events->prev_val,
				      events->val);

		if (++i >= max_events)
			break;

		events++;
		prevp++;
	}

	return i;
}

static int __maybe_unused
pmu_read_emulate(struct event_data *events, int max_events)
{
	int i = 0;
	static u32 val = 100;
	struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
	u32 *prevp = pi->prev_vals;
	struct quadd_pmu_event_info *ei;

	struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

	/* Emulated counters: fill the same fields as pmu_read() */
	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		if (val > 200)
			val = 100;

		events->event_source = QUADD_EVENT_SOURCE_PMU;
		events->event_id = ei->quadd_event_id;

		events->val = val;
		events->prev_val = *prevp;

		*prevp = val;
		val += 5;

		if (++i >= max_events)
			break;

		events++;
		prevp++;
	}

	return i;
}

static void __get_free_counters(void *arg)
{
	int pcntrs, ccntr;
	DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
	struct quadd_cntrs_info *ci = arg;

	pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

	spin_lock(&ci->lock);

	ci->pcntrs = min_t(int, pcntrs, ci->pcntrs);

	if (!ccntr)
		ci->ccntr = 0;

	pr_info("[%d] pcntrs/ccntr: %d/%d, free_bitmap: %#lx\n",
		smp_processor_id(), pcntrs, ccntr, free_bitmap[0]);

	spin_unlock(&ci->lock);
}

static void free_events(struct list_head *head)
{
	struct quadd_pmu_event_info *entry, *next;

	list_for_each_entry_safe(entry, next, head, list) {
		list_del(&entry->list);
		kfree(entry);
	}
}

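/*
 * set_events() validates and records the requested event list for one
 * CPU.  __get_free_counters() runs on that CPU via
 * smp_call_function_single() to learn how many counters are actually
 * available; each event is then translated through the per-core map.
 * Requests that exceed the free counters, or events the core cannot
 * count, are rejected.  Counting both L1D read and write misses is
 * flagged in l1_cache_rw.
 */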
static int set_events(int cpuid, int *events, int size)
{
	int free_pcntrs, err;
	int i, nr_l1_r = 0, nr_l1_w = 0;
	struct quadd_cntrs_info free_ci;

	struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);

	local_pmu_ctx->l1_cache_rw = 0;

	free_events(&local_pmu_ctx->used_events);

	if (!events || !size)
		return 0;

	if (!local_pmu_ctx->current_map) {
		pr_err("Invalid current_map\n");
		return -ENODEV;
	}

	spin_lock_init(&free_ci.lock);
	free_ci.pcntrs = QUADD_MAX_PMU_COUNTERS;
	free_ci.ccntr = 1;

	smp_call_function_single(cpuid, __get_free_counters, &free_ci, 1);

	free_pcntrs = free_ci.pcntrs;
	pr_info("free counters: pcntrs/ccntr: %d/%d\n",
		free_pcntrs, free_ci.ccntr);

	pr_info("event identification register: %#x\n",
		armv8_pmu_pmceid_read());

	for (i = 0; i < size; i++) {
		struct quadd_pmu_event_info *ei;

		/* event ids index current_map[QUADD_EVENT_TYPE_MAX] */
		if (events[i] < 0 || events[i] >= QUADD_EVENT_TYPE_MAX) {
			pr_err("invalid event: %d\n", events[i]);
			err = -EINVAL;
			goto out_free;
		}

		ei = kzalloc(sizeof(*ei), GFP_KERNEL);
		if (!ei) {
			err = -ENOMEM;
			goto out_free;
		}

		INIT_LIST_HEAD(&ei->list);
		list_add_tail(&ei->list, &local_pmu_ctx->used_events);

		if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
			ei->hw_value = QUADD_ARMV8_CPU_CYCLE_EVENT;
			if (!free_ci.ccntr) {
				pr_err("error: cpu cycles counter is already occupied\n");
				err = -EBUSY;
				goto out_free;
			}
		} else {
			if (!free_pcntrs--) {
				pr_err("error: too many performance events\n");
				err = -ENOSPC;
				goto out_free;
			}

			ei->hw_value = local_pmu_ctx->current_map[events[i]];
		}

		ei->quadd_event_id = events[i];

		if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
			nr_l1_r++;
		else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
			nr_l1_w++;

		pr_info("Event has been added: id/pmu value: %s/%#x\n",
			quadd_get_event_str(events[i]),
			ei->hw_value);
	}

	if (nr_l1_r > 0 && nr_l1_w > 0)
		local_pmu_ctx->l1_cache_rw = 1;

	return 0;

out_free:
	free_events(&local_pmu_ctx->used_events);
	return err;
}

static int get_supported_events(int cpuid, int *events, int max_events)
{
	int i, nr_events = 0;

	struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);

	max_events = min_t(int, QUADD_EVENT_TYPE_MAX, max_events);

	for (i = 0; i < max_events; i++) {
		unsigned int event = local_pmu_ctx->current_map[i];

		if (event != QUADD_ARMV8_UNSUPPORTED_EVENT)
			events[nr_events++] = i;
	}
	return nr_events;
}

static int get_current_events(int cpuid, int *events, int max_events)
{
	int i = 0;
	struct quadd_pmu_event_info *ei;

	struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);

	list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
		events[i++] = ei->quadd_event_id;

		if (i >= max_events)
			break;
	}

	return i;
}

static struct quadd_arch_info *get_arch(int cpuid)
{
	struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);

	return &local_pmu_ctx->arch;
}

static struct quadd_event_source_interface pmu_armv8_int = {
	.enable			= pmu_enable,
	.disable		= pmu_disable,

	.start			= pmu_start,
	.stop			= pmu_stop,

#ifndef QUADD_USE_EMULATE_COUNTERS
	.read			= pmu_read,
#else
	.read			= pmu_read_emulate,
#endif
	.set_events		= set_events,
	.get_supported_events	= get_supported_events,
	.get_current_events	= get_current_events,
	.get_arch		= get_arch,
};

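/*
 * Per-CPU probe: ID_AA64DFR0_EL1.PMUVer (bits [11:8]) must report
 * PMUv3; the MIDR implementer byte ('A' for ARM, 'N' for NVIDIA) and
 * the PMCR IDCODE field then select the event map.  Cortex-A53 keeps
 * the generic ARM map, Cortex-A57 and Denver get their own maps.
 */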
static int quadd_armv8_pmu_init_for_cpu(int cpuid)
{
	u32 pmcr;
	u32 idcode = 0;
	int err = 0;
	int idx;
	struct cpuinfo_arm64 *local_cpu_data = &per_cpu(cpu_data, cpuid);
	struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
	u32 reg_midr = local_cpu_data->reg_midr;
	u32 ext_ver;
	u64 aa64_dfr;

	char implementer = (reg_midr >> 24) & 0xFF;

	strncpy(local_pmu_ctx->arch.name, "Unknown",
			sizeof(local_pmu_ctx->arch.name));

	local_pmu_ctx->arch.type = QUADD_AA64_CPU_TYPE_UNKNOWN;
	local_pmu_ctx->arch.ver = 0;
	local_pmu_ctx->current_map = NULL;

	/* ID_AA64DFR0_EL1.PMUVer, bits [11:8] */
	aa64_dfr = read_cpuid(ID_AA64DFR0_EL1);
	aa64_dfr = (aa64_dfr >> 8) & 0x0f;

	if (aa64_dfr != QUADD_AA64_PMUVER_PMUV3)
		err = 1;

	if (err == 0 && (implementer == 'A' || implementer == 'N')) {

		strncpy(local_pmu_ctx->arch.name, "AA64 PmuV3",
				sizeof(local_pmu_ctx->arch.name));

		idx = sizeof(local_pmu_ctx->arch.name) - 1;
		local_pmu_ctx->arch.name[idx] = '\0';

		local_pmu_ctx->counters_mask =
			QUADD_ARMV8_COUNTERS_MASK_PMUV3;
		local_pmu_ctx->current_map =
			quadd_armv8_pmuv3_arm_events_map;

		pmcr = armv8_pmu_pmcr_read();

		idcode = (pmcr >> QUADD_ARMV8_PMCR_IDCODE_SHIFT) &
			QUADD_ARMV8_PMCR_IDCODE_MASK;

		pr_info("imp: %#x, idcode: %#x\n", implementer, idcode);
	}

	if (err == 0) {
		switch (implementer) {
		case 'A':
			/* leave room for the terminating NUL */
			strncat(local_pmu_ctx->arch.name, " ARM",
				sizeof(local_pmu_ctx->arch.name) -
				strlen(local_pmu_ctx->arch.name) - 1);

			if (idcode == QUADD_AA64_CPU_IDCODE_CORTEX_A53) {
				local_pmu_ctx->arch.type =
					QUADD_AA64_CPU_TYPE_CORTEX_A53;

				strncat(local_pmu_ctx->arch.name, " CORTEX-A53",
					sizeof(local_pmu_ctx->arch.name) -
					strlen(local_pmu_ctx->arch.name) - 1);

			} else if (idcode == QUADD_AA64_CPU_IDCODE_CORTEX_A57) {
				local_pmu_ctx->arch.type =
					QUADD_AA64_CPU_TYPE_CORTEX_A57;
				local_pmu_ctx->current_map =
					quadd_armv8_pmuv3_a57_events_map;

				strncat(local_pmu_ctx->arch.name, " CORTEX-A57",
					sizeof(local_pmu_ctx->arch.name) -
					strlen(local_pmu_ctx->arch.name) - 1);
			} else {
				local_pmu_ctx->arch.type =
					QUADD_AA64_CPU_TYPE_ARM;
			}
			break;
		case 'N':
			ext_ver = armv8_id_afr0_el1_read();

			ext_ver = (ext_ver >> QUADD_ARMV8_PMU_NVEXT_SHIFT) &
				QUADD_ARMV8_PMU_NVEXT_MASK;

			strncat(local_pmu_ctx->arch.name, " NVIDIA (Denver)",
				sizeof(local_pmu_ctx->arch.name) -
				strlen(local_pmu_ctx->arch.name) - 1);
			local_pmu_ctx->arch.type = QUADD_AA64_CPU_TYPE_DENVER;
			local_pmu_ctx->arch.ver = ext_ver;
			local_pmu_ctx->current_map =
				quadd_armv8_pmuv3_denver_events_map;
			break;
		default:
			strncat(local_pmu_ctx->arch.name,
				" Unknown implementer code",
				sizeof(local_pmu_ctx->arch.name) -
				strlen(local_pmu_ctx->arch.name) - 1);
			local_pmu_ctx->arch.type =
				QUADD_AA64_CPU_TYPE_UNKNOWN_IMP;
			err = 1;
			break;
		}
	}

	local_pmu_ctx->arch.name[sizeof(local_pmu_ctx->arch.name) - 1] = '\0';
	pr_info("arch: %s, type: %d, ver: %d\n",
		local_pmu_ctx->arch.name, local_pmu_ctx->arch.type,
		local_pmu_ctx->arch.ver);

	INIT_LIST_HEAD(&local_pmu_ctx->used_events);
	return err;
}

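/*
 * Entry points for the profiler core: probe every possible CPU and
 * hand back the event-source interface only if all of them report a
 * usable PMUv3 implementation.
 */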
struct quadd_event_source_interface *quadd_armv8_pmu_init(void)
{
	struct quadd_event_source_interface *pmu = NULL;
	int cpuid;
	int err;
	int initialized = 1;

	for_each_possible_cpu(cpuid) {
		err = quadd_armv8_pmu_init_for_cpu(cpuid);
		if (err) {
			initialized = 0;
			break;
		}
	}

	if (initialized == 1)
		pmu = &pmu_armv8_int;
	else
		pr_err("error: incorrect PMUVer\n");

	return pmu;
}

void quadd_armv8_pmu_deinit(void)
{
	int cpu_id;

	for_each_possible_cpu(cpu_id) {
		struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpu_id);

		free_events(&local_pmu_ctx->used_events);
	}
}