misc: tegra-profiler: fix coverity defect

/*
 * drivers/misc/tegra-profiler/armv8_pmu.c
 *
 * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>
#include <linux/types.h>
#include <linux/string.h>

#include <linux/version.h>
#include <linux/err.h>
#include <linux/bitmap.h>
#include <linux/slab.h>

#include <asm/cputype.h>
#include <asm/cpu.h>

#include "arm_pmu.h"
#include "armv8_pmu.h"
#include "armv8_events.h"
#include "quadd.h"
#include "debug.h"

struct quadd_pmu_info {
        DECLARE_BITMAP(used_cntrs, QUADD_MAX_PMU_COUNTERS);
        u32 prev_vals[QUADD_MAX_PMU_COUNTERS];
        int is_already_active;
};

struct quadd_cntrs_info {
        int pcntrs;
        int ccntr;

        spinlock_t lock;
};

static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info);

static DEFINE_PER_CPU(struct quadd_pmu_ctx, pmu_ctx);
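
/*
 * All PMU state is kept per CPU: cpu_pmu_info holds the runtime
 * bookkeeping (which counters this profiler claimed, the previous raw
 * reading of each counter, and whether the PMU was already running when
 * profiling started), while quadd_pmu_ctx (declared elsewhere in this
 * driver) carries the per-core configuration: the event map for that
 * core's microarchitecture, the implemented counter mask, and the list
 * of requested events.
 */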

static unsigned
quadd_armv8_pmuv3_arm_events_map[QUADD_EVENT_TYPE_MAX] = {
        [QUADD_EVENT_TYPE_INSTRUCTIONS] =
                QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
        [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
        [QUADD_EVENT_TYPE_BRANCH_MISSES] =
                QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
        [QUADD_EVENT_TYPE_BUS_CYCLES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,

        [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
        [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
        [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

        [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
                QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
        [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
        [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
};

static unsigned
quadd_armv8_pmuv3_a57_events_map[QUADD_EVENT_TYPE_MAX] = {
        [QUADD_EVENT_TYPE_INSTRUCTIONS] =
                QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
        [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
        [QUADD_EVENT_TYPE_BRANCH_MISSES] =
                QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
        [QUADD_EVENT_TYPE_BUS_CYCLES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,

        [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
                QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_LD,
        [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_ST,
        [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

        [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
                QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_LD,
        [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_ST,
        [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
};

static unsigned
quadd_armv8_pmuv3_denver_events_map[QUADD_EVENT_TYPE_MAX] = {
        [QUADD_EVENT_TYPE_INSTRUCTIONS] =
                QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
        [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
        [QUADD_EVENT_TYPE_BRANCH_MISSES] =
                QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
        [QUADD_EVENT_TYPE_BUS_CYCLES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,

        [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
        [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
        [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,

        [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
        [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
        [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
};
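
/*
 * The tables above translate generic quadd event identifiers into raw
 * ARMv8 PMU event numbers.  QUADD_ARMV8_UNSUPPORTED_EVENT marks events a
 * core cannot count.  Note that the generic map has to fold L1/L2 read
 * and write misses into the combined REFILL events, whereas Cortex-A57
 * exposes separate load/store refill events (..._REFILL_LD/_REFILL_ST),
 * and the Denver map reports no L2 events at all.
 */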

/*********************************************************************/

static inline u32
armv8_pmu_pmcr_read(void)
{
        u32 val;

        /* Read Performance Monitors Control Register */
        asm volatile("mrs %0, pmcr_el0" : "=r" (val));
        return val;
}

static inline void
armv8_pmu_pmcr_write(u32 val)
{
        asm volatile("msr pmcr_el0, %0" : :
                     "r" (val & QUADD_ARMV8_PMCR_WR_MASK));
}

static inline u32
armv8_pmu_pmceid_read(void)
{
        u32 val;

        /* Read Performance Monitors Common Event Identification Register */
        asm volatile("mrs %0, pmceid0_el0" : "=r" (val));
        return val;
}

static inline u32
armv8_pmu_pmcntenset_read(void)
{
        u32 val;

        /* Read Performance Monitors Count Enable Set Register */
        asm volatile("mrs %0, pmcntenset_el0" : "=r" (val));
        return val;
}

static inline void
armv8_pmu_pmcntenset_write(u32 val)
{
        /* Write Performance Monitors Count Enable Set Register */
        asm volatile("msr pmcntenset_el0, %0" : : "r" (val));
}

static inline void
armv8_pmu_pmcntenclr_write(u32 val)
{
        /* Write Performance Monitors Count Enable Clear Register */
        asm volatile("msr pmcntenclr_el0, %0" : : "r" (val));
}

static inline void
armv8_pmu_pmselr_write(u32 val)
{
        /* Write Performance Monitors Event Counter Selection Register */
        asm volatile("msr pmselr_el0, %0" : :
                     "r" (val & QUADD_ARMV8_SELECT_MASK));
}

static inline u64
armv8_pmu_pmccntr_read(void)
{
        u64 val;

        /* Read Performance Monitors Cycle Count Register */
        asm volatile("mrs %0, pmccntr_el0" : "=r" (val));
        return val;
}

static inline void
armv8_pmu_pmccntr_write(u64 val)
{
        /* Write Performance Monitors Cycle Count Register */
        asm volatile("msr pmccntr_el0, %0" : : "r" (val));
}

static inline u32
armv8_pmu_pmxevcntr_read(void)
{
        u32 val;

        /* Read Performance Monitors Selected Event Count Register */
        asm volatile("mrs %0, pmxevcntr_el0" : "=r" (val));
        return val;
}

static inline void
armv8_pmu_pmxevcntr_write(u32 val)
{
        /* Write Performance Monitors Selected Event Count Register */
        asm volatile("msr pmxevcntr_el0, %0" : : "r" (val));
}

static inline void
armv8_pmu_pmxevtyper_write(u32 event)
{
        /* Write Performance Monitors Selected Event Type Register */
        asm volatile("msr pmxevtyper_el0, %0" : :
                     "r" (event & QUADD_ARMV8_EVTSEL_MASK));
}

static inline u32 __maybe_unused
armv8_pmu_pmintenset_read(void)
{
        u32 val;

        /* Read Performance Monitors Interrupt Enable Set Register */
        asm volatile("mrs %0, pmintenset_el1" : "=r" (val));
        return val;
}

static inline void __maybe_unused
armv8_pmu_pmintenset_write(u32 val)
{
        /* Write Performance Monitors Interrupt Enable Set Register */
        asm volatile("msr pmintenset_el1, %0" : : "r" (val));
}

static inline void __maybe_unused
armv8_pmu_pmintenclr_write(u32 val)
{
        /* Write Performance Monitors Interrupt Enable Clear Register */
        asm volatile("msr pmintenclr_el1, %0" : : "r" (val));
}

static inline u32 __maybe_unused
armv8_pmu_pmovsclr_read(void)
{
        u32 val;

        /* Read Performance Monitors Overflow Flag Status Register */
        asm volatile("mrs %0, pmovsclr_el0" : "=r" (val));
        return val;
}

static inline void
armv8_pmu_pmovsclr_write(u32 val)
{
        /*
         * Write Performance Monitors Overflow Flag Status Register.
         * The caller passes a mask of counter overflow bits to clear,
         * so the value is written as-is rather than as BIT(val).
         */
        asm volatile("msr pmovsclr_el0, %0" : : "r" (val));
}

static inline u32
armv8_id_afr0_el1_read(void)
{
        u32 val;

        /* Read Auxiliary Feature Register 0 */
        asm volatile("mrs %0, id_afr0_el1" : "=r" (val));
        return val;
}

static void enable_counter(int idx)
{
        armv8_pmu_pmcntenset_write(BIT(idx));
}

static void disable_counter(int idx)
{
        armv8_pmu_pmcntenclr_write(BIT(idx));
}

static void select_counter(unsigned int counter)
{
        armv8_pmu_pmselr_write(counter);
}

static int is_pmu_enabled(void)
{
        struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
        u32 pmcr = armv8_pmu_pmcr_read();

        if (pmcr & QUADD_ARMV8_PMCR_E) {
                u32 pmcnten = armv8_pmu_pmcntenset_read();

                pmcnten &= local_pmu_ctx->counters_mask | QUADD_ARMV8_CCNT;
                return pmcnten ? 1 : 0;
        }

        return 0;
}
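
/*
 * is_pmu_enabled() detects whether some other agent (typically the kernel
 * perf driver) already has the PMU globally enabled with counters active.
 * In that case the profiler records is_already_active and restricts
 * itself to the counters that are still free instead of resetting the
 * whole unit (see pmu_start()/pmu_stop() below).
 */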

static u32 read_counter(int idx)
{
        u32 val;

        if (idx == QUADD_ARMV8_CCNT_BIT) {
                val = armv8_pmu_pmccntr_read();
        } else {
                select_counter(idx);
                val = armv8_pmu_pmxevcntr_read();
        }

        return val;
}

static void write_counter(int idx, u32 value)
{
        if (idx == QUADD_ARMV8_CCNT_BIT) {
                armv8_pmu_pmccntr_write(value);
        } else {
                select_counter(idx);
                armv8_pmu_pmxevcntr_write(value);
        }
}
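
/*
 * The event counters are banked behind PMSELR_EL0: a counter has to be
 * selected before PMXEVCNTR_EL0/PMXEVTYPER_EL0 accesses take effect on
 * it.  The cycle counter is architecturally 64-bit (PMCCNTR_EL0), but it
 * is truncated to u32 here so that all counters share the same 32-bit
 * wrap-around delta handling.
 */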

static int
get_free_counters(unsigned long *bitmap, int nbits, int *ccntr)
{
        int cc;
        u32 cntens;
        unsigned long cntens_bitmap;

        struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

        cntens = armv8_pmu_pmcntenset_read();
        cntens = ~cntens & (local_pmu_ctx->counters_mask | QUADD_ARMV8_CCNT);

        cntens_bitmap = cntens;

        bitmap_zero(bitmap, nbits);
        bitmap_copy(bitmap, &cntens_bitmap, BITS_PER_BYTE * sizeof(u32));

        cc = (cntens & QUADD_ARMV8_CCNT) ? 1 : 0;

        if (ccntr)
                *ccntr = cc;

        return bitmap_weight(bitmap, BITS_PER_BYTE * sizeof(u32)) - cc;
}
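
/*
 * A counter is considered free when its bit is clear in PMCNTENSET_EL0,
 * i.e. nobody has enabled it yet.  The inverted enable mask is clipped
 * to the counters this core actually implements (counters_mask plus the
 * cycle counter), the cycle counter is reported separately through
 * *ccntr, and the return value is the number of free programmable event
 * counters.
 */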

static void __maybe_unused
disable_interrupt(int idx)
{
        armv8_pmu_pmintenclr_write(BIT(idx));
}

static void
disable_all_interrupts(void)
{
        struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
        u32 val = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;

        armv8_pmu_pmintenclr_write(val);
}

static void
reset_overflow_flags(void)
{
        struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
        u32 val = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;

        armv8_pmu_pmovsclr_write(val);
}

static void
select_event(unsigned int idx, unsigned int event)
{
        select_counter(idx);
        armv8_pmu_pmxevtyper_write(event);
}

static void disable_all_counters(void)
{
        u32 val;
        u32 masked;
        struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

        /* Disable all counters */
        val = armv8_pmu_pmcr_read();
        if (val & QUADD_ARMV8_PMCR_E)
                armv8_pmu_pmcr_write(val & ~QUADD_ARMV8_PMCR_E);

        masked = QUADD_ARMV8_CCNT | local_pmu_ctx->counters_mask;
        armv8_pmu_pmcntenclr_write(masked);
}

static void enable_all_counters(void)
{
        u32 val;

        /* Enable all counters */
        val = armv8_pmu_pmcr_read();
        val |= QUADD_ARMV8_PMCR_E | QUADD_ARMV8_PMCR_X;
        armv8_pmu_pmcr_write(val);
}

static void reset_all_counters(void)
{
        u32 val;

        val = armv8_pmu_pmcr_read();
        val |= QUADD_ARMV8_PMCR_P | QUADD_ARMV8_PMCR_C;
        armv8_pmu_pmcr_write(val);
}

static void quadd_init_pmu(void)
{
        reset_overflow_flags();
        disable_all_interrupts();
}
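
/*
 * PMCR_EL0 bit usage above, per the ARMv8 PMU architecture: E globally
 * enables counting, P and C are write-only one-shot bits that zero the
 * event counters and the cycle counter respectively, and X enables event
 * export.  Since P and C read as zero, reset_all_counters() can simply
 * OR them into the current PMCR value.
 */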

static int pmu_enable(void)
{
        pr_info("pmu was reserved\n");
        return 0;
}

static void __pmu_disable(void *arg)
{
        struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);

        if (!pi->is_already_active) {
                pr_info("[%d] reset all counters\n",
                        smp_processor_id());

                disable_all_counters();
                reset_all_counters();
        } else {
                int idx;

                for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
                        pr_info("[%d] reset counter: %d\n",
                                smp_processor_id(), idx);

                        disable_counter(idx);
                        write_counter(idx, 0);
                }
        }
}

static void pmu_disable(void)
{
        on_each_cpu(__pmu_disable, NULL, 1);
        pr_info("pmu was released\n");
}
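
/*
 * The PMU system registers are per core, so any (re)configuration has to
 * run on the target CPU itself: pmu_disable() uses on_each_cpu() to issue
 * the cross-call everywhere, and set_events() below uses
 * smp_call_function_single() to probe one core's free counters.  If the
 * PMU was shared with another user (is_already_active), only the counters
 * this profiler claimed are torn down.
 */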

static void pmu_start(void)
{
        int idx = 0, pcntrs, ccntr;
        u32 event;
        struct quadd_pmu_ctx *local_pmu_ctx;
        DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
        struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
        u32 *prevp = pi->prev_vals;
        struct quadd_pmu_event_info *ei;

        bitmap_zero(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS);

        if (is_pmu_enabled()) {
                pi->is_already_active = 1;
        } else {
                disable_all_counters();
                quadd_init_pmu();

                pi->is_already_active = 0;
        }

        pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

        local_pmu_ctx = &__get_cpu_var(pmu_ctx);
        list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
                int index;

                *prevp++ = 0;

                event = ei->hw_value;

                if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
                        if (!ccntr) {
                                pr_err_once("Error: cpu cycles counter is already occupied\n");
                                return;
                        }
                        index = QUADD_ARMV8_CCNT_BIT;
                } else {
                        if (!pcntrs--) {
                                pr_err_once("Error: too many performance events\n");
                                return;
                        }

                        index = find_next_bit(free_bitmap,
                                              QUADD_MAX_PMU_COUNTERS, idx);
                        if (index >= QUADD_MAX_PMU_COUNTERS) {
                                pr_err_once("Error: too many events\n");
                                return;
                        }
                        idx = index + 1;
                        select_event(index, event);
                }
                set_bit(index, pi->used_cntrs);

                write_counter(index, 0);
                enable_counter(index);
        }

        if (!pi->is_already_active) {
                reset_all_counters();
                enable_all_counters();
        }

        qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
}
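
/*
 * Counter allocation in pmu_start(): the cycle-count request always maps
 * to the dedicated cycle counter (QUADD_ARMV8_CCNT_BIT); every other
 * event takes the next free programmable counter from free_bitmap, gets
 * its event number programmed via select_event(), is zeroed, and is
 * enabled.  pmu_read() later walks used_events in the same order, which
 * is what keeps prev_vals[] entries matched up with their events.
 */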

static void pmu_stop(void)
{
        int idx;
        struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);

        if (!pi->is_already_active) {
                disable_all_counters();
                reset_all_counters();
        } else {
                for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
                        disable_counter(idx);
                        write_counter(idx, 0);
                }
        }

        qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
}

static int __maybe_unused
pmu_read(struct event_data *events, int max_events)
{
        u32 val;
        int idx = 0, i = 0;
        struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
        struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
        u32 *prevp = pi->prev_vals;
        struct quadd_pmu_event_info *ei;

        if (bitmap_empty(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS)) {
                pr_err_once("Error: counters were not initialized\n");
                return 0;
        }

        list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
                int index;

                if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
                        if (!test_bit(QUADD_ARMV8_CCNT_BIT, pi->used_cntrs)) {
                                pr_err_once("Error: ccntr is not used\n");
                                return 0;
                        }
                        index = QUADD_ARMV8_CCNT_BIT;
                } else {
                        index = find_next_bit(pi->used_cntrs,
                                              QUADD_MAX_PMU_COUNTERS, idx);
                        idx = index + 1;

                        if (index >= QUADD_MAX_PMU_COUNTERS) {
                                pr_err_once("Error: perf counter is not used\n");
                                return 0;
                        }
                }

                val = read_counter(index);

                events->event_source = QUADD_EVENT_SOURCE_PMU;
                events->event_id = ei->quadd_event_id;

                events->val = val;
                events->prev_val = *prevp;

                *prevp = val;

                qm_debug_read_counter(events->event_id, events->prev_val,
                                      events->val);

                if (++i >= max_events)
                        break;

                events++;
                prevp++;
        }

        return i;
}
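
/*
 * pmu_read() hands out raw (val, prev_val) pairs rather than deltas;
 * prev_vals[] caches the last raw reading per counter, so a consumer can
 * presumably compute val - prev_val and rely on unsigned 32-bit
 * arithmetic to absorb a single counter wrap between samples.
 */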

static int __maybe_unused
pmu_read_emulate(struct event_data *events, int max_events)
{
        int i = 0;
        static u32 val = 100;
        struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
        u32 *prevp = pi->prev_vals;
        struct quadd_pmu_event_info *ei;

        struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);

        list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
                if (val > 200)
                        val = 100;

                events->event_source = QUADD_EVENT_SOURCE_PMU;
                events->event_id = ei->quadd_event_id;
                events->val = val;
                events->prev_val = *prevp;

                *prevp = val;
                val += 5;

                if (++i >= max_events)
                        break;

                events++;
                prevp++;
        }

        return i;
}
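
/*
 * pmu_read_emulate() is a test stub selected by QUADD_USE_EMULATE_COUNTERS:
 * it feeds a synthetic ramp (100..200 in steps of 5) through the same
 * event list without touching the hardware counters.
 */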

static void __get_free_counters(void *arg)
{
        int pcntrs, ccntr;
        DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
        struct quadd_cntrs_info *ci = arg;

        pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);

        spin_lock(&ci->lock);

        ci->pcntrs = min_t(int, pcntrs, ci->pcntrs);

        if (!ccntr)
                ci->ccntr = 0;

        pr_info("[%d] pcntrs/ccntr: %d/%d, free_bitmap: %#lx\n",
                smp_processor_id(), pcntrs, ccntr, free_bitmap[0]);

        spin_unlock(&ci->lock);
}

static void free_events(struct list_head *head)
{
        struct quadd_pmu_event_info *entry, *next;

        list_for_each_entry_safe(entry, next, head, list) {
                list_del(&entry->list);
                kfree(entry);
        }
}

static int set_events(int cpuid, int *events, int size)
{
        int free_pcntrs, err;
        int i, nr_l1_r = 0, nr_l1_w = 0;
        struct quadd_cntrs_info free_ci;

        struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);

        local_pmu_ctx->l1_cache_rw = 0;

        free_events(&local_pmu_ctx->used_events);

        if (!events || !size)
                return 0;

        if (!local_pmu_ctx->current_map) {
                pr_err("Invalid current_map\n");
                return -ENODEV;
        }

        spin_lock_init(&free_ci.lock);
        free_ci.pcntrs = QUADD_MAX_PMU_COUNTERS;
        free_ci.ccntr = 1;

        smp_call_function_single(cpuid, __get_free_counters, &free_ci, 1);

        free_pcntrs = free_ci.pcntrs;
        pr_info("free counters: pcntrs/ccntr: %d/%d\n",
                free_pcntrs, free_ci.ccntr);

        pr_info("event identification register: %#x\n",
                armv8_pmu_pmceid_read());

        for (i = 0; i < size; i++) {
                struct quadd_pmu_event_info *ei;

                /* valid ids are 0..QUADD_EVENT_TYPE_MAX-1 (map array size) */
                if (events[i] < 0 || events[i] >= QUADD_EVENT_TYPE_MAX) {
                        pr_err("error event: %d\n", events[i]);
                        err = -EINVAL;
                        goto out_free;
                }

                ei = kzalloc(sizeof(*ei), GFP_KERNEL);
                if (!ei) {
                        err = -ENOMEM;
                        goto out_free;
                }

                INIT_LIST_HEAD(&ei->list);
                list_add_tail(&ei->list, &local_pmu_ctx->used_events);

                if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
                        ei->hw_value = QUADD_ARMV8_CPU_CYCLE_EVENT;
                        if (!free_ci.ccntr) {
                                pr_err("error: cpu cycles counter is already occupied\n");
                                err = -EBUSY;
                                goto out_free;
                        }
                } else {
                        if (!free_pcntrs--) {
                                pr_err("error: too many performance events\n");
                                err = -ENOSPC;
                                goto out_free;
                        }

                        ei->hw_value = local_pmu_ctx->current_map[events[i]];
                }

                ei->quadd_event_id = events[i];

                if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
                        nr_l1_r++;
                else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
                        nr_l1_w++;

                pr_info("Event has been added: id/pmu value: %s/%#x\n",
                        quadd_get_event_str(events[i]),
                        ei->hw_value);
        }

        if (nr_l1_r > 0 && nr_l1_w > 0)
                local_pmu_ctx->l1_cache_rw = 1;

        return 0;

out_free:
        free_events(&local_pmu_ctx->used_events);
        return err;
}
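
/*
 * Usage sketch for set_events() (hypothetical caller, not code from this
 * driver; the ids are the quadd identifiers used by the maps above):
 *
 *	int ids[] = {
 *		QUADD_EVENT_TYPE_CPU_CYCLES,
 *		QUADD_EVENT_TYPE_INSTRUCTIONS,
 *	};
 *
 *	err = set_events(cpuid, ids, ARRAY_SIZE(ids));
 *
 * On success the requested events sit in used_events with their per-core
 * hw_value resolved; on any failure the whole list is freed, so the call
 * is all-or-nothing.
 */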

static int get_supported_events(int cpuid, int *events, int max_events)
{
        int i, nr_events = 0;

        struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);

        max_events = min_t(int, QUADD_EVENT_TYPE_MAX, max_events);

        for (i = 0; i < max_events; i++) {
                unsigned int event = local_pmu_ctx->current_map[i];

                if (event != QUADD_ARMV8_UNSUPPORTED_EVENT)
                        events[nr_events++] = i;
        }
        return nr_events;
}

static int get_current_events(int cpuid, int *events, int max_events)
{
        int i = 0;
        struct quadd_pmu_event_info *ei;

        struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);

        list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
                events[i++] = ei->quadd_event_id;

                if (i >= max_events)
                        break;
        }

        return i;
}

static struct quadd_arch_info *get_arch(int cpuid)
{
        struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);

        return &local_pmu_ctx->arch;
}

static struct quadd_event_source_interface pmu_armv8_int = {
        .enable                 = pmu_enable,
        .disable                = pmu_disable,

        .start                  = pmu_start,
        .stop                   = pmu_stop,

#ifndef QUADD_USE_EMULATE_COUNTERS
        .read                   = pmu_read,
#else
        .read                   = pmu_read_emulate,
#endif
        .set_events             = set_events,
        .get_supported_events   = get_supported_events,
        .get_current_events     = get_current_events,
        .get_arch               = get_arch,
};
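
/*
 * This ops table is the only thing exported to the rest of the profiler:
 * quadd_armv8_pmu_init() below returns it once every possible CPU has
 * passed the PMUv3 probe.  Building with QUADD_USE_EMULATE_COUNTERS
 * swaps in the synthetic pmu_read_emulate() path for testing.
 */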

static int quadd_armv8_pmu_init_for_cpu(int cpuid)
{
        u32 pmcr;
        u32 idcode = 0;
        int err = 0;
        int idx;
        struct cpuinfo_arm64 *local_cpu_data = &per_cpu(cpu_data, cpuid);
        struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
        u32 reg_midr = local_cpu_data->reg_midr;
        u32 ext_ver;
        u64 aa64_dfr;

        /*
         * MIDR_EL1[31:24] is the implementer code: 'A' (0x41) for ARM Ltd,
         * 'N' (0x4e) for NVIDIA.
         */
        char implementer = (reg_midr >> 24) & 0xFF;

        strncpy(local_pmu_ctx->arch.name, "Unknown",
                        sizeof(local_pmu_ctx->arch.name));

        local_pmu_ctx->arch.type = QUADD_AA64_CPU_TYPE_UNKNOWN;
        local_pmu_ctx->arch.ver = 0;
        local_pmu_ctx->current_map = NULL;

        /* ID_AA64DFR0_EL1[11:8] is the PMU version field; require PMUv3 */
        aa64_dfr = read_cpuid(ID_AA64DFR0_EL1);
        aa64_dfr = (aa64_dfr >> 8) & 0x0f;

        if (aa64_dfr != QUADD_AA64_PMUVER_PMUV3)
                err = 1;

        if (err == 0 && (implementer == 'A' || implementer == 'N')) {
                strncpy(local_pmu_ctx->arch.name, "AA64 PmuV3",
                                sizeof(local_pmu_ctx->arch.name));

                idx = sizeof(local_pmu_ctx->arch.name) - 1;
                local_pmu_ctx->arch.name[idx] = '\0';

                local_pmu_ctx->counters_mask =
                        QUADD_ARMV8_COUNTERS_MASK_PMUV3;
                local_pmu_ctx->current_map =
                        quadd_armv8_pmuv3_arm_events_map;

                pmcr = armv8_pmu_pmcr_read();

                idcode = (pmcr >> QUADD_ARMV8_PMCR_IDCODE_SHIFT) &
                        QUADD_ARMV8_PMCR_IDCODE_MASK;

                pr_info("imp: %#x, idcode: %#x\n", implementer, idcode);
        }

        if (err == 0) {
                switch (implementer) {
                case 'A':
                        /*
                         * Leave room for the terminating NUL: strncat()
                         * appends at most n bytes plus '\0'.
                         */
                        strncat(local_pmu_ctx->arch.name, " ARM",
                                sizeof(local_pmu_ctx->arch.name) -
                                strlen(local_pmu_ctx->arch.name) - 1);

                        if (idcode == QUADD_AA64_CPU_IDCODE_CORTEX_A53) {
                                local_pmu_ctx->arch.type =
                                        QUADD_AA64_CPU_TYPE_CORTEX_A53;

                                strncat(local_pmu_ctx->arch.name, " CORTEX-A53",
                                        sizeof(local_pmu_ctx->arch.name) -
                                        strlen(local_pmu_ctx->arch.name) - 1);

                        } else if (idcode == QUADD_AA64_CPU_IDCODE_CORTEX_A57) {
                                local_pmu_ctx->arch.type =
                                        QUADD_AA64_CPU_TYPE_CORTEX_A57;
                                local_pmu_ctx->current_map =
                                        quadd_armv8_pmuv3_a57_events_map;

                                strncat(local_pmu_ctx->arch.name, " CORTEX-A57",
                                        sizeof(local_pmu_ctx->arch.name) -
                                        strlen(local_pmu_ctx->arch.name) - 1);
                        } else {
                                local_pmu_ctx->arch.type =
                                        QUADD_AA64_CPU_TYPE_ARM;
                        }
                        break;
                case 'N':
                        /* NVIDIA extension version, from ID_AFR0_EL1 */
                        ext_ver = armv8_id_afr0_el1_read();

                        ext_ver = (ext_ver >> QUADD_ARMV8_PMU_NVEXT_SHIFT) &
                                QUADD_ARMV8_PMU_NVEXT_MASK;

                        strncat(local_pmu_ctx->arch.name, " NVIDIA (Denver)",
                                sizeof(local_pmu_ctx->arch.name) -
                                strlen(local_pmu_ctx->arch.name) - 1);
                        local_pmu_ctx->arch.type = QUADD_AA64_CPU_TYPE_DENVER;
                        local_pmu_ctx->arch.ver = ext_ver;
                        local_pmu_ctx->current_map =
                                quadd_armv8_pmuv3_denver_events_map;
                        break;
                default:
                        strncat(local_pmu_ctx->arch.name,
                                " Unknown implementer code",
                                sizeof(local_pmu_ctx->arch.name) -
                                strlen(local_pmu_ctx->arch.name) - 1);
                        local_pmu_ctx->arch.type =
                                QUADD_AA64_CPU_TYPE_UNKNOWN_IMP;
                        err = 1;
                        break;
                }
        }

        local_pmu_ctx->arch.name[sizeof(local_pmu_ctx->arch.name) - 1] = '\0';
        pr_info("arch: %s, type: %d, ver: %d\n",
                local_pmu_ctx->arch.name, local_pmu_ctx->arch.type,
                local_pmu_ctx->arch.ver);

        INIT_LIST_HEAD(&local_pmu_ctx->used_events);
        return err;
}

struct quadd_event_source_interface *quadd_armv8_pmu_init(void)
{
        struct quadd_event_source_interface *pmu = NULL;
        int cpuid;
        int err;
        int initialized = 1;

        for_each_possible_cpu(cpuid) {
                err = quadd_armv8_pmu_init_for_cpu(cpuid);
                if (err) {
                        initialized = 0;
                        break;
                }
        }

        if (initialized == 1)
                pmu = &pmu_armv8_int;
        else
                pr_err("error: incorrect PMUVer\n");

        return pmu;
}

void quadd_armv8_pmu_deinit(void)
{
        int cpu_id;

        for_each_possible_cpu(cpu_id) {
                struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpu_id);

                free_events(&local_pmu_ctx->used_events);
        }
}
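
/*
 * Usage sketch for the entry points (hypothetical caller, not part of
 * this file; start/stop act on the calling CPU's counters):
 *
 *	struct quadd_event_source_interface *pmu;
 *	int ids[] = { QUADD_EVENT_TYPE_CPU_CYCLES };
 *
 *	pmu = quadd_armv8_pmu_init();
 *	if (pmu && pmu->set_events(0, ids, ARRAY_SIZE(ids)) == 0) {
 *		pmu->enable();
 *		pmu->start();
 *		...
 *		pmu->stop();
 *		pmu->disable();
 *	}
 *	quadd_armv8_pmu_deinit();
 */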