perf: Rework the PMU methods
[linux-2.6.git] / arch / powerpc / kernel / perf_event_fsl_emb.c
1 /*
2  * Performance event support - Freescale Embedded Performance Monitor
3  *
4  * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5  * Copyright 2010 Freescale Semiconductor, Inc.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version
10  * 2 of the License, or (at your option) any later version.
11  */
12 #include <linux/kernel.h>
13 #include <linux/sched.h>
14 #include <linux/perf_event.h>
15 #include <linux/percpu.h>
16 #include <linux/hardirq.h>
17 #include <asm/reg_fsl_emb.h>
18 #include <asm/pmc.h>
19 #include <asm/machdep.h>
20 #include <asm/firmware.h>
21 #include <asm/ptrace.h>
22
23 struct cpu_hw_events {
24         int n_events;
25         int disabled;
26         u8  pmcs_enabled;
27         struct perf_event *event[MAX_HWEVENTS];
28 };
29 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
30
31 static struct fsl_emb_pmu *ppmu;
32
33 /* Number of perf_events counting hardware events */
34 static atomic_t num_events;
35 /* Used to avoid races in calling reserve/release_pmc_hardware */
36 static DEFINE_MUTEX(pmc_reserve_mutex);
37
/*
 * A PMU interrupt that arrives while interrupts were soft-disabled is
 * handled as an NMI.  Only 64-bit builds look at regs->softe; other
 * builds always report "not an NMI".
 */
static inline int perf_intr_is_nmi(struct pt_regs *regs)
{
#ifndef __powerpc64__
	return 0;
#else
	return regs->softe == 0;
#endif
}
50
/* Defined further down; needed early because event_init passes it to reserve_pmc_hardware(). */
static void perf_event_interrupt(struct pt_regs *regs);
52
53 /*
54  * Read one performance monitor counter (PMC).
55  */
56 static unsigned long read_pmc(int idx)
57 {
58         unsigned long val;
59
60         switch (idx) {
61         case 0:
62                 val = mfpmr(PMRN_PMC0);
63                 break;
64         case 1:
65                 val = mfpmr(PMRN_PMC1);
66                 break;
67         case 2:
68                 val = mfpmr(PMRN_PMC2);
69                 break;
70         case 3:
71                 val = mfpmr(PMRN_PMC3);
72                 break;
73         default:
74                 printk(KERN_ERR "oops trying to read PMC%d\n", idx);
75                 val = 0;
76         }
77         return val;
78 }
79
80 /*
81  * Write one PMC.
82  */
83 static void write_pmc(int idx, unsigned long val)
84 {
85         switch (idx) {
86         case 0:
87                 mtpmr(PMRN_PMC0, val);
88                 break;
89         case 1:
90                 mtpmr(PMRN_PMC1, val);
91                 break;
92         case 2:
93                 mtpmr(PMRN_PMC2, val);
94                 break;
95         case 3:
96                 mtpmr(PMRN_PMC3, val);
97                 break;
98         default:
99                 printk(KERN_ERR "oops trying to write PMC%d\n", idx);
100         }
101
102         isync();
103 }
104
105 /*
106  * Write one local control A register
107  */
108 static void write_pmlca(int idx, unsigned long val)
109 {
110         switch (idx) {
111         case 0:
112                 mtpmr(PMRN_PMLCA0, val);
113                 break;
114         case 1:
115                 mtpmr(PMRN_PMLCA1, val);
116                 break;
117         case 2:
118                 mtpmr(PMRN_PMLCA2, val);
119                 break;
120         case 3:
121                 mtpmr(PMRN_PMLCA3, val);
122                 break;
123         default:
124                 printk(KERN_ERR "oops trying to write PMLCA%d\n", idx);
125         }
126
127         isync();
128 }
129
130 /*
131  * Write one local control B register
132  */
133 static void write_pmlcb(int idx, unsigned long val)
134 {
135         switch (idx) {
136         case 0:
137                 mtpmr(PMRN_PMLCB0, val);
138                 break;
139         case 1:
140                 mtpmr(PMRN_PMLCB1, val);
141                 break;
142         case 2:
143                 mtpmr(PMRN_PMLCB2, val);
144                 break;
145         case 3:
146                 mtpmr(PMRN_PMLCB3, val);
147                 break;
148         default:
149                 printk(KERN_ERR "oops trying to write PMLCB%d\n", idx);
150         }
151
152         isync();
153 }
154
155 static void fsl_emb_pmu_read(struct perf_event *event)
156 {
157         s64 val, delta, prev;
158
159         if (event->hw.state & PERF_HES_STOPPED)
160                 return;
161
162         /*
163          * Performance monitor interrupts come even when interrupts
164          * are soft-disabled, as long as interrupts are hard-enabled.
165          * Therefore we treat them like NMIs.
166          */
167         do {
168                 prev = local64_read(&event->hw.prev_count);
169                 barrier();
170                 val = read_pmc(event->hw.idx);
171         } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
172
173         /* The counters are only 32 bits wide */
174         delta = (val - prev) & 0xfffffffful;
175         local64_add(delta, &event->count);
176         local64_sub(delta, &event->hw.period_left);
177 }
178
179 /*
180  * Disable all events to prevent PMU interrupts and to allow
181  * events to be added or removed.
182  */
183 static void fsl_emb_pmu_disable(struct pmu *pmu)
184 {
185         struct cpu_hw_events *cpuhw;
186         unsigned long flags;
187
188         local_irq_save(flags);
189         cpuhw = &__get_cpu_var(cpu_hw_events);
190
191         if (!cpuhw->disabled) {
192                 cpuhw->disabled = 1;
193
194                 /*
195                  * Check if we ever enabled the PMU on this cpu.
196                  */
197                 if (!cpuhw->pmcs_enabled) {
198                         ppc_enable_pmcs();
199                         cpuhw->pmcs_enabled = 1;
200                 }
201
202                 if (atomic_read(&num_events)) {
203                         /*
204                          * Set the 'freeze all counters' bit, and disable
205                          * interrupts.  The barrier is to make sure the
206                          * mtpmr has been executed and the PMU has frozen
207                          * the events before we return.
208                          */
209
210                         mtpmr(PMRN_PMGC0, PMGC0_FAC);
211                         isync();
212                 }
213         }
214         local_irq_restore(flags);
215 }
216
217 /*
218  * Re-enable all events if disable == 0.
219  * If we were previously disabled and events were added, then
220  * put the new config on the PMU.
221  */
222 static void fsl_emb_pmu_enable(struct pmu *pmu)
223 {
224         struct cpu_hw_events *cpuhw;
225         unsigned long flags;
226
227         local_irq_save(flags);
228         cpuhw = &__get_cpu_var(cpu_hw_events);
229         if (!cpuhw->disabled)
230                 goto out;
231
232         cpuhw->disabled = 0;
233         ppc_set_pmu_inuse(cpuhw->n_events != 0);
234
235         if (cpuhw->n_events > 0) {
236                 mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
237                 isync();
238         }
239
240  out:
241         local_irq_restore(flags);
242 }
243
244 static int collect_events(struct perf_event *group, int max_count,
245                           struct perf_event *ctrs[])
246 {
247         int n = 0;
248         struct perf_event *event;
249
250         if (!is_software_event(group)) {
251                 if (n >= max_count)
252                         return -1;
253                 ctrs[n] = group;
254                 n++;
255         }
256         list_for_each_entry(event, &group->sibling_list, group_entry) {
257                 if (!is_software_event(event) &&
258                     event->state != PERF_EVENT_STATE_OFF) {
259                         if (n >= max_count)
260                                 return -1;
261                         ctrs[n] = event;
262                         n++;
263                 }
264         }
265         return n;
266 }
267
268 /* context locked on entry */
269 static int fsl_emb_pmu_add(struct perf_event *event, int flags)
270 {
271         struct cpu_hw_events *cpuhw;
272         int ret = -EAGAIN;
273         int num_counters = ppmu->n_counter;
274         u64 val;
275         int i;
276
277         perf_pmu_disable(event->pmu);
278         cpuhw = &get_cpu_var(cpu_hw_events);
279
280         if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
281                 num_counters = ppmu->n_restricted;
282
283         /*
284          * Allocate counters from top-down, so that restricted-capable
285          * counters are kept free as long as possible.
286          */
287         for (i = num_counters - 1; i >= 0; i--) {
288                 if (cpuhw->event[i])
289                         continue;
290
291                 break;
292         }
293
294         if (i < 0)
295                 goto out;
296
297         event->hw.idx = i;
298         cpuhw->event[i] = event;
299         ++cpuhw->n_events;
300
301         val = 0;
302         if (event->hw.sample_period) {
303                 s64 left = local64_read(&event->hw.period_left);
304                 if (left < 0x80000000L)
305                         val = 0x80000000L - left;
306         }
307         local64_set(&event->hw.prev_count, val);
308
309         if (!(flags & PERF_EF_START)) {
310                 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
311                 val = 0;
312         }
313
314         write_pmc(i, val);
315         perf_event_update_userpage(event);
316
317         write_pmlcb(i, event->hw.config >> 32);
318         write_pmlca(i, event->hw.config_base);
319
320         ret = 0;
321  out:
322         put_cpu_var(cpu_hw_events);
323         perf_pmu_enable(event->pmu);
324         return ret;
325 }
326
327 /* context locked on entry */
328 static void fsl_emb_pmu_del(struct perf_event *event, int flags)
329 {
330         struct cpu_hw_events *cpuhw;
331         int i = event->hw.idx;
332
333         perf_pmu_disable(event->pmu);
334         if (i < 0)
335                 goto out;
336
337         fsl_emb_pmu_read(event);
338
339         cpuhw = &get_cpu_var(cpu_hw_events);
340
341         WARN_ON(event != cpuhw->event[event->hw.idx]);
342
343         write_pmlca(i, 0);
344         write_pmlcb(i, 0);
345         write_pmc(i, 0);
346
347         cpuhw->event[i] = NULL;
348         event->hw.idx = -1;
349
350         /*
351          * TODO: if at least one restricted event exists, and we
352          * just freed up a non-restricted-capable counter, and
353          * there is a restricted-capable counter occupied by
354          * a non-restricted event, migrate that event to the
355          * vacated counter.
356          */
357
358         cpuhw->n_events--;
359
360  out:
361         perf_pmu_enable(event->pmu);
362         put_cpu_var(cpu_hw_events);
363 }
364
365 static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
366 {
367         unsigned long flags;
368         s64 left;
369
370         if (event->hw.idx < 0 || !event->hw.sample_period)
371                 return;
372
373         if (!(event->hw.state & PERF_HES_STOPPED))
374                 return;
375
376         if (ef_flags & PERF_EF_RELOAD)
377                 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
378
379         local_irq_save(flags);
380         perf_pmu_disable(event->pmu);
381
382         event->hw.state = 0;
383         left = local64_read(&event->hw.period_left);
384         write_pmc(event->hw.idx, left);
385
386         perf_event_update_userpage(event);
387         perf_pmu_enable(event->pmu);
388         local_irq_restore(flags);
389 }
390
391 static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
392 {
393         unsigned long flags;
394
395         if (event->hw.idx < 0 || !event->hw.sample_period)
396                 return;
397
398         if (event->hw.state & PERF_HES_STOPPED)
399                 return;
400
401         local_irq_save(flags);
402         perf_pmu_disable(event->pmu);
403
404         fsl_emb_pmu_read(event);
405         event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
406         write_pmc(event->hw.idx, 0);
407
408         perf_event_update_userpage(event);
409         perf_pmu_enable(event->pmu);
410         local_irq_restore(flags);
411 }
412
413 /*
414  * Release the PMU if this is the last perf_event.
415  */
416 static void hw_perf_event_destroy(struct perf_event *event)
417 {
418         if (!atomic_add_unless(&num_events, -1, 1)) {
419                 mutex_lock(&pmc_reserve_mutex);
420                 if (atomic_dec_return(&num_events) == 0)
421                         release_pmc_hardware();
422                 mutex_unlock(&pmc_reserve_mutex);
423         }
424 }
425
426 /*
427  * Translate a generic cache event_id config to a raw event_id code.
428  */
429 static int hw_perf_cache_event(u64 config, u64 *eventp)
430 {
431         unsigned long type, op, result;
432         int ev;
433
434         if (!ppmu->cache_events)
435                 return -EINVAL;
436
437         /* unpack config */
438         type = config & 0xff;
439         op = (config >> 8) & 0xff;
440         result = (config >> 16) & 0xff;
441
442         if (type >= PERF_COUNT_HW_CACHE_MAX ||
443             op >= PERF_COUNT_HW_CACHE_OP_MAX ||
444             result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
445                 return -EINVAL;
446
447         ev = (*ppmu->cache_events)[type][op][result];
448         if (ev == 0)
449                 return -EOPNOTSUPP;
450         if (ev == -1)
451                 return -EINVAL;
452         *eventp = ev;
453         return 0;
454 }
455
456 static int fsl_emb_pmu_event_init(struct perf_event *event)
457 {
458         u64 ev;
459         struct perf_event *events[MAX_HWEVENTS];
460         int n;
461         int err;
462         int num_restricted;
463         int i;
464
465         switch (event->attr.type) {
466         case PERF_TYPE_HARDWARE:
467                 ev = event->attr.config;
468                 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
469                         return -EOPNOTSUPP;
470                 ev = ppmu->generic_events[ev];
471                 break;
472
473         case PERF_TYPE_HW_CACHE:
474                 err = hw_perf_cache_event(event->attr.config, &ev);
475                 if (err)
476                         return err;
477                 break;
478
479         case PERF_TYPE_RAW:
480                 ev = event->attr.config;
481                 break;
482
483         default:
484                 return -ENOENT;
485         }
486
487         event->hw.config = ppmu->xlate_event(ev);
488         if (!(event->hw.config & FSL_EMB_EVENT_VALID))
489                 return -EINVAL;
490
491         /*
492          * If this is in a group, check if it can go on with all the
493          * other hardware events in the group.  We assume the event
494          * hasn't been linked into its leader's sibling list at this point.
495          */
496         n = 0;
497         if (event->group_leader != event) {
498                 n = collect_events(event->group_leader,
499                                    ppmu->n_counter - 1, events);
500                 if (n < 0)
501                         return -EINVAL;
502         }
503
504         if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
505                 num_restricted = 0;
506                 for (i = 0; i < n; i++) {
507                         if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED)
508                                 num_restricted++;
509                 }
510
511                 if (num_restricted >= ppmu->n_restricted)
512                         return -EINVAL;
513         }
514
515         event->hw.idx = -1;
516
517         event->hw.config_base = PMLCA_CE | PMLCA_FCM1 |
518                                 (u32)((ev << 16) & PMLCA_EVENT_MASK);
519
520         if (event->attr.exclude_user)
521                 event->hw.config_base |= PMLCA_FCU;
522         if (event->attr.exclude_kernel)
523                 event->hw.config_base |= PMLCA_FCS;
524         if (event->attr.exclude_idle)
525                 return -ENOTSUPP;
526
527         event->hw.last_period = event->hw.sample_period;
528         local64_set(&event->hw.period_left, event->hw.last_period);
529
530         /*
531          * See if we need to reserve the PMU.
532          * If no events are currently in use, then we have to take a
533          * mutex to ensure that we don't race with another task doing
534          * reserve_pmc_hardware or release_pmc_hardware.
535          */
536         err = 0;
537         if (!atomic_inc_not_zero(&num_events)) {
538                 mutex_lock(&pmc_reserve_mutex);
539                 if (atomic_read(&num_events) == 0 &&
540                     reserve_pmc_hardware(perf_event_interrupt))
541                         err = -EBUSY;
542                 else
543                         atomic_inc(&num_events);
544                 mutex_unlock(&pmc_reserve_mutex);
545
546                 mtpmr(PMRN_PMGC0, PMGC0_FAC);
547                 isync();
548         }
549         event->destroy = hw_perf_event_destroy;
550
551         return err;
552 }
553
554 static struct pmu fsl_emb_pmu = {
555         .pmu_enable     = fsl_emb_pmu_enable,
556         .pmu_disable    = fsl_emb_pmu_disable,
557         .event_init     = fsl_emb_pmu_event_init,
558         .add            = fsl_emb_pmu_add,
559         .del            = fsl_emb_pmu_del,
560         .start          = fsl_emb_pmu_start,
561         .stop           = fsl_emb_pmu_stop,
562         .read           = fsl_emb_pmu_read,
563 };
564
565 /*
566  * A counter has overflowed; update its count and record
567  * things if requested.  Note that interrupts are hard-disabled
568  * here so there is no possibility of being interrupted.
569  */
570 static void record_and_restart(struct perf_event *event, unsigned long val,
571                                struct pt_regs *regs, int nmi)
572 {
573         u64 period = event->hw.sample_period;
574         s64 prev, delta, left;
575         int record = 0;
576
577         if (event->hw.state & PERF_HES_STOPPED) {
578                 write_pmc(event->hw.idx, 0);
579                 return;
580         }
581
582         /* we don't have to worry about interrupts here */
583         prev = local64_read(&event->hw.prev_count);
584         delta = (val - prev) & 0xfffffffful;
585         local64_add(delta, &event->count);
586
587         /*
588          * See if the total period for this event has expired,
589          * and update for the next period.
590          */
591         val = 0;
592         left = local64_read(&event->hw.period_left) - delta;
593         if (period) {
594                 if (left <= 0) {
595                         left += period;
596                         if (left <= 0)
597                                 left = period;
598                         record = 1;
599                 }
600                 if (left < 0x80000000LL)
601                         val = 0x80000000LL - left;
602         }
603
604         write_pmc(event->hw.idx, val);
605         local64_set(&event->hw.prev_count, val);
606         local64_set(&event->hw.period_left, left);
607         perf_event_update_userpage(event);
608
609         /*
610          * Finally record data if requested.
611          */
612         if (record) {
613                 struct perf_sample_data data;
614
615                 perf_sample_data_init(&data, 0);
616                 data.period = event->hw.last_period;
617
618                 if (perf_event_overflow(event, nmi, &data, regs))
619                         fsl_emb_pmu_stop(event, 0);
620         }
621 }
622
623 static void perf_event_interrupt(struct pt_regs *regs)
624 {
625         int i;
626         struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
627         struct perf_event *event;
628         unsigned long val;
629         int found = 0;
630         int nmi;
631
632         nmi = perf_intr_is_nmi(regs);
633         if (nmi)
634                 nmi_enter();
635         else
636                 irq_enter();
637
638         for (i = 0; i < ppmu->n_counter; ++i) {
639                 event = cpuhw->event[i];
640
641                 val = read_pmc(i);
642                 if ((int)val < 0) {
643                         if (event) {
644                                 /* event has overflowed */
645                                 found = 1;
646                                 record_and_restart(event, val, regs, nmi);
647                         } else {
648                                 /*
649                                  * Disabled counter is negative,
650                                  * reset it just in case.
651                                  */
652                                 write_pmc(i, 0);
653                         }
654                 }
655         }
656
657         /* PMM will keep counters frozen until we return from the interrupt. */
658         mtmsr(mfmsr() | MSR_PMM);
659         mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
660         isync();
661
662         if (nmi)
663                 nmi_exit();
664         else
665                 irq_exit();
666 }
667
668 void hw_perf_event_setup(int cpu)
669 {
670         struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
671
672         memset(cpuhw, 0, sizeof(*cpuhw));
673 }
674
675 int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
676 {
677         if (ppmu)
678                 return -EBUSY;          /* something's already registered */
679
680         ppmu = pmu;
681         pr_info("%s performance monitor hardware support registered\n",
682                 pmu->name);
683
684         perf_pmu_register(&fsl_emb_pmu);
685
686         return 0;
687 }