sparc64: Add perf callchain support.
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index fa5936e..2386ac6 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1,6 +1,6 @@
 /* Performance event support for sparc64.
  *
- * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2009, 2010 David S. Miller <davem@davemloft.net>
  *
  * This code is based almost entirely upon the x86 perf event
  * code, which is:
 #include <linux/kdebug.h>
 #include <linux/mutex.h>
 
+#include <asm/stacktrace.h>
 #include <asm/cpudata.h>
+#include <asm/uaccess.h>
 #include <asm/atomic.h>
 #include <asm/nmi.h>
 #include <asm/pcr.h>
 
+#include "kstack.h"
+
 /* Sparc64 chips have two performance counters, 32 bits each, with
  * overflow interrupts generated on transition from 0xffffffff to 0.
  * The counters are accessed in one go using a 64-bit register.
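The access pattern that comment describes can be pictured with a short sketch. Note the names here are illustrative: read_pic() is a hypothetical stand-in for a "rd %pic" instruction wrapper, and placing counter 0 in the low word is an assumption, not something this patch establishes.

/* Illustrative only: both 32-bit counters arrive in one 64-bit read.
 * read_pic() is a hypothetical accessor; counter 0 is assumed to sit
 * in the low 32 bits.
 */
static u64 read_counter(int idx)
{
	u64 pic = read_pic();		/* single read returns both counters */

	return (pic >> (idx * 32)) & 0xffffffff;
}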
@@ -986,6 +990,17 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
        data.addr = 0;
 
        cpuc = &__get_cpu_var(cpu_hw_events);
+
+       /* If the PMU has the TOE IRQ enable bits, we need to do a
+        * dummy write to the %pcr to clear the overflow bits and thus
+        * the interrupt.
+        *
+        * Do this before we peek at the counters to determine
+        * overflow so we don't lose any events.
+        */
+       if (sparc_pmu->irq_bit)
+               pcr_ops->write(cpuc->pcr);
+
        for (idx = 0; idx < MAX_HWEVENTS; idx++) {
                struct perf_event *event = cpuc->events[idx];
                struct hw_perf_event *hwc;
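With only 32 bits per hardware counter, the handler above must fold each counter's delta into a 64-bit software count before the hardware wraps again. A minimal sketch of that bookkeeping, written here for illustration rather than lifted from the patch:

/* Illustrative only: fold a 32-bit hardware counter reading into a
 * 64-bit count.  Unsigned subtraction absorbs the 0xffffffff -> 0 wrap.
 */
static void accumulate(u64 *count, u32 *prev, u32 now)
{
	u32 delta = now - *prev;

	*prev = now;
	*count += delta;
}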
@@ -1051,3 +1066,117 @@ void __init init_hw_perf_events(void)
 
        register_die_notifier(&perf_event_nmi_notifier);
 }
+
+static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
+{
+       if (entry->nr < PERF_MAX_STACK_DEPTH)
+               entry->ip[entry->nr++] = ip;
+}
+
+static void perf_callchain_kernel(struct pt_regs *regs,
+                                 struct perf_callchain_entry *entry)
+{
+       unsigned long ksp, fp;
+
+       callchain_store(entry, PERF_CONTEXT_KERNEL);
+       callchain_store(entry, regs->tpc);
+
+       ksp = regs->u_regs[UREG_I6];
+       fp = ksp + STACK_BIAS;
+       do {
+               struct sparc_stackf *sf;
+               struct pt_regs *regs;
+               unsigned long pc;
+
+               if (!kstack_valid(current_thread_info(), fp))
+                       break;
+
+               sf = (struct sparc_stackf *) fp;
+               regs = (struct pt_regs *) (sf + 1);
+
+               if (kstack_is_trap_frame(current_thread_info(), regs)) {
+                       if (user_mode(regs))
+                               break;
+                       pc = regs->tpc;
+                       fp = regs->u_regs[UREG_I6] + STACK_BIAS;
+               } else {
+                       pc = sf->callers_pc;
+                       fp = (unsigned long)sf->fp + STACK_BIAS;
+               }
+               callchain_store(entry, pc);
+       } while (entry->nr < PERF_MAX_STACK_DEPTH);
+}
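The "+ STACK_BIAS" adjustments in the walker above reflect a sparc64 ABI detail: 64-bit code keeps %sp and %fp biased downward by 2047 bytes, so a raw register value must have the bias added back before it can be dereferenced. As a one-line sketch, with the constant spelled out purely for illustration (the kernel has its own STACK_BIAS definition in the sparc headers):

/* sparc64 ABI note, illustrated: %fp holds the frame address minus
 * STACK_BIAS, so walkers re-add the bias before dereferencing.
 */
static struct sparc_stackf *frame_from_fp(unsigned long fp_reg)
{
	return (struct sparc_stackf *)(fp_reg + 2047UL);	/* 2047 == STACK_BIAS */
}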
+
+static void perf_callchain_user_64(struct pt_regs *regs,
+                                  struct perf_callchain_entry *entry)
+{
+       unsigned long ufp;
+
+       callchain_store(entry, PERF_CONTEXT_USER);
+       callchain_store(entry, regs->tpc);
+
+       ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
+       do {
+               struct sparc_stackf *usf, sf;
+               unsigned long pc;
+
+               usf = (struct sparc_stackf *) ufp;
+               if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+                       break;
+
+               pc = sf.callers_pc;
+               ufp = (unsigned long)sf.fp + STACK_BIAS;
+               callchain_store(entry, pc);
+       } while (entry->nr < PERF_MAX_STACK_DEPTH);
+}
+
+static void perf_callchain_user_32(struct pt_regs *regs,
+                                  struct perf_callchain_entry *entry)
+{
+       unsigned long ufp;
+
+       callchain_store(entry, PERF_CONTEXT_USER);
+       callchain_store(entry, regs->tpc);
+
+       ufp = regs->u_regs[UREG_I6];
+       do {
+               struct sparc_stackf32 *usf, sf;
+               unsigned long pc;
+
+               usf = (struct sparc_stackf32 *) ufp;
+               if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+                       break;
+
+               pc = sf.callers_pc;
+               ufp = (unsigned long)sf.fp;
+               callchain_store(entry, pc);
+       } while (entry->nr < PERF_MAX_STACK_DEPTH);
+}
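The 32-bit variant differs from the 64-bit walker in exactly two ways: compat tasks keep %fp unbiased, and the saved frame slots are 32 bits wide, hence struct sparc_stackf32. A reduced sketch of the two frame shapes, trimmed to the fields the walkers actually read (the real definitions, with more members, live in the sparc ptrace headers):

/* Reduced, illustrative frame layouts -- field order as in the real
 * structs, but trimmed; only fp and callers_pc are consumed above.
 */
struct frame64_sketch {
	unsigned long		locals[8];
	unsigned long		ins[6];
	struct frame64_sketch	*fp;		/* saved caller %fp (biased) */
	unsigned long		callers_pc;	/* saved return address */
};

struct frame32_sketch {
	unsigned int	locals[8];
	unsigned int	ins[6];
	unsigned int	fp;			/* 32-bit saved %fp, no bias */
	unsigned int	callers_pc;
};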
+
+/* Like powerpc, we can't get PMU interrupts within the PMU handler,
+ * so there is no need for separate NMI and IRQ chains as on x86.
+ */
+static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
+
+struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+       struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
+
+       entry->nr = 0;
+       if (!user_mode(regs)) {
+               stack_trace_flush();
+               perf_callchain_kernel(regs, entry);
+               if (current->mm)
+                       regs = task_pt_regs(current);
+               else
+                       regs = NULL;
+       }
+       if (regs) {
+               flushw_user();
+               if (test_thread_flag(TIF_32BIT))
+                       perf_callchain_user_32(regs, entry);
+               else
+                       perf_callchain_user_64(regs, entry);
+       }
+       return entry;
+}
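End to end, a sample taken in kernel mode for a task with a user mm yields one buffer holding both halves: a PERF_CONTEXT_KERNEL marker followed by kernel PCs, then a PERF_CONTEXT_USER marker followed by user PCs. A hypothetical consumer-side walk over such a buffer, using a minimal userspace mirror of the entry layout (the marker values match the uapi perf_event.h enum):

#include <stdio.h>
#include <stdint.h>

#define CTX_KERNEL	((uint64_t)-128)	/* PERF_CONTEXT_KERNEL */
#define CTX_USER	((uint64_t)-512)	/* PERF_CONTEXT_USER */

/* Minimal mirror of the kernel's perf_callchain_entry, for illustration. */
struct chain {
	uint64_t nr;
	uint64_t ip[255];	/* up to PERF_MAX_STACK_DEPTH entries */
};

static void dump_chain(const struct chain *c)
{
	for (uint64_t i = 0; i < c->nr; i++) {
		if (c->ip[i] == CTX_KERNEL)
			puts("-- kernel --");
		else if (c->ip[i] == CTX_USER)
			puts("-- user --");
		else
			printf("  %#llx\n", (unsigned long long)c->ip[i]);
	}
}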