/* Performance event support for sparc64.
*
- * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 2009, 2010 David S. Miller <davem@davemloft.net>
*
* This code is based almost entirely upon the x86 perf event
* code, which is:
#include <linux/kdebug.h>
#include <linux/mutex.h>
+#include <asm/stacktrace.h>
#include <asm/cpudata.h>
+#include <asm/uaccess.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>
+#include "kstack.h"
+
/* Sparc64 chips have two performance counters, 32-bits each, with
* overflow interrupts generated on transition from 0xffffffff to 0.
* The counters are accessed in one go using a 64-bit register.
struct perf_event *events[MAX_HWEVENTS];
unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
- int enabled;
+ u64 pcr;
+ int enabled;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
.lower_shift = 6,
.event_mask = 0xfff,
.hv_bit = 0x8,
- .irq_bit = 0x03,
+ .irq_bit = 0x30,
.upper_nop = 0x220,
.lower_nop = 0x220,
};
sparc_pmu->lower_nop, idx);
}
-static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
u64 val, mask = mask_for_index(idx);
- val = pcr_ops->read();
- pcr_ops->write((val & ~mask) | hwc->config);
+ val = cpuc->pcr;
+ val &= ~mask;
+ val |= hwc->config;
+ cpuc->pcr = val;
+
+ pcr_ops->write(cpuc->pcr);
}
-static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
{
u64 mask = mask_for_index(idx);
u64 nop = nop_for_index(idx);
- u64 val = pcr_ops->read();
+ u64 val;
- pcr_ops->write((val & ~mask) | nop);
+ val = cpuc->pcr;
+ val &= ~mask;
+ val |= nop;
+ cpuc->pcr = val;
+
+ pcr_ops->write(cpuc->pcr);
}
void hw_perf_enable(void)
cpuc->enabled = 1;
barrier();
- val = pcr_ops->read();
+ val = cpuc->pcr;
for (i = 0; i < MAX_HWEVENTS; i++) {
struct perf_event *cp = cpuc->events[i];
val |= hwc->config_base;
}
- pcr_ops->write(val);
+ cpuc->pcr = val;
+
+ pcr_ops->write(cpuc->pcr);
}
void hw_perf_disable(void)
cpuc->enabled = 0;
- val = pcr_ops->read();
+ val = cpuc->pcr;
val &= ~(PCR_UTRACE | PCR_STRACE |
sparc_pmu->hv_bit | sparc_pmu->irq_bit);
- pcr_ops->write(val);
+ cpuc->pcr = val;
+
+ pcr_ops->write(cpuc->pcr);
}
static u32 read_pmc(int idx)
if (test_and_set_bit(idx, cpuc->used_mask))
return -EAGAIN;
- sparc_pmu_disable_event(hwc, idx);
+ sparc_pmu_disable_event(cpuc, hwc, idx);
cpuc->events[idx] = event;
set_bit(idx, cpuc->active_mask);
sparc_perf_event_set_period(event, hwc, idx);
- sparc_pmu_enable_event(hwc, idx);
+ sparc_pmu_enable_event(cpuc, hwc, idx);
perf_event_update_userpage(event);
return 0;
}
int idx = hwc->idx;
clear_bit(idx, cpuc->active_mask);
- sparc_pmu_disable_event(hwc, idx);
+ sparc_pmu_disable_event(cpuc, hwc, idx);
barrier();
static void sparc_pmu_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
+
sparc_perf_event_update(event, hwc, hwc->idx);
}
static void sparc_pmu_unthrottle(struct perf_event *event)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- sparc_pmu_enable_event(hwc, hwc->idx);
+
+ sparc_pmu_enable_event(cpuc, hwc, hwc->idx);
}
static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);
+static void perf_stop_nmi_watchdog(void *unused)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+ stop_nmi_watchdog(NULL);
+ cpuc->pcr = pcr_ops->read();
+}
+
void perf_event_grab_pmc(void)
{
if (atomic_inc_not_zero(&active_events))
mutex_lock(&pmc_grab_mutex);
if (atomic_read(&active_events) == 0) {
if (atomic_read(&nmi_active) > 0) {
- on_each_cpu(stop_nmi_watchdog, NULL, 1);
+ on_each_cpu(perf_stop_nmi_watchdog, NULL, 1);
BUG_ON(atomic_read(&nmi_active) != 0);
}
atomic_inc(&active_events);
data.addr = 0;
cpuc = &__get_cpu_var(cpu_hw_events);
+
+ /* If the PMU has the TOE IRQ enable bits, we need to do a
+ * dummy write to the %pcr to clear the overflow bits and thus
+ * the interrupt.
+ *
+ * Do this before we peek at the counters to determine
+ * overflow so we don't lose any events.
+ */
+ if (sparc_pmu->irq_bit)
+ pcr_ops->write(cpuc->pcr);
+
for (idx = 0; idx < MAX_HWEVENTS; idx++) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
continue;
if (perf_event_overflow(event, 1, &data, regs))
- sparc_pmu_disable_event(hwc, idx);
+ sparc_pmu_disable_event(cpuc, hwc, idx);
}
return NOTIFY_STOP;
register_die_notifier(&perf_event_nmi_notifier);
}
+
+static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
+{
+ if (entry->nr < PERF_MAX_STACK_DEPTH)
+ entry->ip[entry->nr++] = ip;
+}
+
+static void perf_callchain_kernel(struct pt_regs *regs,
+ struct perf_callchain_entry *entry)
+{
+ unsigned long ksp, fp;
+
+ callchain_store(entry, PERF_CONTEXT_KERNEL);
+ callchain_store(entry, regs->tpc);
+
+ ksp = regs->u_regs[UREG_I6];
+ fp = ksp + STACK_BIAS;
+ do {
+ struct sparc_stackf *sf;
+ struct pt_regs *regs;
+ unsigned long pc;
+
+ if (!kstack_valid(current_thread_info(), fp))
+ break;
+
+ sf = (struct sparc_stackf *) fp;
+ regs = (struct pt_regs *) (sf + 1);
+
+ if (kstack_is_trap_frame(current_thread_info(), regs)) {
+ if (user_mode(regs))
+ break;
+ pc = regs->tpc;
+ fp = regs->u_regs[UREG_I6] + STACK_BIAS;
+ } else {
+ pc = sf->callers_pc;
+ fp = (unsigned long)sf->fp + STACK_BIAS;
+ }
+ callchain_store(entry, pc);
+ } while (entry->nr < PERF_MAX_STACK_DEPTH);
+}
+
+static void perf_callchain_user_64(struct pt_regs *regs,
+ struct perf_callchain_entry *entry)
+{
+ unsigned long ufp;
+
+ callchain_store(entry, PERF_CONTEXT_USER);
+ callchain_store(entry, regs->tpc);
+
+ ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
+ do {
+ struct sparc_stackf *usf, sf;
+ unsigned long pc;
+
+ usf = (struct sparc_stackf *) ufp;
+ if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+ break;
+
+ pc = sf.callers_pc;
+ ufp = (unsigned long)sf.fp + STACK_BIAS;
+ callchain_store(entry, pc);
+ } while (entry->nr < PERF_MAX_STACK_DEPTH);
+}
+
+static void perf_callchain_user_32(struct pt_regs *regs,
+ struct perf_callchain_entry *entry)
+{
+ unsigned long ufp;
+
+ callchain_store(entry, PERF_CONTEXT_USER);
+ callchain_store(entry, regs->tpc);
+
+ ufp = regs->u_regs[UREG_I6];
+ do {
+ struct sparc_stackf32 *usf, sf;
+ unsigned long pc;
+
+ usf = (struct sparc_stackf32 *) ufp;
+ if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+ break;
+
+ pc = sf.callers_pc;
+ ufp = (unsigned long)sf.fp;
+ callchain_store(entry, pc);
+ } while (entry->nr < PERF_MAX_STACK_DEPTH);
+}
+
+/* Like powerpc we can't get PMU interrupts within the PMU handler,
+ * so no need for seperate NMI and IRQ chains as on x86.
+ */
+static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
+
+struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+ struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
+
+ entry->nr = 0;
+ if (!user_mode(regs)) {
+ stack_trace_flush();
+ perf_callchain_kernel(regs, entry);
+ if (current->mm)
+ regs = task_pt_regs(current);
+ else
+ regs = NULL;
+ }
+ if (regs) {
+ flushw_user();
+ if (test_thread_flag(TIF_32BIT))
+ perf_callchain_user_32(regs, entry);
+ else
+ perf_callchain_user_64(regs, entry);
+ }
+ return entry;
+}