perf: Correctly align perf event tracing buffer
[linux-2.6.git] / kernel / trace / trace_event_perf.c
1 /*
2  * trace event based perf event profiling/tracing
3  *
4  * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5  * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
6  */
7
8 #include <linux/module.h>
9 #include <linux/kprobes.h>
10 #include "trace.h"
11
/* Per-CPU pt_regs scratch area used to snapshot the register state at a
 * tracepoint call site before handing it to perf. */
DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);

EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);

/*
 * Per-CPU scratch buffers in which trace entries are built: one for
 * normal context and a separate one for NMI context, so an NMI cannot
 * corrupt an entry being built underneath it on the same CPU.
 * Allocated when the first event is enabled and freed when the last one
 * is disabled (tracked by total_ref_count); readers dereference them
 * under RCU-sched (see perf_trace_buf_prepare()).
 */
static char *perf_trace_buf;
static char *perf_trace_buf_nmi;

/*
 * Force it to be aligned to unsigned long to avoid misaligned accesses
 * surprises
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int	total_ref_count;
29
/*
 * Enable perf profiling for @event, allocating the shared per-CPU trace
 * buffers if this is the first event enabled system-wide.  Returns 0 on
 * success or a negative errno.  Called under event_mutex (see
 * perf_trace_enable()), which serializes both refcounts.
 */
static int perf_trace_event_enable(struct ftrace_event_call *event)
{
	char *buf;
	int ret = -ENOMEM;

	/* Another instance of this event id already enabled it. */
	if (event->perf_refcount++ > 0)
		return 0;

	/*
	 * First event in use anywhere: allocate both per-CPU buffers.
	 * rcu_assign_pointer() publishes each buffer only after it is
	 * fully allocated, so RCU-sched readers in
	 * perf_trace_buf_prepare() never see a half-set-up pointer.
	 */
	if (!total_ref_count) {
		buf = (char *)alloc_percpu(perf_trace_t);
		if (!buf)
			goto fail_buf;

		rcu_assign_pointer(perf_trace_buf, buf);

		buf = (char *)alloc_percpu(perf_trace_t);
		if (!buf)
			goto fail_buf_nmi;

		rcu_assign_pointer(perf_trace_buf_nmi, buf);
	}

	ret = event->perf_event_enable(event);
	if (!ret) {
		total_ref_count++;
		return 0;
	}

	/*
	 * Error unwind: only free the buffers if we were the first event
	 * (total_ref_count still 0 — it is only bumped on success above);
	 * nothing has been enabled against them, so no grace period is
	 * needed before freeing.
	 */
fail_buf_nmi:
	if (!total_ref_count) {
		free_percpu(perf_trace_buf_nmi);
		free_percpu(perf_trace_buf);
		perf_trace_buf_nmi = NULL;
		perf_trace_buf = NULL;
	}
fail_buf:
	event->perf_refcount--;

	return ret;
}
70
71 int perf_trace_enable(int event_id)
72 {
73         struct ftrace_event_call *event;
74         int ret = -EINVAL;
75
76         mutex_lock(&event_mutex);
77         list_for_each_entry(event, &ftrace_events, list) {
78                 if (event->id == event_id && event->perf_event_enable &&
79                     try_module_get(event->mod)) {
80                         ret = perf_trace_event_enable(event);
81                         break;
82                 }
83         }
84         mutex_unlock(&event_mutex);
85
86         return ret;
87 }
88
/*
 * Drop one perf reference on @event; when it was the last one, disable
 * the event, and when no perf-traced events remain at all, tear down
 * the shared per-CPU buffers.  Called under event_mutex (see
 * perf_trace_disable()).
 */
static void perf_trace_event_disable(struct ftrace_event_call *event)
{
	char *buf, *nmi_buf;

	/* Other instances of this event id are still profiling. */
	if (--event->perf_refcount > 0)
		return;

	event->perf_event_disable(event);

	/* Last perf-traced event system-wide: free the buffers. */
	if (!--total_ref_count) {
		/*
		 * Unpublish the buffers first so new RCU-sched readers
		 * in perf_trace_buf_prepare() see NULL and bail out...
		 */
		buf = perf_trace_buf;
		rcu_assign_pointer(perf_trace_buf, NULL);

		nmi_buf = perf_trace_buf_nmi;
		rcu_assign_pointer(perf_trace_buf_nmi, NULL);

		/*
		 * ...then wait for all in-flight readers to finish
		 * before actually releasing the memory.
		 */
		synchronize_sched();

		free_percpu(buf);
		free_percpu(nmi_buf);
	}
}
115
116 void perf_trace_disable(int event_id)
117 {
118         struct ftrace_event_call *event;
119
120         mutex_lock(&event_mutex);
121         list_for_each_entry(event, &ftrace_events, list) {
122                 if (event->id == event_id) {
123                         perf_trace_event_disable(event);
124                         module_put(event->mod);
125                         break;
126                 }
127         }
128         mutex_unlock(&event_mutex);
129 }
130
/*
 * Reserve @size bytes in the per-CPU perf trace buffer and initialize the
 * common trace_entry fields (type, flags, preempt count).  On success,
 * returns the entry with IRQs disabled (*irq_flags saved) and a recursion
 * context held in *rctxp — NOTE(review): the matching submit/end path is
 * presumably expected to release both; confirm against callers.  Returns
 * NULL on recursion or when the buffers have just been torn down.
 */
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
				       int *rctxp, unsigned long *irq_flags)
{
	struct trace_entry *entry;
	char *trace_buf, *raw_data;
	int pc, cpu;

	/* perf_trace_t sizing requires PERF_MAX_TRACE_SIZE to be a whole
	 * number of unsigned longs. */
	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

	pc = preempt_count();

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(*irq_flags);

	/* Refuse to nest at the same context level (e.g. a tracepoint hit
	 * while an entry is already being built). */
	*rctxp = perf_swevent_get_recursion_context();
	if (*rctxp < 0)
		goto err_recursion;

	cpu = smp_processor_id();

	/* NMIs use a dedicated buffer so they cannot clobber an entry
	 * under construction in normal context on this CPU. */
	if (in_nmi())
		trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
	else
		trace_buf = rcu_dereference_sched(perf_trace_buf);

	/* NULL means the last event was concurrently disabled. */
	if (!trace_buf)
		goto err;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

	entry = (struct trace_entry *)raw_data;
	tracing_generic_entry_update(entry, *irq_flags, pc);
	entry->type = type;

	return raw_data;
err:
	perf_swevent_put_recursion_context(*rctxp);
err_recursion:
	local_irq_restore(*irq_flags);
	return NULL;
}