2 * drivers/misc/tegra-profiler/hrt.c
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19 #include <linux/module.h>
20 #include <linux/kallsyms.h>
21 #include <linux/sched.h>
22 #include <linux/hrtimer.h>
23 #include <linux/slab.h>
24 #include <linux/cpu.h>
25 #include <linux/ratelimit.h>
26 #include <linux/ptrace.h>
27 #include <linux/interrupt.h>
28 #include <linux/err.h>
30 #include <asm/cputype.h>
31 #include <asm/irq_regs.h>
33 #include <linux/tegra_profiler.h>
40 #include "power_clk.h"
/* Single module-wide profiling context (timer period, counters, per-CPU state). */
44 static struct quadd_hrt_ctx hrt;
/* Forward declaration: sample all active event sources for @task on this CPU.
 * NOTE(review): the return-type line is not visible in this chunk. */
47 read_all_sources(struct pt_regs *regs, struct task_struct *task);
/* One (event_id, value) pair filled in by read_source(); body truncated here. */
49 struct hrt_event_value {
/*
 * Periodic sampling tick (runs in hrtimer/IRQ context on each active CPU):
 * capture the interrupted register state, read all event sources, then
 * re-arm the timer for the next sampling period.
 */
54 static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer)
58 regs = get_irq_regs();
/* NOTE(review): the condition guarding this bail-out is not visible in this
 * chunk — presumably `if (!regs)` (no interrupted context available). */
61 return HRTIMER_NORESTART;
63 qm_debug_handler_sample(regs);
/* task == NULL: sample whatever task is current on this CPU. */
66 read_all_sources(regs, NULL);
/* Advance expiry relative to now by the configured sampling period (ns). */
68 hrtimer_forward_now(hrtimer, ns_to_ktime(hrt.sample_period));
69 qm_debug_timer_forward(regs, hrt.sample_period);
71 return HRTIMER_RESTART;
/* Arm this CPU's sampling timer; first expiry one full period from now. */
74 static void start_hrtimer(struct quadd_cpu_context *cpu_ctx)
76 u64 period = hrt.sample_period;
/* REL_PINNED: relative expiry, and keep the timer on the local CPU. */
78 hrtimer_start(&cpu_ctx->hrtimer, ns_to_ktime(period),
79 HRTIMER_MODE_REL_PINNED);
80 qm_debug_timer_start(NULL, period);
/* Stop this CPU's sampling timer (waits for a running handler to finish). */
83 static void cancel_hrtimer(struct quadd_cpu_context *cpu_ctx)
85 hrtimer_cancel(&cpu_ctx->hrtimer);
86 qm_debug_timer_cancel();
/* One-time setup of the per-CPU timer: monotonic clock, relative mode,
 * ticks dispatched to hrtimer_handler(). */
89 static void init_hrtimer(struct quadd_cpu_context *cpu_ctx)
91 hrtimer_init(&cpu_ctx->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
92 cpu_ctx->hrtimer.function = hrtimer_handler;
/* Monotonic timestamp in nanoseconds, used to stamp every sample record.
 * NOTE(review): the local `struct timespec ts;` declaration is not visible
 * in this chunk. */
95 u64 quadd_get_time(void)
99 do_posix_clock_monotonic_gettime(&ts);
100 return timespec_to_ns(&ts);
/*
 * Emit one QUADD_RECORD_TYPE_HEADER record describing the profiling session:
 * protocol magic/version, the user-supplied parameters (backtrace, freq,
 * system_wide, moving-average and power-rate rates, mmap capture), and the
 * list of currently configured counter event IDs from the PMU and (if
 * present) the PL310 event sources.
 */
103 static void put_header(void)
105 int nr_events = 0, max_events = QUADD_MAX_COUNTERS;
106 unsigned int events[QUADD_MAX_COUNTERS];
107 struct quadd_record_data record;
108 struct quadd_header_data *hdr = &record.hdr;
109 struct quadd_parameters *param = &hrt.quadd_ctx->param;
/* Extra feature flags are packed into param->reserved[QUADD_PARAM_IDX_EXTRA]. */
110 unsigned int extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
111 struct quadd_iovec vec;
112 struct quadd_ctx *ctx = hrt.quadd_ctx;
113 struct quadd_event_source_interface *pmu = ctx->pmu;
114 struct quadd_event_source_interface *pl310 = ctx->pl310;
116 record.magic = QUADD_RECORD_MAGIC;
117 record.record_type = QUADD_RECORD_TYPE_HEADER;
119 hdr->magic = QUADD_HEADER_MAGIC;
120 hdr->version = QUADD_SAMPLES_VERSION;
/* Echo the session parameters back to the consumer. */
122 hdr->backtrace = param->backtrace;
123 hdr->use_freq = param->use_freq;
124 hdr->system_wide = param->system_wide;
126 /* TODO: dynamically */
127 #ifdef QM_DEBUG_SAMPLES_ENABLE
128 hdr->debug_samples = 1;
/* NOTE(review): the `#else` / `#endif` lines are not visible in this chunk. */
130 hdr->debug_samples = 0;
133 hdr->freq = param->freq;
134 hdr->ma_freq = param->ma_freq;
135 hdr->power_rate_freq = param->power_rate_freq;
/* Derived flags: power-rate sampling enabled iff its freq is positive. */
137 hdr->power_rate = hdr->power_rate_freq > 0 ? 1 : 0;
138 hdr->get_mmap = (extra & QUADD_PARAM_IDX_EXTRA_GET_MMAP) ? 1 : 0;
141 hdr->extra_length = 0;
/* Collect the active event IDs: PMU first, then PL310 appended after. */
144 nr_events += pmu->get_current_events(events, max_events);
147 nr_events += pl310->get_current_events(events + nr_events,
148 max_events - nr_events);
150 hdr->nr_events = nr_events;
/* Ship the event-ID array as the record's single iovec payload.
 * NOTE(review): the `vec.base = events;` line is not visible in this chunk. */
153 vec.len = nr_events * sizeof(events[0]);
155 quadd_put_sample(&record, &vec, 1);
/*
 * Hand a record plus its iovec payload(s) to the comm (transport) layer and
 * bump the global sample counter (reported by quadd_hrt_get_state()).
 */
158 void quadd_put_sample(struct quadd_record_data *data,
159 struct quadd_iovec *vec, int vec_count)
161 struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
163 comm->put_sample(data, vec, vec_count);
164 atomic64_inc(&hrt.counter_samples);
/*
 * Fill in the fixed part of a sample record from the interrupted register
 * state and the sampled task: CPU id and mode flags (LP cluster, Thumb),
 * user/kernel mode, instruction pointer, timestamp, pid and IRQ context.
 * NOTE(review): several lines are missing from this chunk (the assignment
 * target for the LP-cluster flag, the IP-hiding branch body, the return).
 */
167 static int get_sample_data(struct quadd_sample_data *sample,
168 struct pt_regs *regs,
169 struct task_struct *task)
171 unsigned int cpu, flags;
172 struct quadd_ctx *quadd_ctx = hrt.quadd_ctx;
/* Also reports cpu-mode flags (LP cluster / Thumb) through &flags. */
174 cpu = quadd_get_processor_id(regs, &flags);
178 (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
179 sample->thumb_mode = (flags & QUADD_CPUMODE_THUMB) ? 1 : 0;
180 sample->user_mode = user_mode(regs) ? 1 : 0;
182 sample->ip = instruction_pointer(regs);
184 /* For security reasons, hide IPs from the kernel space. */
/* NOTE(review): the taken-branch body (presumably zeroing sample->ip) and
 * the `else` introducing the line below are not visible in this chunk. */
185 if (!sample->user_mode && !quadd_ctx->collect_kernel_ips)
188 sample->ip = instruction_pointer(regs);
190 sample->time = quadd_get_time();
191 sample->reserved = 0;
192 sample->pid = task->pid;
193 sample->in_interrupt = in_interrupt() ? 1 : 0;
/*
 * Read up to @max_events counters from one event source and convert each
 * raw (prev_val, val) pair into a per-period delta stored in @events_vals.
 * PL310 counters are shared by all cores, so their delta is split evenly
 * across the currently active cores.  Returns the number of events read
 * (return statement not visible in this chunk).
 */
198 static int read_source(struct quadd_event_source_interface *source,
199 struct pt_regs *regs,
200 struct hrt_event_value *events_vals,
204 u32 prev_val, val, res_val;
205 struct event_data events[QUADD_MAX_COUNTERS];
/* Clamp to the size of the local scratch array. */
210 max_events = min_t(int, max_events, QUADD_MAX_COUNTERS);
211 nr_events = source->read(events, max_events);
213 for (i = 0; i < nr_events; i++) {
214 struct event_data *s = &events[i];
216 prev_val = s->prev_val;
/* Common case: counter did not wrap since the previous read. */
220 res_val = val - prev_val;
/* Counter wrapped around its 32-bit range.
 * NOTE(review): if QUADD_U32_MAX is 0xffffffff, the exact modular delta
 * would be QUADD_U32_MAX - prev_val + val + 1 — this arm looks off by
 * one; confirm against QUADD_U32_MAX's definition. */
222 res_val = QUADD_U32_MAX - prev_val + val;
224 if (s->event_source == QUADD_EVENT_SOURCE_PL310) {
/* PL310 (L2 cache) counters count for every core; attribute an equal
 * share to this core. */
225 int nr_active = atomic_read(&hrt.nr_active_all_core);
227 res_val = res_val / nr_active;
230 events_vals[i].event_id = s->event_id;
231 events_vals[i].value = res_val;
/*
 * Build and emit one QUADD_RECORD_TYPE_SAMPLE record for the current CPU:
 * read every active event source (PMU, then PL310), fill the fixed sample
 * fields, optionally append the user-space callchain, then the non-zero
 * counter values, and ship the record via quadd_put_sample().
 * NOTE(review): this chunk is heavily line-sampled — several guards, the
 * task-lookup fallback, and the `state` payload's origin are only partially
 * visible; comments below describe what the visible lines establish.
 */
238 read_all_sources(struct pt_regs *regs, struct task_struct *task)
241 int i, vec_idx = 0, bt_size = 0;
242 int nr_events = 0, nr_positive_events = 0;
243 struct pt_regs *user_regs;
/* Up to 3 payload iovecs: callchain, positive counter values, extra state. */
244 struct quadd_iovec vec[3];
245 struct hrt_event_value events[QUADD_MAX_COUNTERS];
246 u32 events_extra[QUADD_MAX_COUNTERS];
248 struct quadd_record_data record_data;
249 struct quadd_sample_data *s = &record_data.sample;
251 struct quadd_ctx *ctx = hrt.quadd_ctx;
252 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
253 struct quadd_callchain *cc_data = &cpu_ctx->callchain_data;
/* Nothing being profiled on this CPU — bail out (body not visible). */
258 if (atomic_read(&cpu_ctx->nr_active) == 0)
/* task == NULL (timer-tick path): resolve the task from this CPU's
 * recorded active-thread pid. */
264 struct quadd_thread_data *t_data;
266 t_data = &cpu_ctx->active_thread;
270 pid_s = find_vpid(pid);
272 task = pid_task(pid_s, PIDTYPE_PID);
/* Read counters from each active source; PL310 appended after the PMU. */
278 if (ctx->pmu && ctx->pmu_info.active)
279 nr_events += read_source(ctx->pmu, regs,
280 events, QUADD_MAX_COUNTERS);
282 if (ctx->pl310 && ctx->pl310_info.active)
283 nr_events += read_source(ctx->pl310, regs,
285 QUADD_MAX_COUNTERS - nr_events);
/* User-space register frame of the current task, used for unwinding. */
293 user_regs = current_pt_regs();
295 if (get_sample_data(s, regs, task))
/* Optionally unwind the user callchain and attach it as the first iovec. */
298 if (ctx->param.backtrace) {
299 bt_size = quadd_get_user_callchain(user_regs, cc_data);
301 vec[vec_idx].base = cc_data->callchain;
303 bt_size * sizeof(cc_data->callchain[0]);
307 s->callchain_nr = bt_size;
309 record_data.magic = QUADD_RECORD_MAGIC;
310 record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
/* Compact the counter values: only non-zero ones are transmitted; a bit in
 * events_flags marks which event indices carry a value. */
313 for (i = 0; i < nr_events; i++) {
314 u32 value = events[i].value;
316 s->events_flags |= 1 << i;
317 events_extra[nr_positive_events++] = value;
321 if (nr_positive_events == 0)
324 vec[vec_idx].base = events_extra;
325 vec[vec_idx].len = nr_positive_events * sizeof(events_extra[0]);
/* Third iovec: extra state blob (its declaration/fill is not visible here). */
331 vec[vec_idx].base = &state;
332 vec[vec_idx].len = sizeof(state);
338 quadd_put_sample(&record_data, vec, vec_idx);
/*
 * Return whether @task belongs to the set of profiled processes, by
 * comparing its pid against the pid list supplied in ctx->param.pids[].
 * NOTE(review): the pid extraction from @task and the return statements
 * are not visible in this chunk.
 */
342 is_profile_process(struct task_struct *task)
345 pid_t pid, profile_pid;
346 struct quadd_ctx *ctx = hrt.quadd_ctx;
353 for (i = 0; i < ctx->param.nr_pids; i++) {
354 profile_pid = ctx->param.pids[i];
355 if (profile_pid == pid)
/*
 * Record @pid/@tgid as this CPU's active profiled thread.  Warns (once) if
 * a thread is already recorded — only one active thread per CPU is expected.
 * NOTE(review): the assignment of pid/tgid into t_data and the return value
 * are not visible in this chunk.
 */
362 add_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid, pid_t tgid)
364 struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
/* Slot already occupied or counter already raised: flag the inconsistency. */
366 if (t_data->pid > 0 ||
367 atomic_read(&cpu_ctx->nr_active) > 0) {
368 pr_warn_once("Warning for thread: %d\n", (int)pid);
/*
 * Clear this CPU's active-thread slot if it matches @pid; warns (once) on a
 * mismatch.  The int return is consumed by the caller as the number of
 * threads removed (see atomic_sub() in __quadd_task_sched_out()), though the
 * return statements themselves are not visible in this chunk.
 */
377 static int remove_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid)
379 struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
384 if (t_data->pid == pid) {
/* Slot held by some other pid than the one being removed. */
390 pr_warn_once("Warning for thread: %d\n", (int)pid);
/*
 * Scheduler hook: a task was switched in on this CPU.  If profiling is
 * active and @task is one of the profiled processes, register it as the
 * CPU's active thread and, on the 0 -> 1 transition, prime the PL310
 * counters, start the sampling timer and raise the global active-core
 * count.
 */
394 void __quadd_task_sched_in(struct task_struct *prev,
395 struct task_struct *task)
397 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
398 struct quadd_ctx *ctx = hrt.quadd_ctx;
399 struct event_data events[QUADD_MAX_COUNTERS];
400 /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
/* Fast path: profiler not running (the early return is not visible here). */
402 if (likely(!hrt.active))
/* Rate-limited debug trace of the context switch (guarded-out remnant of
 * the commented DEFINE_RATELIMIT_STATE above). */
405 if (__ratelimit(&ratelimit_state))
406 pr_info("sch_in, cpu: %d, prev: %u (%u) \t--> curr: %u (%u)\n",
407 smp_processor_id(), (unsigned int)prev->pid,
408 (unsigned int)prev->tgid, (unsigned int)task->pid,
409 (unsigned int)task->tgid);
412 if (is_profile_process(task)) {
413 add_active_thread(cpu_ctx, task->pid, task->tgid);
414 atomic_inc(&cpu_ctx->nr_active);
/* First profiled thread on this CPU: begin sampling. */
416 if (atomic_read(&cpu_ctx->nr_active) == 1) {
/* Prime/reset PL310 counter state before the first real sample
 * (guard lines around this call are not visible in this chunk). */
421 ctx->pl310->read(events, 1);
423 start_hrtimer(cpu_ctx);
424 atomic_inc(&hrt.nr_active_all_core);
/*
 * Scheduler hook: a task is being switched out on this CPU.  If profiling
 * is active and @prev is a profiled process, take one final sample from its
 * user-space register frame, deregister it, and on the last-thread
 * transition stop the sampling timer and drop the global active-core count.
 */
429 void __quadd_task_sched_out(struct task_struct *prev,
430 struct task_struct *next)
433 struct pt_regs *user_regs;
434 struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
435 struct quadd_ctx *ctx = hrt.quadd_ctx;
436 /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
/* Fast path: profiler not running (the early return is not visible here). */
438 if (likely(!hrt.active))
/* Rate-limited debug trace of the context switch. */
441 if (__ratelimit(&ratelimit_state))
442 pr_info("sch_out: cpu: %d, prev: %u (%u) \t--> next: %u (%u)\n",
443 smp_processor_id(), (unsigned int)prev->pid,
444 (unsigned int)prev->tgid, (unsigned int)next->pid,
445 (unsigned int)next->tgid);
448 if (is_profile_process(prev)) {
/* Final sample of the departing task, unwound from its saved user regs
 * (the guard around this call is not visible in this chunk). */
449 user_regs = task_pt_regs(prev);
451 read_all_sources(user_regs, prev);
/* n = number of active-thread slots actually cleared for prev->pid. */
453 n = remove_active_thread(cpu_ctx, prev->pid);
454 atomic_sub(n, &cpu_ctx->nr_active);
/* Last profiled thread left this CPU: stop sampling here. */
456 if (atomic_read(&cpu_ctx->nr_active) == 0) {
457 cancel_hrtimer(cpu_ctx);
458 atomic_dec(&hrt.nr_active_all_core);
/*
 * mmap hook: when a profiled process maps a new VMA while profiling is
 * active, forward the mapping to quadd_process_mmap() so the consumer can
 * resolve addresses (attributed to the first configured pid).
 */
466 void __quadd_event_mmap(struct vm_area_struct *vma)
468 struct quadd_parameters *param;
/* Fast path: profiler not running (early return not visible here). */
470 if (likely(!hrt.active))
/* Only mappings made by a profiled process are of interest. */
473 if (!is_profile_process(current))
476 param = &hrt.quadd_ctx->param;
477 quadd_process_mmap(vma, param->pids[0]);
479 EXPORT_SYMBOL(__quadd_event_mmap);
/*
 * Reset every CPU's profiling context: zero the active-thread counter (and,
 * per the visible t_data pointer, presumably the active-thread pid/tgid —
 * those assignment lines are not visible in this chunk).
 */
481 static void reset_cpu_ctx(void)
484 struct quadd_cpu_context *cpu_ctx;
485 struct quadd_thread_data *t_data;
487 for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) {
488 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
489 t_data = &cpu_ctx->active_thread;
491 atomic_set(&cpu_ctx->nr_active, 0);
/*
 * Start a profiling session: derive the sampling period from the requested
 * frequency (clamped to QUADD_HRT_MIN_FREQ), reset counters, optionally
 * snapshot the target process's current mmaps, emit the session header and
 * start the moving-average machinery.  Returns 0 or a negative errno
 * (return statements not visible in this chunk).
 */
498 int quadd_hrt_start(void)
504 struct quadd_ctx *ctx = hrt.quadd_ctx;
505 struct quadd_parameters *param = &ctx->param;
/* period(ns) = 1s / freq, with freq floored at QUADD_HRT_MIN_FREQ. */
507 freq = ctx->param.freq;
508 freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
509 period = NSEC_PER_SEC / freq;
510 hrt.sample_period = period;
/* Moving-average period in ms, only when its frequency is configured. */
512 if (ctx->param.ma_freq > 0)
513 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
517 atomic64_set(&hrt.counter_samples, 0);
523 extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
/* Capture the existing mappings of the first profiled pid up front so the
 * consumer can symbolize addresses from before profiling started. */
525 if (extra & QUADD_PARAM_IDX_EXTRA_GET_MMAP) {
526 err = quadd_get_current_mmap(param->pids[0]);
528 pr_err("error: quadd_get_current_mmap\n");
536 quadd_ma_start(&hrt);
540 pr_info("Start hrt: freq/period: %ld/%llu\n", freq, period);
/*
 * Stop the profiling session: report how many samples were emitted, then
 * reset the sample counter.  (The lines clearing hrt.active / stopping the
 * moving-average machinery are not visible in this chunk.)
 */
544 void quadd_hrt_stop(void)
546 struct quadd_ctx *ctx = hrt.quadd_ctx;
548 pr_info("Stop hrt, number of samples: %llu\n",
549 atomic64_read(&hrt.counter_samples));
558 atomic64_set(&hrt.counter_samples, 0);
560 /* reset_cpu_ctx(); */
/* Module teardown: release the per-CPU contexts allocated in quadd_hrt_init(). */
563 void quadd_hrt_deinit(void)
568 free_percpu(hrt.cpu_ctx);
/* Report module statistics: total samples emitted; skipped-sample counting
 * is not implemented (always 0). */
571 void quadd_hrt_get_state(struct quadd_module_state *state)
573 state->nr_all_samples = atomic64_read(&hrt.counter_samples);
574 state->nr_skipped_samples = 0;
577 struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx)
582 struct quadd_cpu_context *cpu_ctx;
587 freq = ctx->param.freq;
588 freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
589 period = NSEC_PER_SEC / freq;
590 hrt.sample_period = period;
592 if (ctx->param.ma_freq > 0)
593 hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
597 atomic64_set(&hrt.counter_samples, 0);
599 hrt.cpu_ctx = alloc_percpu(struct quadd_cpu_context);
601 return ERR_PTR(-ENOMEM);
603 for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) {
604 cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
606 atomic_set(&cpu_ctx->nr_active, 0);
608 cpu_ctx->active_thread.pid = -1;
609 cpu_ctx->active_thread.tgid = -1;
611 init_hrtimer(cpu_ctx);