/*
 * Imported from linux-3.10.git: drivers/misc/tegra-profiler/main.c
 * (commit 2939173b57bb055badd720d4cb008f6bbf5b133f)
 */
1 /*
2  * drivers/misc/tegra-profiler/main.c
3  *
4  * Copyright (c) 2013-2016, NVIDIA CORPORATION.  All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  */
16
17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
19 #include <linux/init.h>
20 #include <linux/module.h>
21 #include <linux/err.h>
22 #include <linux/sched.h>
23
24 #include <linux/tegra_profiler.h>
25
26 #include "quadd.h"
27 #include "arm_pmu.h"
28 #include "hrt.h"
29 #include "comm.h"
30 #include "mmap.h"
31 #include "debug.h"
32 #include "tegra.h"
33 #include "power_clk.h"
34 #include "auth.h"
35 #include "version.h"
36 #include "quadd_proc.h"
37 #include "eh_unwind.h"
38
39 #ifdef CONFIG_ARM64
40 #include "armv8_pmu.h"
41 #else
42 #include "armv7_pmu.h"
43 #endif
44
45 #ifdef CONFIG_CACHE_L2X0
46 #include "pl310.h"
47 #endif
48
/* Global profiler context shared by every entry point in this file. */
static struct quadd_ctx ctx;
/* Per-CPU PMU source description: presence, activity, supported events. */
static DEFINE_PER_CPU(struct source_info, ctx_pmu_info);
/* Per-CPU capability snapshot handed out via the control interface. */
static DEFINE_PER_CPU(struct quadd_comm_cap_for_cpu, per_cpu_caps);
52
53 static struct source_info *get_pmu_info_for_current_cpu(void)
54 {
55         return &__get_cpu_var(ctx_pmu_info);
56 }
57
58 static struct quadd_comm_cap_for_cpu *get_capabilities_for_cpu_int(int cpuid)
59 {
60         return &per_cpu(per_cpu_caps, cpuid);
61 }
62
63 static int get_default_properties(void)
64 {
65         ctx.param.freq = 100;
66         ctx.param.ma_freq = 50;
67         ctx.param.backtrace = 1;
68         ctx.param.use_freq = 1;
69         ctx.param.system_wide = 1;
70         ctx.param.power_rate_freq = 0;
71         ctx.param.debug_samples = 0;
72
73         ctx.param.pids[0] = 0;
74         ctx.param.nr_pids = 1;
75         ctx.get_capabilities_for_cpu = get_capabilities_for_cpu_int;
76         ctx.get_pmu_info = get_pmu_info_for_current_cpu;
77
78         return 0;
79 }
80
/*
 * Try to take the global profiler lock.
 * Returns 0 on success; non-zero means the lock was already held.
 */
int tegra_profiler_try_lock(void)
{
	return atomic_cmpxchg(&ctx.tegra_profiler_lock, 0, 1);
}
EXPORT_SYMBOL_GPL(tegra_profiler_try_lock);
86
/* Release the global profiler lock taken by tegra_profiler_try_lock(). */
void tegra_profiler_unlock(void)
{
	atomic_set(&ctx.tegra_profiler_lock, 0);
}
EXPORT_SYMBOL_GPL(tegra_profiler_unlock);
92
93 static int start(void)
94 {
95         int err;
96
97         if (tegra_profiler_try_lock()) {
98                 pr_err("Error: tegra_profiler lock\n");
99                 return -EBUSY;
100         }
101
102         if (!atomic_cmpxchg(&ctx.started, 0, 1)) {
103                 preempt_disable();
104
105                 if (ctx.pmu) {
106                         err = ctx.pmu->enable();
107                         if (err) {
108                                 pr_err("error: pmu enable\n");
109                                 goto errout_preempt;
110                         }
111                 }
112
113                 if (ctx.pl310) {
114                         err = ctx.pl310->enable();
115                         if (err) {
116                                 pr_err("error: pl310 enable\n");
117                                 goto errout_preempt;
118                         }
119                 }
120
121                 ctx.comm->reset();
122
123                 err = quadd_hrt_start();
124                 if (err) {
125                         pr_err("error: hrt start\n");
126                         goto errout_preempt;
127                 }
128
129                 preempt_enable();
130
131                 err = quadd_power_clk_start();
132                 if (err < 0) {
133                         pr_err("error: power_clk start\n");
134                         goto errout;
135                 }
136         }
137
138         return 0;
139
140 errout_preempt:
141         preempt_enable();
142
143 errout:
144         atomic_set(&ctx.started, 0);
145         tegra_profiler_unlock();
146
147         return err;
148 }
149
/*
 * Stop profiling: mirror of start().  The 1 -> 0 transition of
 * ctx.started guarantees the teardown runs only once.  Ordering is
 * deliberate: the sampler stops before the hardware counters are
 * disabled, and the global lock is dropped before preemption is
 * re-enabled; quadd_power_clk_stop() runs outside the preempt-off
 * region (matching where quadd_power_clk_start() runs in start()).
 */
static void stop(void)
{
	if (atomic_cmpxchg(&ctx.started, 1, 0)) {
		preempt_disable();

		/* Stop the hrtimer-based sampler first. */
		quadd_hrt_stop();

		ctx.comm->reset();

		quadd_unwind_stop();

		/* Disable the hardware event sources. */
		if (ctx.pmu)
			ctx.pmu->disable();

		if (ctx.pl310)
			ctx.pl310->disable();

		tegra_profiler_unlock();

		preempt_enable();

		quadd_power_clk_stop();
	}
}
174
175 static inline int is_event_supported(struct source_info *si, int event)
176 {
177         int i;
178         int nr = si->nr_supported_events;
179         int *events = si->supported_events;
180
181         for (i = 0; i < nr; i++) {
182                 if (event == events[i])
183                         return 1;
184         }
185         return 0;
186 }
187
/* A sampling frequency is valid iff it lies in [100, 100000]. */
static int
validate_freq(unsigned int freq)
{
	if (freq < 100)
		return 0;

	return freq <= 100000;
}
193
194 static int
195 set_parameters_for_cpu(struct quadd_pmu_setup_for_cpu *params)
196 {
197         int i;
198         int err;
199         int nr_pmu = 0;
200         int cpuid = params->cpuid;
201
202         struct source_info *pmu_info = &per_cpu(ctx_pmu_info, cpuid);
203         int pmu_events_id[QUADD_MAX_COUNTERS];
204
205         if (!pmu_info->is_present)
206                 return -ENODEV;
207
208         for (i = 0; i < params->nr_events; i++) {
209                 int event = params->events[i];
210
211                 if (pmu_info->nr_supported_events > 0
212                         && is_event_supported(pmu_info, event)) {
213                         pmu_events_id[nr_pmu++] = event;
214                         pr_info("PMU active event for cpu %d: %s\n",
215                                         cpuid,
216                                         quadd_get_event_str(event));
217                 } else {
218                         pr_err("Bad event: %s\n",
219                                quadd_get_event_str(event));
220                         return -EINVAL;
221                 }
222         }
223
224         err = ctx.pmu->set_events(cpuid, pmu_events_id, nr_pmu);
225         if (err) {
226                 pr_err("PMU set parameters: error\n");
227                 return err;
228         }
229         per_cpu(ctx_pmu_info, cpuid).active = 1;
230
231         return err;
232 }
233
234 static int
235 set_parameters(struct quadd_parameters *p)
236 {
237         int i, err, uid = 0;
238         uid_t task_uid, current_uid;
239         int pl310_events_id;
240         int nr_pl310 = 0;
241         struct task_struct *task;
242         u64 *low_addr_p;
243
244         if (!validate_freq(p->freq)) {
245                 pr_err("error: incorrect frequency: %u\n", p->freq);
246                 return -EINVAL;
247         }
248
249         /* Currently only one process */
250         if (p->nr_pids != 1)
251                 return -EINVAL;
252
253         p->package_name[sizeof(p->package_name) - 1] = '\0';
254
255         ctx.param = *p;
256
257         rcu_read_lock();
258         task = pid_task(find_vpid(p->pids[0]), PIDTYPE_PID);
259         rcu_read_unlock();
260         if (!task) {
261                 pr_err("error: process not found: %u\n", p->pids[0]);
262                 return -ESRCH;
263         }
264
265         current_uid = __kuid_val(current_fsuid());
266         task_uid = __kuid_val(task_uid(task));
267         pr_info("owner/task uids: %u/%u\n", current_uid, task_uid);
268
269         if (!capable(CAP_SYS_ADMIN)) {
270                 if (current_uid != task_uid) {
271                         pr_info("package: %s\n", p->package_name);
272
273                         uid = quadd_auth_is_debuggable((char *)p->package_name);
274                         if (uid < 0) {
275                                 pr_err("error: tegra profiler security service\n");
276                                 return uid;
277                         } else if (uid == 0) {
278                                 pr_err("error: app is not debuggable\n");
279                                 return -EACCES;
280                         }
281                         pr_info("app is debuggable, uid: %u\n", uid);
282
283                         if (task_uid != uid) {
284                                 pr_err("error: uids are not matched\n");
285                                 return -EACCES;
286                         }
287                 }
288                 ctx.collect_kernel_ips = 0;
289         } else {
290                 ctx.collect_kernel_ips = 1;
291         }
292
293         for (i = 0; i < p->nr_events; i++) {
294                 int event = p->events[i];
295
296                 if (ctx.pl310 &&
297                     ctx.pl310_info.nr_supported_events > 0 &&
298                     is_event_supported(&ctx.pl310_info, event)) {
299                         pl310_events_id = p->events[i];
300
301                         pr_info("PL310 active event: %s\n",
302                                 quadd_get_event_str(event));
303
304                         if (nr_pl310++ > 1) {
305                                 pr_err("error: multiply pl310 events\n");
306                                 return -EINVAL;
307                         }
308                 } else {
309                         pr_err("Bad event: %s\n",
310                                quadd_get_event_str(event));
311                         return -EINVAL;
312                 }
313         }
314
315         if (ctx.pl310) {
316                 int cpuid = 0; /* We don't need cpuid for pl310.  */
317
318                 if (nr_pl310 == 1) {
319                         err = ctx.pl310->set_events(cpuid, &pl310_events_id, 1);
320                         if (err) {
321                                 pr_info("pl310 set_parameters: error\n");
322                                 return err;
323                         }
324                         ctx.pl310_info.active = 1;
325                 } else {
326                         ctx.pl310_info.active = 0;
327                         ctx.pl310->set_events(cpuid, NULL, 0);
328                 }
329         }
330
331         low_addr_p = (u64 *)&p->reserved[QUADD_PARAM_IDX_BT_LOWER_BOUND];
332         ctx.hrt->low_addr = (unsigned long)*low_addr_p;
333         pr_info("bt lower bound: %#lx\n", ctx.hrt->low_addr);
334
335         err = quadd_unwind_start(task);
336         if (err)
337                 return err;
338
339         pr_info("New parameters have been applied\n");
340
341         return 0;
342 }
343
/*
 * Fill @cap with the event capabilities of @cpuid based on the events
 * the PMU driver reported as supported.  Does nothing if no PMU was
 * detected on that CPU.  On an unknown event id the function logs once
 * and returns early, leaving the remaining flags cleared.
 */
static void
get_capabilities_for_cpu(int cpuid, struct quadd_comm_cap_for_cpu *cap)
{
	int i;
	struct quadd_events_cap *events_cap;
	struct source_info *s = &per_cpu(ctx_pmu_info, cpuid);

	if (!s->is_present)
		return;

	events_cap = &cap->events_cap;

	cap->cpuid = cpuid;
	/* Start from a clean slate: every event flag off. */
	events_cap->cpu_cycles = 0;
	events_cap->l1_dcache_read_misses = 0;
	events_cap->l1_dcache_write_misses = 0;
	events_cap->l1_icache_misses = 0;

	events_cap->instructions = 0;
	events_cap->branch_instructions = 0;
	events_cap->branch_misses = 0;
	events_cap->bus_cycles = 0;

	events_cap->l2_dcache_read_misses = 0;
	events_cap->l2_dcache_write_misses = 0;
	events_cap->l2_icache_misses = 0;

	for (i = 0; i < s->nr_supported_events; i++) {
		int event = s->supported_events[i];

		/*
		 * Any PMU-backed L2 event implies L2 support and the
		 * ability to count several L2 events at once.
		 */
		if (event == QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES ||
		    event == QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES ||
		    event == QUADD_EVENT_TYPE_L2_ICACHE_MISSES) {
			cap->l2_cache = 1;
			cap->l2_multiple_events = 1;
		}


		switch (event) {
		case QUADD_EVENT_TYPE_CPU_CYCLES:
			events_cap->cpu_cycles = 1;
			break;
		case QUADD_EVENT_TYPE_INSTRUCTIONS:
			events_cap->instructions = 1;
			break;
		case QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS:
			events_cap->branch_instructions = 1;
			break;
		case QUADD_EVENT_TYPE_BRANCH_MISSES:
			events_cap->branch_misses = 1;
			break;
		case QUADD_EVENT_TYPE_BUS_CYCLES:
			events_cap->bus_cycles = 1;
			break;

		case QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES:
			events_cap->l1_dcache_read_misses = 1;
			break;
		case QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES:
			events_cap->l1_dcache_write_misses = 1;
			break;
		case QUADD_EVENT_TYPE_L1_ICACHE_MISSES:
			events_cap->l1_icache_misses = 1;
			break;

		case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES:
			events_cap->l2_dcache_read_misses = 1;
			break;
		case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES:
			events_cap->l2_dcache_write_misses = 1;
			break;
		case QUADD_EVENT_TYPE_L2_ICACHE_MISSES:
			events_cap->l2_icache_misses = 1;
			break;

		default:
			pr_err_once("%s: error: invalid event\n",
						__func__);
			return;
		}
	}
}
426
427 static u32 get_possible_cpu(void)
428 {
429         int cpu;
430         u32 mask = 0;
431         struct source_info *s;
432
433         if (ctx.pmu) {
434                 for_each_possible_cpu(cpu) {
435                         /* since we don't support more than 32 CPUs */
436                         if (cpu >= BITS_PER_BYTE * sizeof(mask))
437                                 break;
438
439                         s = &per_cpu(ctx_pmu_info, cpu);
440                         if (s->is_present)
441                                 mask |= (1U << cpu);
442                 }
443         }
444
445         return mask;
446 }
447
/*
 * Fill @cap with the global capabilities: whether a PMU and a PL310 L2
 * counter are present, which PL310 events are supported, and a bit
 * mask of extra protocol features plus the possible-CPU mask in the
 * reserved[] words.
 */
static void
get_capabilities(struct quadd_comm_cap *cap)
{
	int i;
	unsigned int extra = 0;
	struct quadd_events_cap *events_cap = &cap->events_cap;

	cap->pmu = ctx.pmu ? 1 : 0;

	/* A standalone PL310 counter handles only one event at a time. */
	cap->l2_cache = 0;
	if (ctx.pl310) {
		cap->l2_cache = 1;
		cap->l2_multiple_events = 0;
	}

	/* Clear all event flags; only PL310-backed ones are set below. */
	events_cap->cpu_cycles = 0;
	events_cap->l1_dcache_read_misses = 0;
	events_cap->l1_dcache_write_misses = 0;
	events_cap->l1_icache_misses = 0;

	events_cap->instructions = 0;
	events_cap->branch_instructions = 0;
	events_cap->branch_misses = 0;
	events_cap->bus_cycles = 0;

	events_cap->l2_dcache_read_misses = 0;
	events_cap->l2_dcache_write_misses = 0;
	events_cap->l2_icache_misses = 0;

	if (ctx.pl310) {
		struct source_info *s = &ctx.pl310_info;

		for (i = 0; i < s->nr_supported_events; i++) {
			int event = s->supported_events[i];

			switch (event) {
			case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES:
				events_cap->l2_dcache_read_misses = 1;
				break;
			case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES:
				events_cap->l2_dcache_write_misses = 1;
				break;
			case QUADD_EVENT_TYPE_L2_ICACHE_MISSES:
				events_cap->l2_icache_misses = 1;
				break;

			default:
				/* Unknown PL310 event: log once, keep
				 * whatever flags were set so far. */
				pr_err_once("%s: error: invalid event\n",
					    __func__);
				return;
			}
		}
	}

	cap->tegra_lp_cluster = quadd_is_cpu_with_lp_cluster();
	cap->power_rate = 1;
	cap->blocked_read = 1;

	/* Advertise the protocol extensions this build supports. */
	extra |= QUADD_COMM_CAP_EXTRA_BT_KERNEL_CTX;
	extra |= QUADD_COMM_CAP_EXTRA_GET_MMAP;
	extra |= QUADD_COMM_CAP_EXTRA_GROUP_SAMPLES;
	extra |= QUADD_COMM_CAP_EXTRA_BT_UNWIND_TABLES;
	extra |= QUADD_COMM_CAP_EXTRA_SUPPORT_AARCH64;
	extra |= QUADD_COMM_CAP_EXTRA_SPECIAL_ARCH_MMAP;
	extra |= QUADD_COMM_CAP_EXTRA_UNWIND_MIXED;
	extra |= QUADD_COMM_CAP_EXTRA_UNW_ENTRY_TYPE;
	extra |= QUADD_COMM_CAP_EXTRA_RB_MMAP_OP;
	extra |= QUADD_COMM_CAP_EXTRA_CPU_MASK;

	if (ctx.hrt->tc)
		extra |= QUADD_COMM_CAP_EXTRA_ARCH_TIMER;

	cap->reserved[QUADD_COMM_CAP_IDX_EXTRA] = extra;
	cap->reserved[QUADD_COMM_CAP_IDX_CPU_MASK] = get_possible_cpu();
}
523
524 void quadd_get_state(struct quadd_module_state *state)
525 {
526         unsigned int status = 0;
527
528         quadd_hrt_get_state(state);
529
530         if (ctx.comm->is_active())
531                 status |= QUADD_MOD_STATE_STATUS_IS_ACTIVE;
532
533         if (quadd_auth_is_auth_open())
534                 status |= QUADD_MOD_STATE_STATUS_IS_AUTH_OPEN;
535
536         state->reserved[QUADD_MOD_STATE_IDX_STATUS] = status;
537 }
538
/* Forward unwind-table registration to the EH unwinder. */
static int
set_extab(struct quadd_sections *extabs,
	  struct quadd_mmap_area *mmap)
{
	return quadd_unwind_set_extab(extabs, mmap);
}
545
/* Forward mmap-area removal to the EH unwinder. */
static void
delete_mmap(struct quadd_mmap_area *mmap)
{
	quadd_unwind_delete_mmap(mmap);
}
551
552 static int
553 is_cpu_present(int cpuid)
554 {
555         struct source_info *s = &per_cpu(ctx_pmu_info, cpuid);
556
557         return s->is_present;
558 }
559
/*
 * Control hooks registered with the communication layer (see the
 * quadd_comm_events_init() call in quadd_module_init()).
 */
static struct quadd_comm_control_interface control = {
	.start			= start,
	.stop			= stop,
	.set_parameters		= set_parameters,
	.set_parameters_for_cpu = set_parameters_for_cpu,
	.get_capabilities	= get_capabilities,
	.get_capabilities_for_cpu = get_capabilities_for_cpu,
	.get_state		= quadd_get_state,
	.set_extab		= set_extab,
	.delete_mmap		= delete_mmap,
	.is_cpu_present		= is_cpu_present,
};
572
573 static int __init quadd_module_init(void)
574 {
575         int i, nr_events, err;
576         int *events;
577         int cpuid;
578
579         pr_info("Branch: %s\n", QUADD_MODULE_BRANCH);
580         pr_info("Version: %s\n", QUADD_MODULE_VERSION);
581         pr_info("Samples version: %d\n", QUADD_SAMPLES_VERSION);
582         pr_info("IO version: %d\n", QUADD_IO_VERSION);
583
584 #ifdef QM_DEBUG_SAMPLES_ENABLE
585         pr_info("############## DEBUG VERSION! ##############\n");
586 #endif
587
588         atomic_set(&ctx.started, 0);
589         atomic_set(&ctx.tegra_profiler_lock, 0);
590
591         get_default_properties();
592
593         for_each_possible_cpu(cpuid) {
594                 struct source_info *pmu_info = &per_cpu(ctx_pmu_info, cpuid);
595
596                 pmu_info->active = 0;
597                 pmu_info->is_present = 0;
598         }
599
600         ctx.pl310_info.active = 0;
601
602 #ifdef CONFIG_ARM64
603         ctx.pmu = quadd_armv8_pmu_init();
604 #else
605         ctx.pmu = quadd_armv7_pmu_init();
606 #endif
607         if (!ctx.pmu) {
608                 pr_err("PMU init failed\n");
609                 return -ENODEV;
610         }
611
612         for_each_possible_cpu(cpuid) {
613                 struct quadd_arch_info *arch;
614                 struct source_info *pmu_info;
615
616                 arch = ctx.pmu->get_arch(cpuid);
617                 if (!arch)
618                         continue;
619
620                 pmu_info = &per_cpu(ctx_pmu_info, cpuid);
621                 pmu_info->is_present = 1;
622
623                 events = pmu_info->supported_events;
624                 nr_events =
625                     ctx.pmu->get_supported_events(cpuid, events,
626                                                   QUADD_MAX_COUNTERS);
627
628                 pmu_info->nr_supported_events = nr_events;
629
630                 pr_debug("CPU: %d PMU: amount of events: %d\n",
631                          cpuid, nr_events);
632
633                 for (i = 0; i < nr_events; i++)
634                         pr_debug("CPU: %d PMU event: %s\n", cpuid,
635                                  quadd_get_event_str(events[i]));
636         }
637
638 #ifdef CONFIG_CACHE_L2X0
639         ctx.pl310 = quadd_l2x0_events_init();
640 #else
641         ctx.pl310 = NULL;
642 #endif
643         if (ctx.pl310) {
644                 events = ctx.pl310_info.supported_events;
645                 nr_events = ctx.pl310->get_supported_events(0, events,
646                                                             QUADD_MAX_COUNTERS);
647                 ctx.pl310_info.nr_supported_events = nr_events;
648
649                 pr_info("pl310 success, amount of events: %d\n",
650                         nr_events);
651
652                 for (i = 0; i < nr_events; i++)
653                         pr_info("pl310 event: %s\n",
654                                 quadd_get_event_str(events[i]));
655         } else {
656                 pr_debug("PL310 not found\n");
657         }
658
659         ctx.hrt = quadd_hrt_init(&ctx);
660         if (IS_ERR(ctx.hrt)) {
661                 pr_err("error: HRT init failed\n");
662                 return PTR_ERR(ctx.hrt);
663         }
664
665         err = quadd_power_clk_init(&ctx);
666         if (err < 0) {
667                 pr_err("error: POWER CLK init failed\n");
668                 return err;
669         }
670
671         ctx.comm = quadd_comm_events_init(&control);
672         if (IS_ERR(ctx.comm)) {
673                 pr_err("error: COMM init failed\n");
674                 return PTR_ERR(ctx.comm);
675         }
676
677         err = quadd_auth_init(&ctx);
678         if (err < 0) {
679                 pr_err("error: auth failed\n");
680                 return err;
681         }
682
683         err = quadd_unwind_init();
684         if (err < 0) {
685                 pr_err("error: EH unwinding init failed\n");
686                 return err;
687         }
688
689         get_capabilities(&ctx.cap);
690
691         for_each_possible_cpu(cpuid)
692                 get_capabilities_for_cpu(cpuid, &per_cpu(per_cpu_caps, cpuid));
693
694         quadd_proc_init(&ctx);
695
696         return 0;
697 }
698
/*
 * Module teardown: deinitialize every subsystem brought up by
 * quadd_module_init(), finishing with the architecture PMU driver.
 */
static void __exit quadd_module_exit(void)
{
	pr_info("QuadD module exit\n");

	quadd_hrt_deinit();
	quadd_power_clk_deinit();
	quadd_comm_events_exit();
	quadd_auth_deinit();
	quadd_proc_deinit();
	quadd_unwind_deinit();

#ifdef CONFIG_ARM64
	quadd_armv8_pmu_deinit();
#else
	quadd_armv7_pmu_deinit();
#endif
}
716
/* Module entry/exit hooks and metadata. */
module_init(quadd_module_init);
module_exit(quadd_module_exit);

MODULE_LICENSE("GPL");

MODULE_AUTHOR("Nvidia Ltd");
MODULE_DESCRIPTION("Tegra profiler");