ARM: tegra: Add Tegra Profiler
Igor Nabirushkin [Wed, 26 Jun 2013 11:21:17 +0000 (15:21 +0400)]
Add Tegra Profiler kernel misc driver

Bug 1312406

Change-Id: Ifeadff529dbc949c40f91534fa059fde2b5f76b4
Signed-off-by: Igor Nabirushkin <inabirushkin@nvidia.com>
Reviewed-on: http://git-master/r/242395
(cherry picked from commit 7ef04d577f5764c3badd67cef9dcc36cc43c80e1)
Reviewed-on: http://git-master/r/268510
GVS: Gerrit_Virtual_Submit
Tested-by: Maxim Morin <mmorin@nvidia.com>
Reviewed-by: Bo Yan <byan@nvidia.com>

29 files changed:
drivers/misc/Kconfig
drivers/misc/Makefile
drivers/misc/tegra-profiler/Kconfig [new file with mode: 0644]
drivers/misc/tegra-profiler/Makefile [new file with mode: 0644]
drivers/misc/tegra-profiler/armv7_pmu.c [new file with mode: 0644]
drivers/misc/tegra-profiler/armv7_pmu.h [new file with mode: 0644]
drivers/misc/tegra-profiler/auth.c [new file with mode: 0644]
drivers/misc/tegra-profiler/auth.h [new file with mode: 0644]
drivers/misc/tegra-profiler/backtrace.c [new file with mode: 0644]
drivers/misc/tegra-profiler/backtrace.h [new file with mode: 0644]
drivers/misc/tegra-profiler/comm.c [new file with mode: 0644]
drivers/misc/tegra-profiler/comm.h [new file with mode: 0644]
drivers/misc/tegra-profiler/debug.c [new file with mode: 0644]
drivers/misc/tegra-profiler/debug.h [new file with mode: 0644]
drivers/misc/tegra-profiler/hrt.c [new file with mode: 0644]
drivers/misc/tegra-profiler/hrt.h [new file with mode: 0644]
drivers/misc/tegra-profiler/ma.c [new file with mode: 0644]
drivers/misc/tegra-profiler/ma.h [new file with mode: 0644]
drivers/misc/tegra-profiler/main.c [new file with mode: 0644]
drivers/misc/tegra-profiler/mmap.c [new file with mode: 0644]
drivers/misc/tegra-profiler/mmap.h [new file with mode: 0644]
drivers/misc/tegra-profiler/pl310.c [new file with mode: 0644]
drivers/misc/tegra-profiler/pl310.h [new file with mode: 0644]
drivers/misc/tegra-profiler/power_clk.c [new file with mode: 0644]
drivers/misc/tegra-profiler/power_clk.h [new file with mode: 0644]
drivers/misc/tegra-profiler/quadd.h [new file with mode: 0644]
drivers/misc/tegra-profiler/tegra.h [new file with mode: 0644]
drivers/misc/tegra-profiler/version.h [new file with mode: 0644]
include/linux/tegra_profiler.h [new file with mode: 0644]

index 72875a6..79cb885 100644 (file)
@@ -645,4 +645,5 @@ source "drivers/misc/tegra-cec/Kconfig"
 source "drivers/misc/mei/Kconfig"
 source "drivers/misc/vmw_vmci/Kconfig"
 source "drivers/misc/issp/Kconfig"
+source "drivers/misc/tegra-profiler/Kconfig"
 endmenu
index e869a63..24296fb 100644 (file)
@@ -75,3 +75,4 @@ obj-$(CONFIG_CPULOAD_MONITOR) += cpuload.o
 obj-$(CONFIG_SIM_MAX77660)     += max77660-sim.o
 obj-$(CONFIG_SIM_PALMAS)       += palmas-sim.o
 obj-y                          += issp/
+obj-$(CONFIG_TEGRA_PROFILER)    += tegra-profiler/
diff --git a/drivers/misc/tegra-profiler/Kconfig b/drivers/misc/tegra-profiler/Kconfig
new file mode 100644 (file)
index 0000000..312b75e
--- /dev/null
@@ -0,0 +1,6 @@
+config TEGRA_PROFILER
+       bool "Enable Tegra profiler"
+       depends on ARCH_TEGRA
+       select KPROBES
+       help
+         This option enables Tegra profiler
diff --git a/drivers/misc/tegra-profiler/Makefile b/drivers/misc/tegra-profiler/Makefile
new file mode 100644 (file)
index 0000000..7b3d808
--- /dev/null
@@ -0,0 +1,18 @@
+#
+# drivers/misc/tegra-profiler/Makefile
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+#
+
+obj-$(CONFIG_TEGRA_PROFILER) += tegra-profiler.o
+tegra-profiler-objs := main.o armv7_pmu.o pl310.o hrt.o comm.o mmap.o backtrace.o debug.o ma.o power_clk.o auth.o
+
diff --git a/drivers/misc/tegra-profiler/armv7_pmu.c b/drivers/misc/tegra-profiler/armv7_pmu.c
new file mode 100644 (file)
index 0000000..9499c1f
--- /dev/null
@@ -0,0 +1,477 @@
+/*
+ * drivers/misc/tegra-profiler/armv7_pmu.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <asm/cputype.h>
+#include <asm/pmu.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "armv7_pmu.h"
+#include "quadd.h"
+#include "debug.h"
+/* Driver-wide PMU state: detected CPU, event map and selected events. */
+static struct armv7_pmu_ctx pmu_ctx;
+/*
+ * Per-CPU copy of the counter values returned by the previous read; slot i
+ * belongs to pmu_ctx.pmu_events[i].
+ */
+DEFINE_PER_CPU(u32[QUADD_MAX_PMU_COUNTERS], pmu_prev_val);
+/*
+ * Cortex-A9: translation from QUADD_EVENT_TYPE_* ids to PMU event numbers.
+ *
+ * L1 D-cache read and write misses both map to the same DCACHE_REFILL event
+ * (see armv7_pmu_adjust_value()), bus cycles are mapped to CLOCK_CYCLES and
+ * the L2 events are marked QUADD_ARMV7_UNSUPPORTED_EVENT.  Ids without an
+ * explicit entry are zero-initialized.
+ */
+static unsigned quadd_armv7_a9_events_map[QUADD_EVENT_TYPE_MAX] = {
+       [QUADD_EVENT_TYPE_INSTRUCTIONS] =
+               QUADD_ARMV7_A9_HW_EVENT_INST_OUT_OF_RENAME_STAGE,
+       [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
+               QUADD_ARMV7_HW_EVENT_PC_WRITE,
+       [QUADD_EVENT_TYPE_BRANCH_MISSES] =
+               QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
+       [QUADD_EVENT_TYPE_BUS_CYCLES] =
+               QUADD_ARMV7_HW_EVENT_CLOCK_CYCLES,
+
+       [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
+               QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
+       [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
+               QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
+       [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
+               QUADD_ARMV7_HW_EVENT_IFETCH_MISS,
+
+       [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
+               QUADD_ARMV7_UNSUPPORTED_EVENT,
+       [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
+               QUADD_ARMV7_UNSUPPORTED_EVENT,
+       [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
+               QUADD_ARMV7_UNSUPPORTED_EVENT,
+};
+/*
+ * Cortex-A15: translation from QUADD_EVENT_TYPE_* ids to PMU event numbers.
+ *
+ * Read and write refills have separate events for both the L1 and the L2
+ * data cache; only L2 instruction cache misses are marked
+ * QUADD_ARMV7_UNSUPPORTED_EVENT.  Ids without an explicit entry are
+ * zero-initialized.
+ */
+static unsigned quadd_armv7_a15_events_map[QUADD_EVENT_TYPE_MAX] = {
+       [QUADD_EVENT_TYPE_INSTRUCTIONS] =
+                               QUADD_ARMV7_HW_EVENT_INSTR_EXECUTED,
+       [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
+                               QUADD_ARMV7_A15_HW_EVENT_SPEC_PC_WRITE,
+       [QUADD_EVENT_TYPE_BRANCH_MISSES] =
+                               QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
+       [QUADD_EVENT_TYPE_BUS_CYCLES] = QUADD_ARMV7_HW_EVENT_BUS_CYCLES,
+
+       [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
+                               QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_REFILL,
+       [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
+                               QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_REFILL,
+       [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
+                               QUADD_ARMV7_HW_EVENT_IFETCH_MISS,
+
+       [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
+                               QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_REFILL,
+       [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
+                               QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_REFILL,
+       [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
+                               QUADD_ARMV7_UNSUPPORTED_EVENT,
+};
+
+/* Return the current contents of the PMU control register (PMNC). */
+static u32 armv7_pmu_pmnc_read(void)
+{
+       u32 pmnc;
+
+       asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(pmnc));
+
+       return pmnc;
+}
+
+/*
+ * Write @val to the PMU control register (PMNC).  Bits outside
+ * QUADD_ARMV7_PMNC_MASK are dropped before the write.
+ */
+static void armv7_pmu_pmnc_write(u32 val)
+{
+       u32 pmnc = val & QUADD_ARMV7_PMNC_MASK;
+
+       asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(pmnc));
+}
+
+/*
+ * Enable a single counter through the count enable set register.
+ * @index is either an event counter number or QUADD_ARMV7_CYCLE_COUNTER.
+ */
+static void armv7_pmu_pmnc_enable_counter(int index)
+{
+       u32 enable_bit;
+
+       enable_bit = (index == QUADD_ARMV7_CYCLE_COUNTER) ?
+                       QUADD_ARMV7_CCNT : (1 << index);
+
+       asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (enable_bit));
+}
+
+/*
+ * Make event counter @idx the target of subsequent event type / event
+ * count register accesses.  Only the bits in QUADD_ARMV7_SELECT_MASK are
+ * written to the selection register.
+ */
+static void armv7_pmu_select_counter(unsigned int idx)
+{
+       u32 sel = idx & QUADD_ARMV7_SELECT_MASK;
+
+       asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (sel));
+}
+
+/*
+ * Post-process a raw counter value for event @event_id.
+ *
+ * On Cortex-A8/A9 the L1 data cache counters do not distinguish reads from
+ * writes.  When both the read-miss and the write-miss events are being
+ * profiled (pmu_ctx.l1_cache_rw), each of them reports half of the raw
+ * value.  Every other value is returned unchanged.
+ */
+static u32 armv7_pmu_adjust_value(u32 value, int event_id)
+{
+       if (!pmu_ctx.l1_cache_rw)
+               return value;
+
+       if (pmu_ctx.arch != QUADD_ARM_CPU_TYPE_CORTEX_A8 &&
+           pmu_ctx.arch != QUADD_ARM_CPU_TYPE_CORTEX_A9)
+               return value;
+
+       switch (event_id) {
+       case QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES:
+       case QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES:
+               return value / 2;
+       default:
+               return value;
+       }
+}
+
+/*
+ * Return the current value of counter @idx, which is either an event
+ * counter number or QUADD_ARMV7_CYCLE_COUNTER.
+ */
+static u32 armv7_pmu_read_counter(int idx)
+{
+       u32 count = 0;
+
+       if (idx != QUADD_ARMV7_CYCLE_COUNTER) {
+               /* An event counter must be selected before it is read */
+               armv7_pmu_select_counter(idx);
+               asm volatile ("MRC p15, 0, %0, c9, c13, 2" : "=r"(count));
+               return count;
+       }
+
+       /* The cycle count register (PMCCNTR) needs no selection */
+       asm volatile ("MRC p15, 0, %0, c9, c13, 0" : "=r"(count));
+       return count;
+}
+
+/*
+ * Store @value into counter @idx, which is either an event counter number
+ * or QUADD_ARMV7_CYCLE_COUNTER.
+ */
+static __attribute__((unused)) void armv7_pmu_write_counter(int idx, u32 value)
+{
+       if (idx != QUADD_ARMV7_CYCLE_COUNTER) {
+               /* An event counter must be selected before it is written */
+               armv7_pmu_select_counter(idx);
+               asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
+               return;
+       }
+
+       /* The cycle count register (PMCCNTR) needs no selection */
+       asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
+}
+
+/*
+ * Program the event type of the currently selected event counter.  Only the
+ * bits in QUADD_ARMV7_EVTSEL_MASK of @event are written.
+ */
+static void armv7_pmu_event_select(u32 event)
+{
+       u32 evtsel = event & QUADD_ARMV7_EVTSEL_MASK;
+
+       asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (evtsel));
+}
+
+/*
+ * Enable the overflow interrupt of counter @idx (an event counter number or
+ * QUADD_ARMV7_CYCLE_COUNTER) through the interrupt enable set register.
+ */
+static __attribute__((unused)) void armv7_pmnc_enable_interrupt(int idx)
+{
+       u32 irq_bit;
+
+       irq_bit = (idx == QUADD_ARMV7_CYCLE_COUNTER) ?
+                       QUADD_ARMV7_CCNT : (1 << idx);
+
+       asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (irq_bit));
+}
+
+/*
+ * Disable the overflow interrupt of counter @idx (an event counter number
+ * or QUADD_ARMV7_CYCLE_COUNTER) through the interrupt enable clear register.
+ */
+static __attribute__((unused)) void armv7_pmnc_disable_interrupt(int idx)
+{
+       u32 irq_bit;
+
+       irq_bit = (idx == QUADD_ARMV7_CYCLE_COUNTER) ?
+                       QUADD_ARMV7_CCNT : (1 << idx);
+
+       asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (irq_bit));
+}
+
+/*
+ * Disable the overflow interrupts of the cycle counter and of every event
+ * counter covered by pmu_ctx.counters_mask.
+ */
+static void armv7_pmnc_disable_all_interrupts(void)
+{
+       u32 all_counters = pmu_ctx.counters_mask | QUADD_ARMV7_CCNT;
+
+       asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (all_counters));
+}
+
+/*
+ * Clear the overflow flags of the cycle counter and of every event counter
+ * covered by pmu_ctx.counters_mask.
+ */
+static void armv7_pmnc_reset_overflow_flags(void)
+{
+       u32 all_counters = pmu_ctx.counters_mask | QUADD_ARMV7_CCNT;
+
+       asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (all_counters));
+}
+/*
+ * Bind hardware event number @event to event counter @idx.  The counter has
+ * to be selected first because the event type write applies to the currently
+ * selected counter.
+ */
+static inline void select_event(unsigned int idx, unsigned int event)
+{
+       /* counter selection */
+       armv7_pmu_select_counter(idx);
+       armv7_pmu_event_select(event);
+}
+
+/*
+ * Stop all counters by clearing the global enable bit of the PMNC register.
+ * The register is left untouched when counting is already disabled.
+ */
+static inline void disable_all_counters(void)
+{
+       u32 pmnc = armv7_pmu_pmnc_read();
+
+       if (!(pmnc & QUADD_ARMV7_PMNC_E))
+               return;
+
+       armv7_pmu_pmnc_write(pmnc & ~QUADD_ARMV7_PMNC_E);
+}
+
+/*
+ * Start counting: set the global enable bit (E) together with the export
+ * enable bit (X) in the PMNC register, keeping the other bits as read.
+ */
+static inline void enable_all_counters(void)
+{
+       u32 pmnc = armv7_pmu_pmnc_read();
+
+       armv7_pmu_pmnc_write(pmnc | QUADD_ARMV7_PMNC_E | QUADD_ARMV7_PMNC_X);
+}
+/*
+ * Bring the PMU into a known state before programming it: clear all
+ * overflow flags and disable all overflow interrupts.
+ */
+static inline void quadd_init_pmu(void)
+{
+       armv7_pmnc_reset_overflow_flags();
+       armv7_pmnc_disable_all_interrupts();
+}
+
+/*
+ * Zero the event counters (P bit) and the cycle counter (C bit) by setting
+ * both reset bits in the PMNC register.
+ */
+static inline void reset_all_counters(void)
+{
+       u32 pmnc = armv7_pmu_pmnc_read();
+
+       armv7_pmu_pmnc_write(pmnc | QUADD_ARMV7_PMNC_P | QUADD_ARMV7_PMNC_C);
+}
+/* .enable hook of the event source: nothing to do here, always returns 0. */
+static int pmu_enable(void)
+{
+       return 0;
+}
+/* .disable hook of the event source: intentionally empty. */
+static void pmu_disable(void)
+{
+}
+
+/*
+ * Program and start the selected counters on the executing CPU.
+ *
+ * Counting is stopped while the PMU is reconfigured: overflow flags and
+ * interrupts are cleared, every selected event is bound to its counter and
+ * enabled, then all counters are zeroed and counting is switched on.  The
+ * per-CPU "previous value" slots of the selected events restart from zero.
+ */
+static void pmu_start(void)
+{
+       int n;
+       u32 *saved = __get_cpu_var(pmu_prev_val);
+       struct quadd_pmu_event_info *ev = pmu_ctx.pmu_events;
+
+       disable_all_counters();
+       quadd_init_pmu();
+
+       for (n = 0; n < pmu_ctx.nr_used_counters; n++, ev++) {
+               int counter = ev->counter_idx;
+
+               saved[n] = 0;
+
+               /* The cycle counter has no event type to program */
+               if (counter != QUADD_ARMV7_CYCLE_COUNTER)
+                       select_event(counter, ev->hw_value);
+
+               armv7_pmu_pmnc_enable_counter(counter);
+       }
+
+       reset_all_counters();
+       enable_all_counters();
+
+       qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
+}
+/*
+ * Stop counting on the executing CPU: zero all counters, then clear the
+ * global enable bit.
+ */
+static void pmu_stop(void)
+{
+       reset_all_counters();
+       disable_all_counters();
+
+       qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
+}
+
+/*
+ * Sample every selected counter on the executing CPU.
+ *
+ * @events: output array with room for at least pmu_ctx.nr_used_counters
+ *          entries
+ *
+ * Each entry receives the event source, the event id, the (adjusted)
+ * current counter value and the value returned by the previous read on this
+ * CPU; the current value is then remembered for the next read.
+ *
+ * Returns the number of entries filled in, or 0 (with a one-time warning)
+ * when no events have been selected.
+ */
+static int __maybe_unused pmu_read(struct event_data *events)
+{
+       int n;
+       u32 *saved = __get_cpu_var(pmu_prev_val);
+
+       if (!pmu_ctx.nr_used_counters) {
+               pr_warn_once("error: counters were not initialized\n");
+               return 0;
+       }
+
+       for (n = 0; n < pmu_ctx.nr_used_counters; n++) {
+               struct quadd_pmu_event_info *info = &pmu_ctx.pmu_events[n];
+               struct event_data *out = &events[n];
+               u32 count;
+
+               count = armv7_pmu_read_counter(info->counter_idx);
+               count = armv7_pmu_adjust_value(count, info->quadd_event_id);
+
+               out->event_source = QUADD_EVENT_SOURCE_PMU;
+               out->event_id = info->quadd_event_id;
+
+               out->val = count;
+               out->prev_val = saved[n];
+
+               saved[n] = count;
+
+               qm_debug_read_counter(out->event_id, out->prev_val,
+                                     out->val);
+       }
+
+       return pmu_ctx.nr_used_counters;
+}
+
+/*
+ * Emulated replacement for pmu_read(), used when QUADD_USE_EMULATE_COUNTERS
+ * is defined.
+ *
+ * @events: output array with room for at least pmu_ctx.nr_used_counters
+ *          entries
+ *
+ * No hardware is touched: every entry gets a synthetic value that advances
+ * by 5 per entry and wraps back to 100 once it exceeds 200.  The entries are
+ * otherwise filled in like pmu_read() does: event source, id of the selected
+ * event, and the value handed out by the previous read on this CPU, which is
+ * then replaced by the new value.
+ *
+ * Returns the number of entries filled in.
+ */
+static int __maybe_unused pmu_read_emulate(struct event_data *events)
+{
+       int i;
+       static u32 val = 100;
+       u32 *prevp = __get_cpu_var(pmu_prev_val);
+
+       for (i = 0; i < pmu_ctx.nr_used_counters; i++) {
+               if (val > 200)
+                       val = 100;
+
+               events[i].event_source = QUADD_EVENT_SOURCE_PMU;
+               /* report the selected event id, not the saved counter value */
+               events[i].event_id = pmu_ctx.pmu_events[i].quadd_event_id;
+
+               events[i].val = val;
+               events[i].prev_val = prevp[i];
+
+               prevp[i] = val;
+
+               val += 5;
+       }
+
+       return pmu_ctx.nr_used_counters;
+}
+
+/*
+ * Select the events to be counted by the PMU.
+ *
+ * @events: array of QUADD_EVENT_TYPE_* ids
+ * @size:   number of entries in @events
+ *
+ * Every event other than QUADD_EVENT_TYPE_CPU_CYCLES is given the next free
+ * event counter; CPU cycles always use the dedicated cycle counter and do
+ * not consume an event counter.  The previous selection is dropped on entry,
+ * so no events are selected after a failure.
+ *
+ * Returns 0 on success (including an empty request), -EINVAL for a negative
+ * @size or an event id outside [0, QUADD_EVENT_TYPE_MAX), -ENOSPC when more
+ * events are requested than can be stored or counted, and -ENODEV when no
+ * event map is available for this CPU.
+ */
+static int set_events(int *events, int size)
+{
+       int i, nr_l1_r = 0, nr_l1_w = 0, curr_idx = 0;
+
+       pmu_ctx.l1_cache_rw = 0;
+       pmu_ctx.nr_used_counters = 0;
+
+       if (!events || size == 0)
+               return 0;
+
+       /* a negative count must never end up in nr_used_counters */
+       if (size < 0) {
+               pr_err("Invalid number of events: %d\n", size);
+               return -EINVAL;
+       }
+
+       if (size > QUADD_MAX_PMU_COUNTERS) {
+               pr_err("Too many events (> %d)\n", QUADD_MAX_PMU_COUNTERS);
+               return -ENOSPC;
+       }
+
+       if (!pmu_ctx.current_map) {
+               pr_err("Invalid current_map\n");
+               return -ENODEV;
+       }
+
+       for (i = 0; i < size; i++) {
+               struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];
+
+               /* the id indexes current_map[QUADD_EVENT_TYPE_MAX] below */
+               if (events[i] < 0 || events[i] >= QUADD_EVENT_TYPE_MAX) {
+                       pr_err("Error event: %d\n", events[i]);
+                       return -EINVAL;
+               }
+
+               if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
+                       pmu_event->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT;
+                       pmu_event->counter_idx = QUADD_ARMV7_CYCLE_COUNTER;
+               } else {
+                       /* only these events take up an event counter */
+                       if (curr_idx >= pmu_ctx.nr_counters) {
+                               pr_err("Too many events (> %d)\n",
+                                      pmu_ctx.nr_counters);
+                               return -ENOSPC;
+                       }
+
+                       pmu_event->hw_value = pmu_ctx.current_map[events[i]];
+                       pmu_event->counter_idx = curr_idx++;
+               }
+               pmu_event->quadd_event_id = events[i];
+
+               if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
+                       nr_l1_r++;
+               else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
+                       nr_l1_w++;
+
+               pr_info("Event has been added: id/pmu value: %s/%#x\n",
+                       quadd_get_event_str(events[i]),
+                       pmu_event->hw_value);
+       }
+       pmu_ctx.nr_used_counters = size;
+
+       /* both L1 D-cache miss kinds requested: see armv7_pmu_adjust_value() */
+       if (nr_l1_r > 0 && nr_l1_w > 0)
+               pmu_ctx.l1_cache_rw = 1;
+
+       return 0;
+}
+
+/*
+ * List the event ids that the current event map does not mark as
+ * QUADD_ARMV7_UNSUPPORTED_EVENT.
+ *
+ * @events: output array; up to QUADD_EVENT_TYPE_MAX ids are stored
+ *
+ * Returns the number of ids written to @events.
+ */
+static int get_supported_events(int *events)
+{
+       int id;
+       int *out = events;
+
+       for (id = 0; id < QUADD_EVENT_TYPE_MAX; id++) {
+               if (pmu_ctx.current_map[id] == QUADD_ARMV7_UNSUPPORTED_EVENT)
+                       continue;
+
+               *out++ = id;
+       }
+
+       return out - events;
+}
+/*
+ * Event source operations handed out by quadd_armv7_pmu_init().  When
+ * QUADD_USE_EMULATE_COUNTERS is defined, .read returns synthetic values
+ * (pmu_read_emulate) instead of reading the hardware counters.
+ */
+static struct quadd_event_source_interface pmu_armv7_int = {
+       .enable                 = pmu_enable,
+       .disable                = pmu_disable,
+
+       .start                  = pmu_start,
+       .stop                   = pmu_stop,
+
+#ifndef QUADD_USE_EMULATE_COUNTERS
+       .read                   = pmu_read,
+#else
+       .read                   = pmu_read_emulate,
+#endif
+       .set_events             = set_events,
+       .get_supported_events   = get_supported_events,
+};
+
+/*
+ * Detect the CPU from the main ID register and set up the PMU context.
+ *
+ * The implementer code is taken from bits [31:24] and the part number from
+ * the bits selected by 0xFFF0.  Cortex-A9 and Cortex-A15 are supported; for
+ * any other CPU, including CPUs from other implementers, the context
+ * describes an "Unknown" CPU with no counters and no event map.
+ *
+ * Returns the PMU event source interface, or NULL when the CPU is not
+ * supported.
+ */
+struct quadd_event_source_interface *quadd_armv7_pmu_init(void)
+{
+       struct quadd_event_source_interface *pmu = NULL;
+       unsigned long cpu_id, cpu_implementer, part_number;
+
+       /*
+        * Start from the "unknown CPU" state so that every path which does
+        * not recognize the CPU leaves a complete, consistent context.
+        */
+       pmu_ctx.arch = QUADD_ARM_CPU_TYPE_UNKNOWN;
+       strcpy(pmu_ctx.arch_name, "Unknown");
+       pmu_ctx.nr_counters = 0;
+       pmu_ctx.counters_mask = 0;
+       pmu_ctx.current_map = NULL;
+
+       cpu_id = read_cpuid_id();
+       cpu_implementer = cpu_id >> 24;
+       part_number = cpu_id & 0xFFF0;
+
+       if (cpu_implementer == QUADD_ARM_CPU_IMPLEMENTER) {
+               switch (part_number) {
+               case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9:
+                       pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A9;
+                       strcpy(pmu_ctx.arch_name, "Cortex A9");
+                       pmu_ctx.nr_counters = 6;
+                       pmu_ctx.counters_mask =
+                               QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9;
+                       pmu_ctx.current_map = quadd_armv7_a9_events_map;
+                       pmu = &pmu_armv7_int;
+                       break;
+
+               case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15:
+                       pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A15;
+                       strcpy(pmu_ctx.arch_name, "Cortex A15");
+                       pmu_ctx.nr_counters = 6;
+                       pmu_ctx.counters_mask =
+                               QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15;
+                       pmu_ctx.current_map = quadd_armv7_a15_events_map;
+                       pmu = &pmu_armv7_int;
+                       break;
+
+               default:
+                       /* unsupported part: keep the "Unknown" defaults */
+                       break;
+               }
+       }
+
+       pr_info("arch: %s, number of counters: %d\n",
+               pmu_ctx.arch_name, pmu_ctx.nr_counters);
+       return pmu;
+}
diff --git a/drivers/misc/tegra-profiler/armv7_pmu.h b/drivers/misc/tegra-profiler/armv7_pmu.h
new file mode 100644 (file)
index 0000000..827fe42
--- /dev/null
@@ -0,0 +1,269 @@
+/*
+ * drivers/misc/tegra-profiler/armv7_pmu.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __ARMV7_PMU_H
+#define __ARMV7_PMU_H
+/* Implementer code of ARM Ltd., compared against bits [31:24] of the CPU id */
+#define QUADD_ARM_CPU_IMPLEMENTER 0x41
+/* CPU types distinguished by the profiler (stored in armv7_pmu_ctx.arch) */
+enum {
+       QUADD_ARM_CPU_TYPE_UNKNOWN,
+       QUADD_ARM_CPU_TYPE_CORTEX_A5,
+       QUADD_ARM_CPU_TYPE_CORTEX_A8,
+       QUADD_ARM_CPU_TYPE_CORTEX_A9,
+       QUADD_ARM_CPU_TYPE_CORTEX_A15,
+};
+/*
+ * Part numbers in the form obtained by masking the CPU id with 0xFFF0,
+ * i.e. not shifted down to bit 0.
+ */
+#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A5    0xC050
+#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A8    0xC080
+#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9    0xC090
+#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15   0xC0F0
+
+/* Capacity of the selected-event table and of the per-CPU value table */
+#define QUADD_MAX_PMU_COUNTERS 32
+/* One selected event and the counter that has been assigned to it */
+struct quadd_pmu_event_info {
+       int quadd_event_id;     /* QUADD_EVENT_TYPE_* id */
+       int hw_value;           /* PMU event number taken from the event map */
+       int counter_idx;        /* counter number or QUADD_ARMV7_CYCLE_COUNTER */
+};
+/* State of the ARMv7 PMU event source */
+struct armv7_pmu_ctx {
+       int arch;               /* QUADD_ARM_CPU_TYPE_* of the detected CPU */
+       char arch_name[32];     /* printable CPU name */
+
+       int nr_counters;        /* number of event counters that may be used */
+       u32 counters_mask;      /* QUADD_ARMV7_COUNTERS_MASK_* for this CPU */
+       /* selected events; the first nr_used_counters entries are valid */
+       struct quadd_pmu_event_info pmu_events[QUADD_MAX_PMU_COUNTERS];
+       int nr_used_counters;
+       /* non-zero when both L1 D-cache read and write misses are selected */
+       int l1_cache_rw;
+       int *current_map;       /* event map of the detected CPU, or NULL */
+};
+/* Only used through pointers here; the definition is not needed */
+struct quadd_event_source_interface;
+/* Detects the CPU; returns the PMU event source or NULL if unsupported */
+extern struct quadd_event_source_interface *quadd_armv7_pmu_init(void);
+
+/*
+ * PMNC (PMU control) register bits
+ */
+
+/* E: 0/1 disables/enables all counters, including CCNT */
+#define QUADD_ARMV7_PMNC_E             (1 << 0)
+/* P: writing 1 resets all performance counters to zero */
+#define QUADD_ARMV7_PMNC_P             (1 << 1)
+/* C: writing 1 resets the cycle counter, CCNT, to zero */
+#define QUADD_ARMV7_PMNC_C             (1 << 2)
+/*
+ * D: 0 counts every processor clock cycle (reset value),
+ * 1 counts every 64th processor clock cycle
+ */
+#define QUADD_ARMV7_PMNC_D             (1 << 3)
+/* X: 0/1 export to ETM disabled/enabled */
+#define QUADD_ARMV7_PMNC_X             (1 << 4)
+/*
+ * DP: 0/1 counting is disabled/enabled in regions where
+ * non-invasive debug is prohibited
+ */
+#define QUADD_ARMV7_PMNC_DP            (1 << 5)
+/* Mask for writable bits */
+#define        QUADD_ARMV7_PMNC_MASK           0x3f
+
+
+/*
+ * Bit of the cycle counter in the enable / interrupt / overflow registers.
+ * Unsigned, because shifting a signed 1 into bit 31 overflows int.
+ */
+#define QUADD_ARMV7_CCNT               (1U << 31)      /* Cycle counter */
+
+/* Pseudo counter index that stands for the cycle counter */
+#define QUADD_ARMV7_CYCLE_COUNTER      (-1)
+
+/*
+ * CNTENS: counters enable reg
+ * _P(i) is the bit of event counter i, _C the bit of the cycle counter.
+ */
+#define QUADD_ARMV7_CNTENS_P(i)                (1U << (i))
+#define QUADD_ARMV7_CNTENS_C           QUADD_ARMV7_CCNT
+
+/*
+ * CNTENC: counters disable reg
+ * _P(i) is the bit of event counter i, _C the bit of the cycle counter.
+ */
+#define QUADD_ARMV7_CNTENC_P(i)        (1U << (i))
+#define QUADD_ARMV7_CNTENC_C   QUADD_ARMV7_CCNT
+
+/*
+ * Performance Counter Selection Register mask
+ */
+#define QUADD_ARMV7_SELECT_MASK        0x1f
+
+/*
+ * EVTSEL Register mask
+ */
+#define QUADD_ARMV7_EVTSEL_MASK        0xff
+/*
+ * Per-CPU-type bit masks of the event counters, one bit per counter
+ * (the cycle counter bit, QUADD_ARMV7_CCNT, is not included).
+ */
+#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A5            0x03
+#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A8            0x0f
+#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9            0x3f
+#define QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15           0x3f
+/* PMU event numbers shared by the ARMv7 cores handled here (0x00 - 0x1D) */
+enum quadd_armv7_common_events {
+       QUADD_ARMV7_HW_EVENT_PMNC_SW_INCR               = 0x00,
+       QUADD_ARMV7_HW_EVENT_IFETCH_MISS                = 0x01,
+       QUADD_ARMV7_HW_EVENT_ITLB_MISS                  = 0x02,
+       QUADD_ARMV7_HW_EVENT_DCACHE_REFILL              = 0x03,
+       QUADD_ARMV7_HW_EVENT_DCACHE_ACCESS              = 0x04,
+       QUADD_ARMV7_HW_EVENT_DTLB_REFILL                = 0x05,
+       QUADD_ARMV7_HW_EVENT_DREAD                      = 0x06,
+       QUADD_ARMV7_HW_EVENT_DWRITE                     = 0x07,
+       QUADD_ARMV7_HW_EVENT_INSTR_EXECUTED             = 0x08,
+       QUADD_ARMV7_HW_EVENT_EXC_TAKEN                  = 0x09,
+       QUADD_ARMV7_HW_EVENT_EXC_EXECUTED               = 0x0A,
+       QUADD_ARMV7_HW_EVENT_CID_WRITE                  = 0x0B,
+       QUADD_ARMV7_HW_EVENT_PC_WRITE                   = 0x0C,
+       QUADD_ARMV7_HW_EVENT_PC_IMM_BRANCH              = 0x0D,
+       QUADD_ARMV7_HW_EVENT_PC_PROC_RETURN             = 0x0E,
+       QUADD_ARMV7_HW_EVENT_UNALIGNED_ACCESS           = 0x0F,
+
+       QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED         = 0x10,
+       QUADD_ARMV7_HW_EVENT_CLOCK_CYCLES               = 0x11,
+       QUADD_ARMV7_HW_EVENT_PC_BRANCH_PRED             = 0x12,
+       QUADD_ARMV7_HW_EVENT_MEM_ACCESS                 = 0x13,
+       QUADD_ARMV7_HW_EVENT_L1_ICACHE_ACCESS           = 0x14,
+       QUADD_ARMV7_HW_EVENT_L1_DCACHE_WB               = 0x15,
+       QUADD_ARMV7_HW_EVENT_L2_DCACHE_ACCESS           = 0x16,
+       QUADD_ARMV7_HW_EVENT_L2_DCACHE_REFILL           = 0x17,
+       QUADD_ARMV7_HW_EVENT_L2_DCACHE_WB               = 0x18,
+       QUADD_ARMV7_HW_EVENT_BUS_ACCESS                 = 0x19,
+       QUADD_ARMV7_HW_EVENT_MEMORY_ERROR               = 0x1A,
+       QUADD_ARMV7_HW_EVENT_INSTR_SPEC                 = 0x1B,
+       QUADD_ARMV7_HW_EVENT_TTBR_WRITE                 = 0x1C,
+       QUADD_ARMV7_HW_EVENT_BUS_CYCLES                 = 0x1D,
+};
+/* PMU event numbers specific to Cortex-A8 */
+enum quadd_armv7_a8_specific_events {
+       QUADD_ARMV7_A8_HW_EVENT_WRITE_BUFFER_FULL                       = 0x40,
+       QUADD_ARMV7_A8_HW_EVENT_L2_STORE_MERGED                         = 0x41,
+       QUADD_ARMV7_A8_HW_EVENT_L2_STORE_BUFF                           = 0x42,
+       QUADD_ARMV7_A8_HW_EVENT_L2_ACCESS                               = 0x43,
+       QUADD_ARMV7_A8_HW_EVENT_L2_CACH_MISS                            = 0x44,
+       QUADD_ARMV7_A8_HW_EVENT_AXI_READ_CYCLES                         = 0x45,
+       QUADD_ARMV7_A8_HW_EVENT_AXI_WRITE_CYCLES                        = 0x46,
+       QUADD_ARMV7_A8_HW_EVENT_MEMORY_REPLAY                           = 0x47,
+       QUADD_ARMV7_A8_HW_EVENT_UNALIGNED_ACCESS_REPLAY                 = 0x48,
+       QUADD_ARMV7_A8_HW_EVENT_L1_DATA_MISS                            = 0x49,
+       QUADD_ARMV7_A8_HW_EVENT_L1_INST_MISS                            = 0x4A,
+       QUADD_ARMV7_A8_HW_EVENT_L1_DATA_COLORING                        = 0x4B,
+       QUADD_ARMV7_A8_HW_EVENT_L1_NEON_DATA                            = 0x4C,
+       QUADD_ARMV7_A8_HW_EVENT_L1_NEON_CACH_DATA                       = 0x4D,
+       QUADD_ARMV7_A8_HW_EVENT_L2_NEON                                 = 0x4E,
+       QUADD_ARMV7_A8_HW_EVENT_L2_NEON_HIT                             = 0x4F,
+       QUADD_ARMV7_A8_HW_EVENT_L1_INST                                 = 0x50,
+       QUADD_ARMV7_A8_HW_EVENT_PC_RETURN_MIS_PRED                      = 0x51,
+       QUADD_ARMV7_A8_HW_EVENT_PC_BRANCH_FAILED                        = 0x52,
+       QUADD_ARMV7_A8_HW_EVENT_PC_BRANCH_TAKEN                         = 0x53,
+       QUADD_ARMV7_A8_HW_EVENT_PC_BRANCH_EXECUTED                      = 0x54,
+       QUADD_ARMV7_A8_HW_EVENT_OP_EXECUTED                             = 0x55,
+       QUADD_ARMV7_A8_HW_EVENT_CYCLES_INST_STALL                       = 0x56,
+       QUADD_ARMV7_A8_HW_EVENT_CYCLES_INST                             = 0x57,
+       QUADD_ARMV7_A8_HW_EVENT_CYCLES_NEON_DATA_STALL                  = 0x58,
+       QUADD_ARMV7_A8_HW_EVENT_CYCLES_NEON_INST_STALL                  = 0x59,
+       QUADD_ARMV7_A8_HW_EVENT_NEON_CYCLES                             = 0x5A,
+
+       QUADD_ARMV7_A8_HW_EVENT_PMU0_EVENTS                             = 0x70,
+       QUADD_ARMV7_A8_HW_EVENT_PMU1_EVENTS                             = 0x71,
+       QUADD_ARMV7_A8_HW_EVENT_PMU_EVENTS                              = 0x72,
+};
+/* PMU event numbers specific to Cortex-A9 */
+enum quadd_armv7_a9_specific_events {
+       QUADD_ARMV7_A9_HW_EVENT_JAVA_HW_BYTECODE_EXEC                   = 0x40,
+       QUADD_ARMV7_A9_HW_EVENT_JAVA_SW_BYTECODE_EXEC                   = 0x41,
+       QUADD_ARMV7_A9_HW_EVENT_JAZELLE_BRANCH_EXEC                     = 0x42,
+
+       QUADD_ARMV7_A9_HW_EVENT_COHERENT_LINE_MISS                      = 0x50,
+       QUADD_ARMV7_A9_HW_EVENT_COHERENT_LINE_HIT                       = 0x51,
+
+       QUADD_ARMV7_A9_HW_EVENT_ICACHE_DEP_STALL_CYCLES                 = 0x60,
+       QUADD_ARMV7_A9_HW_EVENT_DCACHE_DEP_STALL_CYCLES                 = 0x61,
+       QUADD_ARMV7_A9_HW_EVENT_TLB_MISS_DEP_STALL_CYCLES               = 0x62,
+       QUADD_ARMV7_A9_HW_EVENT_STREX_EXECUTED_PASSED                   = 0x63,
+       QUADD_ARMV7_A9_HW_EVENT_STREX_EXECUTED_FAILED                   = 0x64,
+       QUADD_ARMV7_A9_HW_EVENT_DATA_EVICTION                           = 0x65,
+       QUADD_ARMV7_A9_HW_EVENT_ISSUE_STAGE_NO_INST                     = 0x66,
+       QUADD_ARMV7_A9_HW_EVENT_ISSUE_STAGE_EMPTY                       = 0x67,
+       QUADD_ARMV7_A9_HW_EVENT_INST_OUT_OF_RENAME_STAGE                = 0x68,
+
+       QUADD_ARMV7_A9_HW_EVENT_PREDICTABLE_FUNCT_RETURNS               = 0x6E,
+
+       QUADD_ARMV7_A9_HW_EVENT_MAIN_UNIT_EXECUTED_INST                 = 0x70,
+       QUADD_ARMV7_A9_HW_EVENT_SECOND_UNIT_EXECUTED_INST               = 0x71,
+       QUADD_ARMV7_A9_HW_EVENT_LD_ST_UNIT_EXECUTED_INST                = 0x72,
+       QUADD_ARMV7_A9_HW_EVENT_FP_EXECUTED_INST                        = 0x73,
+       QUADD_ARMV7_A9_HW_EVENT_NEON_EXECUTED_INST                      = 0x74,
+
+       QUADD_ARMV7_A9_HW_EVENT_PLD_FULL_DEP_STALL_CYCLES               = 0x80,
+       QUADD_ARMV7_A9_HW_EVENT_DATA_WR_DEP_STALL_CYCLES                = 0x81,
+       QUADD_ARMV7_A9_HW_EVENT_ITLB_MISS_DEP_STALL_CYCLES              = 0x82,
+       QUADD_ARMV7_A9_HW_EVENT_DTLB_MISS_DEP_STALL_CYCLES              = 0x83,
+       QUADD_ARMV7_A9_HW_EVENT_MICRO_ITLB_MISS_DEP_STALL_CYCLES        = 0x84,
+       QUADD_ARMV7_A9_HW_EVENT_MICRO_DTLB_MISS_DEP_STALL_CYCLES        = 0x85,
+       QUADD_ARMV7_A9_HW_EVENT_DMB_DEP_STALL_CYCLES                    = 0x86,
+
+       QUADD_ARMV7_A9_HW_EVENT_INTGR_CLK_ENABLED_CYCLES                = 0x8A,
+       QUADD_ARMV7_A9_HW_EVENT_DATA_ENGINE_CLK_EN_CYCLES               = 0x8B,
+
+       QUADD_ARMV7_A9_HW_EVENT_ISB_INST                                = 0x90,
+       QUADD_ARMV7_A9_HW_EVENT_DSB_INST                                = 0x91,
+       QUADD_ARMV7_A9_HW_EVENT_DMB_INST                                = 0x92,
+       QUADD_ARMV7_A9_HW_EVENT_EXT_INTERRUPTS                          = 0x93,
+
+       QUADD_ARMV7_A9_HW_EVENT_PLE_CACHE_LINE_RQST_COMPLETED           = 0xA0,
+       QUADD_ARMV7_A9_HW_EVENT_PLE_CACHE_LINE_RQST_SKIPPED             = 0xA1,
+       QUADD_ARMV7_A9_HW_EVENT_PLE_FIFO_FLUSH                          = 0xA2,
+       QUADD_ARMV7_A9_HW_EVENT_PLE_RQST_COMPLETED                      = 0xA3,
+       QUADD_ARMV7_A9_HW_EVENT_PLE_FIFO_OVERFLOW                       = 0xA4,
+       QUADD_ARMV7_A9_HW_EVENT_PLE_RQST_PROG                           = 0xA5
+};
+/* PMU event numbers specific to Cortex-A5 */
+enum quadd_armv7_a5_specific_events {
+       QUADD_ARMV7_A5_HW_EVENT_IRQ_TAKEN                               = 0x86,
+       QUADD_ARMV7_A5_HW_EVENT_FIQ_TAKEN                               = 0x87,
+
+       QUADD_ARMV7_A5_HW_EVENT_EXT_MEM_RQST                            = 0xc0,
+       QUADD_ARMV7_A5_HW_EVENT_NC_EXT_MEM_RQST                         = 0xc1,
+       QUADD_ARMV7_A5_HW_EVENT_PREFETCH_LINEFILL                       = 0xc2,
+       QUADD_ARMV7_A5_HW_EVENT_PREFETCH_LINEFILL_DROP                  = 0xc3,
+       QUADD_ARMV7_A5_HW_EVENT_ENTER_READ_ALLOC                        = 0xc4,
+       QUADD_ARMV7_A5_HW_EVENT_READ_ALLOC                              = 0xc5,
+
+       QUADD_ARMV7_A5_HW_EVENT_STALL_SB_FULL                           = 0xc9,
+};
+/* PMU event numbers specific to Cortex-A15 */
+enum quadd_armv7_a15_specific_events {
+       QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_ACCESS  = 0x40,
+       QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_ACCESS = 0x41,
+       QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_REFILL  = 0x42,
+       QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_REFILL = 0x43,
+
+       QUADD_ARMV7_A15_HW_EVENT_L1_DTLB_READ_REFILL    = 0x4C,
+       QUADD_ARMV7_A15_HW_EVENT_L1_DTLB_WRITE_REFILL   = 0x4D,
+
+       QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_ACCESS  = 0x50,
+       QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_ACCESS = 0x51,
+       QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_REFILL  = 0x52,
+       QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_REFILL = 0x53,
+
+       QUADD_ARMV7_A15_HW_EVENT_SPEC_PC_WRITE          = 0x76,
+};
+/*
+ * Pseudo event values that lie outside QUADD_ARMV7_EVTSEL_MASK:
+ * UNSUPPORTED marks event map entries without a hardware event,
+ * CPU_CYCLE is recorded for events counted by the cycle counter.
+ */
+#define QUADD_ARMV7_UNSUPPORTED_EVENT  0xff00
+#define QUADD_ARMV7_CPU_CYCLE_EVENT    0xffff
+/* NOTE(review): no definition is visible in armv7_pmu.c - confirm it exists */
+void quadd_pmu_test(void);
+
+#endif /* __ARMV7_PMU_H */
diff --git a/drivers/misc/tegra-profiler/auth.c b/drivers/misc/tegra-profiler/auth.c
new file mode 100644 (file)
index 0000000..654f401
--- /dev/null
@@ -0,0 +1,336 @@
+/*
+ * drivers/misc/tegra-profiler/auth.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+
+#include "auth.h"
+#include "quadd.h"
+#include "debug.h"
+
+/* First 32-bit word of every request message built by auth_read() */
+#define QUADD_SECURITY_MAGIC_REQUEST   0x11112222
+/* First 32-bit word auth_write() requires in every response message */
+#define QUADD_SECURITY_MAGIC_RESPONSE  0x33334444
+
+/* How long quadd_auth_check_debug_flag() waits for the response */
+#define QUADD_TIMEOUT  1000    /* msec */
+
+/*
+ * Response values: taken from the fourth word of a response message in
+ * auth_write() and decoded by quadd_auth_check_debug_flag().
+ */
+enum {
+       QUADD_SECURITY_RESPONSE_ERROR                   = 0,
+       QUADD_SECURITY_RESPONSE_DEBUG_FLAG_ON           = 1,
+       QUADD_SECURITY_RESPONSE_DEBUG_FLAG_OFF          = 2,
+       QUADD_SECURITY_RESPONSE_PACKAGE_NOT_FOUND       = 3,
+};
+
+/*
+ * Command codes (third word of a message): the REQUEST code is emitted by
+ * auth_read(), the RESPONSE code is the only one auth_write() accepts.
+ */
+enum {
+       QUADD_SECURITY_REQUEST_CMD_TEST_DEBUG_FLAG      = 1,
+       QUADD_SECURITY_RESPONSE_CMD_TEST_DEBUG_FLAG     = 2,
+};
+
+/* Payload of one request/response exchange; guarded by auth_ctx.lock. */
+struct quadd_auth_data {
+       /* name sent to user space by auth_read() */
+       char package_name[QUADD_MAX_PACKAGE_NAME];
+
+       /* uid and response code stored by auth_write() */
+       uid_t debug_app_uid;
+       int response_value;
+};
+
+/* Single, file-scope state of the auth misc device. */
+static struct quadd_auth_context {
+       struct miscdevice misc_dev;
+
+       /* 0/1 flag toggled in auth_open()/auth_release(): one opener only */
+       atomic_t opened;
+
+       /* auth_read() sleeps on request_wait, the checker on response_wait */
+       wait_queue_head_t request_wait;
+       wait_queue_head_t response_wait;
+
+       /* wake-up conditions for the two wait queues above */
+       int request_ready;
+       int response_ready;
+       struct quadd_auth_data data;
+       /* protects 'data' */
+       struct mutex lock;
+
+       /* incremented per request in auth_read(), matched in auth_write() */
+       unsigned int msg_id;
+
+       struct quadd_ctx *quadd_ctx;
+} auth_ctx;
+
+/* Flag the response as available and wake the waiter on response_wait. */
+static inline void response_ready(void)
+{
+       struct quadd_auth_context *ctx = &auth_ctx;
+
+       ctx->response_ready = 1;
+       wake_up_interruptible(&ctx->response_wait);
+}
+
+/* Flag a request as pending and wake the reader sleeping on request_wait. */
+static inline void request_ready(void)
+{
+       struct quadd_auth_context *ctx = &auth_ctx;
+
+       ctx->request_ready = 1;
+       wake_up_interruptible(&ctx->request_wait);
+}
+
+/*
+ * Open handler: only one opener is allowed at a time. On success both
+ * ready flags and the exchanged data are reset. Returns 0 or -EBUSY.
+ */
+static int auth_open(struct inode *inode, struct file *file)
+{
+       struct quadd_auth_context *ctx = &auth_ctx;
+
+       /* previous value != 0 means somebody else already holds the device */
+       if (atomic_cmpxchg(&ctx->opened, 0, 1) != 0) {
+               pr_err("Error: auth file is already opened\n");
+               return -EBUSY;
+       }
+       pr_info("auth is opened\n");
+
+       ctx->request_ready = 0;
+       ctx->response_ready = 0;
+
+       mutex_lock(&ctx->lock);
+       ctx->data.package_name[0] = '\0';
+       ctx->data.debug_app_uid = 0;
+       ctx->data.response_value = 0;
+       mutex_unlock(&ctx->lock);
+
+       return 0;
+}
+
+/* Release handler: clears the 'opened' flag so the device can be reopened. */
+static int auth_release(struct inode *inode, struct file *file)
+{
+       pr_info("auth is released\n");
+       atomic_set(&auth_ctx.opened, 0);
+       return 0;
+}
+
+/*
+ * auth_read() - hand the pending authorization request to user space.
+ *
+ * Blocks until a request has been posted, then copies out a message made
+ * of four u32 words (magic, message id, command, name length) followed by
+ * the package name without a terminating NUL.
+ *
+ * Returns the message length, -ERESTARTSYS if the wait was interrupted by
+ * a signal, -EINVAL if @length cannot hold the message, or -EFAULT if the
+ * copy to @user_buf failed.
+ */
+static ssize_t
+auth_read(struct file *filp,
+           char __user *user_buf,
+           size_t length,
+           loff_t *offset)
+{
+       char buf[QUADD_MAX_PACKAGE_NAME + 4 * sizeof(u32)];
+       size_t name_length, msg_length;
+       int err;
+       struct quadd_auth_data *data = &auth_ctx.data;
+
+       /* do not build a message if a signal woke us up */
+       err = wait_event_interruptible(auth_ctx.request_wait,
+                                      auth_ctx.request_ready);
+       if (err)
+               return err;
+
+       mutex_lock(&auth_ctx.lock);
+
+       /* bounded: the stored name is not trusted to be NUL-terminated */
+       name_length = strnlen(data->package_name, QUADD_MAX_PACKAGE_NAME);
+       msg_length = name_length + 4 * sizeof(u32);
+
+       if (length < msg_length) {
+               /* leave the request pending for a retry with a larger buffer */
+               mutex_unlock(&auth_ctx.lock);
+               pr_err("Error: read buffer is too small: %zu\n", length);
+               return -EINVAL;
+       }
+
+       /* consume the request so the next read blocks until a new one */
+       auth_ctx.request_ready = 0;
+
+       ((u32 *)buf)[0] = QUADD_SECURITY_MAGIC_REQUEST;
+       ((u32 *)buf)[1] = ++auth_ctx.msg_id;
+       ((u32 *)buf)[2] = QUADD_SECURITY_REQUEST_CMD_TEST_DEBUG_FLAG;
+       ((u32 *)buf)[3] = name_length;
+
+       memcpy(buf + 4 * sizeof(u32), data->package_name, name_length);
+
+       mutex_unlock(&auth_ctx.lock);
+
+       /* copy_to_user() returns the number of bytes left, not an errno */
+       if (copy_to_user(user_buf, buf, msg_length)) {
+               pr_err("Error: copy to user\n");
+               return -EFAULT;
+       }
+
+       pr_info("auth read, msg_length: %zu\n", msg_length);
+       return msg_length;
+}
+
+/*
+ * auth_write() - accept the response of the user-space service.
+ *
+ * Expected layout: five u32 words (magic, message id, command, response
+ * value, payload length) followed by the payload. For the only supported
+ * command the payload is a single u32 uid.
+ *
+ * Whatever the outcome, the waiter in quadd_auth_check_debug_flag() is
+ * woken up; on any failure it sees QUADD_SECURITY_RESPONSE_ERROR.
+ *
+ * Returns @count on success, -E2BIG for a message shorter than the header
+ * or a payload length other than 4 (codes kept from the original
+ * interface), -EINVAL for a bad magic, message id, command or a missing
+ * uid word, and -EFAULT if user memory could not be read.
+ */
+static ssize_t
+auth_write(struct file *file,
+         const char __user *user_buf,
+         size_t count,
+         loff_t *ppos)
+{
+       ssize_t ret;
+       u32 hdr[5];
+       u32 magic, response_cmd, response_value, length, uid, msg_id;
+       struct quadd_auth_data *data = &auth_ctx.data;
+
+       pr_info("auth write, count: %zu\n", count);
+
+       /* assume failure until a complete, valid response has been parsed */
+       mutex_lock(&auth_ctx.lock);
+       data->response_value = QUADD_SECURITY_RESPONSE_ERROR;
+       data->debug_app_uid = 0;
+       mutex_unlock(&auth_ctx.lock);
+
+       if (count < sizeof(hdr)) {
+               pr_err("Error count: %zu\n", count);
+               ret = -E2BIG;
+               goto out;
+       }
+
+       /* copy_from_user() returns the number of bytes left, not an errno */
+       if (copy_from_user(hdr, user_buf, sizeof(hdr))) {
+               ret = -EFAULT;
+               pr_err("Error: copy from user: %zd\n", ret);
+               goto out;
+       }
+
+       magic = hdr[0];
+       if (magic != QUADD_SECURITY_MAGIC_RESPONSE) {
+               pr_err("Error magic: %#x\n", magic);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       msg_id = hdr[1];
+       if (msg_id != auth_ctx.msg_id) {
+               pr_err("Error message id: %u\n", msg_id);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       response_cmd = hdr[2];
+       response_value = hdr[3];
+       length = hdr[4];
+
+       switch (response_cmd) {
+       case QUADD_SECURITY_RESPONSE_CMD_TEST_DEBUG_FLAG:
+               if (length != 4) {
+                       pr_err("Error: too long data: %u\n", length);
+                       ret = -E2BIG;
+                       goto out;
+               }
+
+               /* the uid word must lie inside the bytes the caller gave us */
+               if (count < sizeof(hdr) + sizeof(u32)) {
+                       pr_err("Error: truncated response: %zu\n", count);
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               if (get_user(uid, (u32 __user *)user_buf + 5)) {
+                       ret = -EFAULT;
+                       pr_err("Error: copy from user: %zd\n", ret);
+                       goto out;
+               }
+
+               mutex_lock(&auth_ctx.lock);
+               data->response_value = response_value;
+               data->debug_app_uid = uid;
+               mutex_unlock(&auth_ctx.lock);
+
+               pr_info("uid: %u, response_value: %u\n",
+                       uid, response_value);
+               break;
+
+       default:
+               pr_err("Error: invalid response command: %u\n",
+                      response_cmd);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ret = count;
+out:
+       /* always release the waiter, success or not */
+       response_ready();
+       return ret;
+}
+
+/* File operations of the auth misc device (registered in quadd_auth_init). */
+static const struct file_operations auth_fops = {
+       .read           = auth_read,
+       .write          = auth_write,
+       .open           = auth_open,
+       .release        = auth_release,
+};
+
+/*
+ * quadd_auth_check_debug_flag() - ask user space whether a package is
+ * debuggable.
+ * @package_name: NUL-terminated name, 1..QUADD_MAX_PACKAGE_NAME-1 bytes
+ *
+ * Posts a request for auth_read() and waits up to QUADD_TIMEOUT msec for
+ * auth_write() to deliver the answer.
+ *
+ * Returns the uid reported for a debuggable package, 0 if the package is
+ * not debuggable, or a negative errno: -EINVAL (bad name), -EIO (device
+ * not opened), -ETIMEDOUT (no answer), -ESRCH (package not found),
+ * -EBADMSG (error or unknown response).
+ */
+int quadd_auth_check_debug_flag(const char *package_name)
+{
+       int uid, response_value;
+       struct quadd_auth_data *data = &auth_ctx.data;
+       size_t pkg_name_length;
+
+       if (!package_name)
+               return -EINVAL;
+
+       /*
+        * '>=' leaves room for the terminating NUL: auth_read() treats the
+        * stored name as a C string.
+        */
+       pkg_name_length = strlen(package_name);
+       if (pkg_name_length == 0 ||
+           pkg_name_length >= QUADD_MAX_PACKAGE_NAME)
+               return -EINVAL;
+
+       if (atomic_read(&auth_ctx.opened) == 0)
+               return -EIO;
+
+       mutex_lock(&auth_ctx.lock);
+       data->debug_app_uid = 0;
+       data->response_value = 0;
+
+       strlcpy(data->package_name, package_name, QUADD_MAX_PACKAGE_NAME);
+
+       /* drop any stale answer so we really wait for this request's one */
+       auth_ctx.response_ready = 0;
+       mutex_unlock(&auth_ctx.lock);
+
+       request_ready();
+
+       wait_event_interruptible_timeout(auth_ctx.response_wait,
+                                        auth_ctx.response_ready,
+                                        msecs_to_jiffies(QUADD_TIMEOUT));
+       if (!auth_ctx.response_ready) {
+               pr_err("Error: Tegra profiler service did not answer\n");
+               return -ETIMEDOUT;
+       }
+
+       mutex_lock(&auth_ctx.lock);
+       uid = data->debug_app_uid;
+       response_value = data->response_value;
+       mutex_unlock(&auth_ctx.lock);
+
+       switch (response_value) {
+       case QUADD_SECURITY_RESPONSE_DEBUG_FLAG_ON:
+               pr_info("package %s is debuggable, uid: %d\n",
+                       package_name, uid);
+               return uid;
+
+       case QUADD_SECURITY_RESPONSE_DEBUG_FLAG_OFF:
+               pr_info("package %s is not debuggable\n",
+                       package_name);
+               return 0;
+
+       case QUADD_SECURITY_RESPONSE_PACKAGE_NOT_FOUND:
+               pr_err("Error: package %s not found\n", package_name);
+               return -ESRCH;
+
+       case QUADD_SECURITY_RESPONSE_ERROR:
+       default:
+               pr_err("Error: invalid response\n");
+               return -EBADMSG;
+       }
+}
+
+/*
+ * quadd_auth_init() - set up the auth context and register the misc device.
+ * @quadd_ctx: profiler context stored in auth_ctx for later use
+ *
+ * All state used by the file operations is initialized before
+ * misc_register(), because the device can be opened as soon as it is
+ * registered.
+ *
+ * Returns 0 on success or the negative error from misc_register().
+ */
+int quadd_auth_init(struct quadd_ctx *quadd_ctx)
+{
+       int err;
+       struct miscdevice *misc_dev = &auth_ctx.misc_dev;
+
+       pr_info("auth: init\n");
+
+       init_waitqueue_head(&auth_ctx.request_wait);
+       init_waitqueue_head(&auth_ctx.response_wait);
+
+       auth_ctx.request_ready = 0;
+       auth_ctx.response_ready = 0;
+
+       atomic_set(&auth_ctx.opened, 0);
+       mutex_init(&auth_ctx.lock);
+       auth_ctx.msg_id = 0;
+
+       auth_ctx.quadd_ctx = quadd_ctx;
+
+       misc_dev->minor = MISC_DYNAMIC_MINOR;
+       misc_dev->name = QUADD_AUTH_DEVICE_NAME;
+       misc_dev->fops = &auth_fops;
+
+       /* last step: from here on auth_open() may run at any time */
+       err = misc_register(misc_dev);
+       if (err < 0) {
+               pr_err("Error: misc_register %d\n", err);
+               return err;
+       }
+
+       return 0;
+}
+
+/* Unregister the auth misc device registered by quadd_auth_init(). */
+void quadd_auth_deinit(void)
+{
+       pr_info("auth: deinit\n");
+       misc_deregister(&auth_ctx.misc_dev);
+}
diff --git a/drivers/misc/tegra-profiler/auth.h b/drivers/misc/tegra-profiler/auth.h
new file mode 100644 (file)
index 0000000..aa810f2
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * drivers/misc/tegra-profiler/auth.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_AUTH_H__
+#define __QUADD_AUTH_H__
+
+struct quadd_ctx;
+
+/*
+ * Returns the uid of a debuggable package, 0 if it is not debuggable,
+ * or a negative errno.
+ */
+int quadd_auth_check_debug_flag(const char *package_name);
+
+/* Register / unregister the auth misc device. */
+int quadd_auth_init(struct quadd_ctx *quadd_ctx);
+void quadd_auth_deinit(void);
+
+#endif /* __QUADD_AUTH_H__ */
diff --git a/drivers/misc/tegra-profiler/backtrace.c b/drivers/misc/tegra-profiler/backtrace.c
new file mode 100644 (file)
index 0000000..d3fa8a0
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ * drivers/misc/tegra-profiler/backtrace.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <asm-generic/uaccess.h>
+
+#include <linux/tegra_profiler.h>
+#include <linux/mm.h>
+
+#include "backtrace.h"
+
+#define QUADD_USER_SPACE_MIN_ADDR      0x8000
+
+/* Append @ip to the callchain; silently dropped once the chain is full. */
+static inline void
+quadd_callchain_store(struct quadd_callchain *callchain_data, u32 ip)
+{
+       if (callchain_data->nr >= QUADD_MAX_STACK_DEPTH)
+               return;
+
+       callchain_data->callchain[callchain_data->nr] = ip;
+       callchain_data->nr++;
+}
+
+/*
+ * Check that one unsigned long starting at @addr lies entirely inside @vma.
+ * Returns 0 if it does, -EINVAL otherwise (including a NULL @vma).
+ */
+static int
+check_vma_address(unsigned long addr, struct vm_area_struct *vma)
+{
+       if (!vma)
+               return -EINVAL;
+
+       if (addr < vma->vm_start)
+               return -EINVAL;
+
+       if (addr + sizeof(unsigned long) > vma->vm_end)
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Unwind one user-space frame.
+ * @tail:           frame pointer of the frame to decode
+ * @callchain_data: chain that receives the return address of this frame
+ * @stack_vma:      vma every frame pointer must lie in
+ *
+ * Two frame layouts are told apart by the word at @tail: if it points into
+ * the stack vma it is taken as the saved fp (lr at tail + 1), otherwise it
+ * is taken as lr (saved fp at tail - 1).
+ *
+ * Returns the previous frame pointer, or NULL when unwinding must stop
+ * (unreadable or out-of-range memory, lr below QUADD_USER_SPACE_MIN_ADDR,
+ * or a previous fp that is not above @tail).
+ */
+static unsigned long __user *
+user_backtrace(unsigned long __user *tail,
+              struct quadd_callchain *callchain_data,
+              struct vm_area_struct *stack_vma)
+{
+       unsigned long value, value_lr = 0, value_fp = 0;
+       unsigned long __user *fp_prev = NULL;
+
+       if (check_vma_address((unsigned long)tail, stack_vma))
+               return NULL;
+
+       if (__copy_from_user_inatomic(&value, tail, sizeof(unsigned long)))
+               return NULL;
+
+       if (!check_vma_address(value, stack_vma)) {
+               /* clang's frame */
+               value_fp = value;
+
+               if (check_vma_address((unsigned long)(tail + 1), stack_vma))
+                       return NULL;
+
+               if (__copy_from_user_inatomic(&value_lr, tail + 1,
+                                             sizeof(unsigned long)))
+                       return NULL;
+       } else {
+               /* gcc's frame */
+               /*
+                * NOTE(review): tail - 1 is read without a
+                * check_vma_address() call, unlike tail + 1 above.
+                */
+               if (__copy_from_user_inatomic(&value_fp, tail - 1,
+                                             sizeof(unsigned long)))
+                       return NULL;
+
+               if (check_vma_address(value_fp, stack_vma))
+                       return NULL;
+
+               value_lr = value;
+       }
+
+       fp_prev = (unsigned long __user *)value_fp;
+
+       if (value_lr < QUADD_USER_SPACE_MIN_ADDR)
+               return NULL;
+
+       quadd_callchain_store(callchain_data, value_lr);
+
+       /* frames must move strictly upwards, which also prevents loops */
+       if (fp_prev <= tail)
+               return NULL;
+
+       return fp_prev;
+}
+
+/*
+ * Collect the user-space call chain of the current task by following the
+ * frame pointer chain starting at regs->ARM_fp.
+ * @regs:           register state of the interrupted context
+ * @callchain_data: output; nr is reset to 0 before collecting
+ *
+ * Nothing is collected for kernel-mode or Thumb-mode contexts, for tasks
+ * without an mm, or when fp is zero, below sp, or not 4-byte aligned.
+ *
+ * Returns the number of entries stored in @callchain_data.
+ */
+unsigned int
+quadd_get_user_callchain(struct pt_regs *regs,
+                        struct quadd_callchain *callchain_data)
+{
+       unsigned long fp, sp, pc, reg;
+       struct vm_area_struct *vma, *vma_pc;
+       unsigned long __user *tail = NULL;
+       struct mm_struct *mm = current->mm;
+
+       callchain_data->nr = 0;
+
+       if (!regs || !user_mode(regs) || !mm)
+               return 0;
+
+       if (thumb_mode(regs))
+               return 0;
+
+       fp = regs->ARM_fp;
+       sp = regs->ARM_sp;
+       pc = regs->ARM_pc;
+
+       if (fp == 0 || fp < sp || fp & 0x3)
+               return 0;
+
+       /* the vma found via sp is the one all frame pointers must lie in */
+       vma = find_vma(mm, sp);
+       if (check_vma_address(fp, vma))
+               return 0;
+
+       if (__copy_from_user_inatomic(&reg, (unsigned long __user *)fp,
+                                     sizeof(unsigned long)))
+               return 0;
+
+       /* the word at fp points further up the same vma */
+       if (reg > fp &&
+           !check_vma_address(reg, vma)) {
+               unsigned long value;
+               int read_lr = 0;
+
+               if (!check_vma_address(fp + sizeof(unsigned long), vma)) {
+                       if (__copy_from_user_inatomic(
+                                       &value,
+                                       (unsigned long __user *)fp + 1,
+                                       sizeof(unsigned long)))
+                               return 0;
+
+                       vma_pc = find_vma(mm, pc);
+                       read_lr = 1;
+               }
+
+               /* value and vma_pc are only valid when read_lr is set */
+               if (!read_lr || check_vma_address(value, vma_pc)) {
+                       /* gcc: fp --> short frame tail (fp) */
+
+                       if (regs->ARM_lr < QUADD_USER_SPACE_MIN_ADDR)
+                               return 0;
+
+                       quadd_callchain_store(callchain_data, regs->ARM_lr);
+                       tail = (unsigned long __user *)reg;
+               }
+       }
+
+       if (!tail)
+               tail = (unsigned long __user *)fp;
+
+       /* walk until user_backtrace() gives up or yields an unaligned fp */
+       while (tail && !((unsigned long)tail & 0x3))
+               tail = user_backtrace(tail, callchain_data, vma);
+
+       return callchain_data->nr;
+}
diff --git a/drivers/misc/tegra-profiler/backtrace.h b/drivers/misc/tegra-profiler/backtrace.h
new file mode 100644 (file)
index 0000000..82b55db
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * drivers/misc/tegra-profiler/backtrace.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_BACKTRACE_H
+#define __QUADD_BACKTRACE_H
+
+#include <linux/types.h>
+
+#define QUADD_MAX_STACK_DEPTH          64
+
+/* Fixed-capacity list of return addresses collected while unwinding. */
+struct quadd_callchain {
+       /* number of valid entries in callchain[] */
+       int nr;
+       u32 callchain[QUADD_MAX_STACK_DEPTH];
+};
+
+unsigned int
+quadd_get_user_callchain(struct pt_regs *regs,
+                        struct quadd_callchain *callchain_data);
+
+
+#endif  /* __QUADD_BACKTRACE_H */
diff --git a/drivers/misc/tegra-profiler/comm.c b/drivers/misc/tegra-profiler/comm.c
new file mode 100644 (file)
index 0000000..36bfa67
--- /dev/null
@@ -0,0 +1,650 @@
+/*
+ * drivers/misc/tegra-profiler/comm.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/sched.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "comm.h"
+#include "version.h"
+
+#define QUADD_SIZE_RB_BUFFER   (0x100000 * 8)  /* 8 MB */
+
+struct quadd_comm_ctx comm_ctx;
+
+/* Allocate @size bytes of ring buffer storage with vmalloc(). */
+static inline void *rb_alloc(unsigned long size)
+{
+       return vmalloc(size);
+}
+
+/* Release storage obtained from rb_alloc(). */
+static inline void rb_free(void *addr)
+{
+       vfree(addr);
+}
+
+/* Mark the ring buffer empty; takes no lock itself. */
+static void rb_reset(struct quadd_ring_buffer *rb)
+{
+       rb->pos_read = 0;
+       rb->pos_write = 0;
+       rb->fill_count = 0;
+}
+
+/*
+ * Initialize the ring buffer lock and allocate @size bytes of storage.
+ * Returns 0 on success, 1 if the allocation failed.
+ */
+static int rb_init(struct quadd_ring_buffer *rb, size_t size)
+{
+       spin_lock_init(&rb->lock);
+
+       rb->size = size;
+       rb->buf = (char *) rb_alloc(size);
+
+       if (rb->buf == NULL) {
+               pr_err("Ring buffer alloc error\n");
+               return 1;
+       }
+
+       pr_debug("data buffer size: %u\n", (unsigned int)rb->size);
+       rb_reset(rb);
+
+       return 0;
+}
+
+/*
+ * Detach and free the ring buffer storage.
+ *
+ * The buffer pointer is detached under the lock, but the memory is freed
+ * only after the lock is dropped: vfree() must not be called with a
+ * spinlock held and interrupts disabled.
+ */
+static void rb_deinit(struct quadd_ring_buffer *rb)
+{
+       unsigned long flags;
+       char *buf;
+
+       spin_lock_irqsave(&rb->lock, flags);
+       buf = rb->buf;
+       if (buf) {
+               rb_reset(rb);
+               rb->buf = NULL;
+       }
+       spin_unlock_irqrestore(&rb->lock, flags);
+
+       /* rb_free(NULL) is a no-op */
+       rb_free(buf);
+}
+
+/* Non-zero when every byte of the buffer is occupied. */
+static __attribute__((unused)) int rb_is_full(struct quadd_ring_buffer *rb)
+{
+       return rb->fill_count == rb->size;
+}
+
+/* Non-zero when the buffer holds no data. */
+static int rb_is_empty(struct quadd_ring_buffer *rb)
+{
+       return rb->fill_count == 0;
+}
+
+/* Number of bytes that can still be written before the buffer is full. */
+static size_t
+rb_get_free_space(struct quadd_ring_buffer *rb)
+{
+       return rb->size - rb->fill_count;
+}
+
+/*
+ * Copy @length bytes from @data into the ring buffer, wrapping around the
+ * end of the storage when necessary. The caller holds rb->lock.
+ *
+ * Returns @length, or 0 if there is not enough free space (nothing is
+ * written in that case).
+ */
+static size_t
+rb_write(struct quadd_ring_buffer *rb, char *data, size_t length)
+{
+       size_t tail;
+
+       if (length > rb_get_free_space(rb))
+               return 0;
+
+       /*
+        * Split on the distance to the end of the storage rather than on
+        * "new position < old position": the latter misses a write of
+        * exactly rb->size bytes starting at a non-zero position.
+        */
+       tail = rb->size - rb->pos_write;
+       if (length > tail) {
+               memcpy(rb->buf + rb->pos_write, data, tail);
+               memcpy(rb->buf, data + tail, length - tail);
+       } else {
+               memcpy(rb->buf + rb->pos_write, data, length);
+       }
+
+       rb->pos_write = (rb->pos_write + length) % rb->size;
+       rb->fill_count += length;
+
+       return length;
+}
+
+/*
+ * Push the last @length bytes that were read back into the buffer by
+ * moving the read position backwards. The caller holds rb->lock.
+ *
+ * Returns @length, or 0 if the buffer has less than @length bytes free.
+ */
+static size_t rb_read_undo(struct quadd_ring_buffer *rb, size_t length)
+{
+       if (rb_get_free_space(rb) < length)
+               return 0;
+
+       /* '>=' keeps pos_read in [0, size): pos_read == length maps to 0 */
+       if (rb->pos_read >= length)
+               rb->pos_read -= length;
+       else
+               rb->pos_read += rb->size - length;
+
+       /* give back exactly what is being un-read */
+       rb->fill_count += length;
+       return length;
+}
+
+/*
+ * Copy @length bytes out of the ring buffer into the kernel buffer @data
+ * and consume them. The caller holds rb->lock.
+ *
+ * Returns @length, or 0 if fewer than @length bytes are stored (nothing
+ * is consumed in that case).
+ */
+static size_t rb_read(struct quadd_ring_buffer *rb, char *data, size_t length)
+{
+       size_t tail;
+
+       if (length > rb->fill_count)
+               return 0;
+
+       /* bytes available before the storage wraps around */
+       tail = rb->size - rb->pos_read;
+       if (length > tail) {
+               memcpy(data, rb->buf + rb->pos_read, tail);
+               memcpy(data + tail, rb->buf, length - tail);
+       } else {
+               memcpy(data, rb->buf + rb->pos_read, length);
+       }
+
+       rb->pos_read = (rb->pos_read + length) % rb->size;
+       rb->fill_count -= length;
+
+       return length;
+}
+
+/*
+ * Copy @length bytes out of the ring buffer into the user buffer @data
+ * and consume them.
+ *
+ * Returns @length, or 0 if fewer than @length bytes are stored or a
+ * copy_to_user() call failed; the buffer state is left untouched then.
+ *
+ * NOTE(review): read_sample() calls this with rb->lock held and
+ * interrupts disabled, while copy_to_user() may fault and sleep.
+ */
+static size_t
+rb_read_user(struct quadd_ring_buffer *rb, char __user *data, size_t length)
+{
+       size_t tail;
+
+       if (length > rb->fill_count)
+               return 0;
+
+       /* bytes available before the storage wraps around */
+       tail = rb->size - rb->pos_read;
+       if (length > tail) {
+               if (copy_to_user(data, rb->buf + rb->pos_read, tail)) {
+                       pr_err_once("Error: copy_to_user\n");
+                       return 0;
+               }
+
+               if (copy_to_user(data + tail, rb->buf, length - tail)) {
+                       pr_err_once("Error: copy_to_user\n");
+                       return 0;
+               }
+       } else {
+               if (copy_to_user(data, rb->buf + rb->pos_read, length)) {
+                       pr_err_once("Error: copy_to_user\n");
+                       return 0;
+               }
+       }
+
+       rb->pos_read = (rb->pos_read + length) % rb->size;
+       rb->fill_count -= length;
+
+       return length;
+}
+
+/*
+ * Store one record header plus its optional extra data in the ring buffer.
+ * The record is dropped when header and extra data do not both fit.
+ */
+static void
+write_sample(struct quadd_record_data *sample, void *extra_data,
+            size_t extra_length)
+{
+       unsigned long flags;
+       struct quadd_ring_buffer *rb = &comm_ctx.rb;
+       int length_sample = sizeof(struct quadd_record_data) + extra_length;
+
+       spin_lock_irqsave(&rb->lock, flags);
+
+       if (length_sample > rb_get_free_space(rb)) {
+               pr_err_once("Error: Buffer overflowed, skip sample\n");
+               goto out_unlock;
+       }
+
+       if (!rb_write(rb, (char *)sample, sizeof(struct quadd_record_data)))
+               goto out_unlock;
+
+       if (!extra_data || extra_length == 0)
+               goto out_unlock;
+
+       if (!rb_write(rb, extra_data, extra_length))
+               pr_err_once("Buffer overflowed, skip sample\n");
+
+out_unlock:
+       spin_unlock_irqrestore(&rb->lock, flags);
+}
+
+/*
+ * Move one complete record (header plus type-dependent extra data) from
+ * the ring buffer to the user buffer.
+ * @buffer:     destination in user space
+ * @max_length: room left in @buffer
+ *
+ * Returns the number of bytes copied, or 0 when the buffer is empty, the
+ * record does not fit into @max_length (the header is then pushed back),
+ * or an error occurred.
+ *
+ * NOTE(review): copy_to_user() is called with rb->lock held and
+ * interrupts disabled; it may fault and sleep. Fixing that needs a
+ * bounce buffer and is left for a separate change.
+ */
+static int read_sample(char __user *buffer, size_t max_length)
+{
+       unsigned long flags;
+       struct quadd_ring_buffer *rb = &comm_ctx.rb;
+       struct quadd_record_data record;
+       size_t length_extra = 0;
+
+       spin_lock_irqsave(&rb->lock, flags);
+
+       if (rb_is_empty(rb)) {
+               spin_unlock_irqrestore(&rb->lock, flags);
+               return 0;
+       }
+
+       if (rb->fill_count < sizeof(struct quadd_record_data)) {
+               pr_err_once("Error: data\n");
+               spin_unlock_irqrestore(&rb->lock, flags);
+               return 0;
+       }
+
+       if (!rb_read(rb, (char *)&record, sizeof(struct quadd_record_data))) {
+               pr_err_once("Error: read sample\n");
+               spin_unlock_irqrestore(&rb->lock, flags);
+               return 0;
+       }
+
+       if (record.magic != QUADD_RECORD_MAGIC) {
+               pr_err_once("Bad magic: %#x\n", record.magic);
+               spin_unlock_irqrestore(&rb->lock, flags);
+               return 0;
+       }
+
+       /* size of the variable part that follows the header */
+       switch (record.record_type) {
+       case QUADD_RECORD_TYPE_SAMPLE:
+               length_extra = record.sample.callchain_nr *
+                                       sizeof(record.sample.ip);
+               break;
+
+       case QUADD_RECORD_TYPE_MMAP:
+               if (record.mmap.filename_length > 0) {
+                       length_extra = record.mmap.filename_length;
+               } else {
+                       length_extra = 0;
+                       pr_err_once("Error: filename\n");
+               }
+               break;
+
+       case QUADD_RECORD_TYPE_DEBUG:
+       case QUADD_RECORD_TYPE_HEADER:
+       case QUADD_RECORD_TYPE_MA:
+               length_extra = 0;
+               break;
+
+       case QUADD_RECORD_TYPE_POWER_RATE:
+               length_extra = record.power_rate.nr_cpus * sizeof(u32);
+               break;
+
+       case QUADD_RECORD_TYPE_ADDITIONAL_SAMPLE:
+               length_extra = record.additional_sample.extra_length;
+               break;
+
+       default:
+               pr_err_once("Error: Unknown sample: %u\n", record.record_type);
+               spin_unlock_irqrestore(&rb->lock, flags);
+               return 0;
+       }
+
+       /* record does not fit: push the header back for the next read */
+       if (sizeof(struct quadd_record_data) + length_extra > max_length) {
+               if (!rb_read_undo(rb, sizeof(struct quadd_record_data)))
+                       pr_err_once("Error: rb_read_undo\n");
+               spin_unlock_irqrestore(&rb->lock, flags);
+               return 0;
+       }
+
+       /*
+        * The extra data must already be stored, so compare with the
+        * amount of data in the buffer, not with the free space: the
+        * latter wrongly rejects valid records when the buffer is
+        * nearly full.
+        */
+       if (length_extra > rb->fill_count) {
+               pr_err_once("Error: Incompleted sample\n");
+               spin_unlock_irqrestore(&rb->lock, flags);
+               return 0;
+       }
+
+       if (copy_to_user(buffer, &record, sizeof(struct quadd_record_data))) {
+               pr_err_once("Error: copy_to_user\n");
+               spin_unlock_irqrestore(&rb->lock, flags);
+               return 0;
+       }
+
+       if (length_extra > 0) {
+               if (!rb_read_user(rb, buffer + sizeof(struct quadd_record_data),
+                                 length_extra)) {
+                       pr_err_once("Error: copy_to_user\n");
+                       spin_unlock_irqrestore(&rb->lock, flags);
+                       return 0;
+               }
+       }
+
+       spin_unlock_irqrestore(&rb->lock, flags);
+       return sizeof(struct quadd_record_data) + length_extra;
+}
+
+/* Queue a record for user space; ignored while profiling is not active. */
+static void put_sample(struct quadd_record_data *data, char *extra_data,
+                      unsigned int extra_length)
+{
+       if (atomic_read(&comm_ctx.active))
+               write_sample(data, extra_data, extra_length);
+}
+
+/* Discard everything stored in the ring buffer, under its lock. */
+static void comm_reset(void)
+{
+       struct quadd_ring_buffer *rb = &comm_ctx.rb;
+       unsigned long flags;
+
+       pr_debug("Comm reset\n");
+
+       spin_lock_irqsave(&rb->lock, flags);
+       rb_reset(rb);
+       spin_unlock_irqrestore(&rb->lock, flags);
+}
+
+/* Callbacks for queueing records and resetting the ring buffer. */
+static struct quadd_comm_data_interface comm_data = {
+       .put_sample = put_sample,
+       .reset = comm_reset,
+};
+
+/*
+ * Decide whether the calling task may use the profiler device.
+ *
+ * Access is granted to CAP_SYS_ADMIN callers. Otherwise parameters must
+ * have been set up, the profiled process must exist, and its uid must be
+ * either the caller's fsuid or the uid stored in comm_ctx.debug_app_uid.
+ *
+ * Returns 0 if access is allowed, -EACCES otherwise.
+ */
+static int check_access_permission(void)
+{
+       int err = 0;
+       struct task_struct *task;
+
+       if (capable(CAP_SYS_ADMIN))
+               return 0;
+
+       if (!comm_ctx.params_ok || comm_ctx.process_pid == 0)
+               return -EACCES;
+
+       /*
+        * The task returned by pid_task() is only guaranteed to stay
+        * around inside the RCU read-side section, so every access to it
+        * happens before rcu_read_unlock().
+        */
+       rcu_read_lock();
+       task = pid_task(find_vpid(comm_ctx.process_pid), PIDTYPE_PID);
+       if (!task) {
+               err = -EACCES;
+       } else if (current_fsuid() != task_uid(task) &&
+                  task_uid(task) != comm_ctx.debug_app_uid) {
+               pr_err("Permission denied, owner/task uids: %u/%u\n",
+                          current_fsuid(), task_uid(task));
+               err = -EACCES;
+       }
+       rcu_read_unlock();
+
+       return err;
+}
+
+/* Open handler: counts one more user of the device. Always returns 0. */
+static int device_open(struct inode *inode, struct file *file)
+{
+       mutex_lock(&comm_ctx.io_mutex);
+       comm_ctx.nr_users += 1;
+       mutex_unlock(&comm_ctx.io_mutex);
+
+       return 0;
+}
+
+/*
+ * Release handler: drops one user; when the last user goes away while
+ * profiling is active, profiling is stopped. Always returns 0.
+ */
+static int device_release(struct inode *inode, struct file *file)
+{
+       mutex_lock(&comm_ctx.io_mutex);
+
+       comm_ctx.nr_users -= 1;
+
+       /* cmpxchg returns the old value: non-zero means we were active */
+       if (comm_ctx.nr_users == 0 &&
+           atomic_cmpxchg(&comm_ctx.active, 1, 0)) {
+               comm_ctx.control->stop();
+               pr_info("Stop profiling: daemon is closed\n");
+       }
+
+       mutex_unlock(&comm_ctx.io_mutex);
+
+       return 0;
+}
+
+/*
+ * Read handler: copies as many complete records as fit into @buffer.
+ *
+ * Returns the number of bytes copied (possibly 0), the error from
+ * check_access_permission(), or -1 when profiling is not active.
+ */
+static ssize_t
+device_read(struct file *filp,
+           char __user *buffer,
+           size_t length,
+           loff_t *offset)
+{
+       size_t total = 0;
+       int err = check_access_permission();
+
+       if (err)
+               return err;
+
+       mutex_lock(&comm_ctx.io_mutex);
+
+       if (!atomic_read(&comm_ctx.active)) {
+               mutex_unlock(&comm_ctx.io_mutex);
+               return -1;
+       }
+
+       for (;;) {
+               size_t chunk;
+
+               /* stop once not even a bare record header would fit */
+               if (total + sizeof(struct quadd_record_data) >= length)
+                       break;
+
+               chunk = read_sample(buffer + total, length - total);
+               if (chunk == 0)
+                       break;
+
+               total += chunk;
+
+               if (!atomic_read(&comm_ctx.active))
+                       break;
+       }
+
+       mutex_unlock(&comm_ctx.io_mutex);
+       return total;
+}
+
+/*
+ * device_ioctl - control entry point of the profiler misc device.
+ * @file:        file the ioctl was issued on (not used here)
+ * @ioctl_num:   command code (IOCTL_SETUP, IOCTL_GET_CAP, IOCTL_GET_VERSION,
+ *               IOCTL_GET_STATE, IOCTL_START, IOCTL_STOP)
+ * @ioctl_param: user-space pointer to the command's argument structure
+ *
+ * Every command other than SETUP/GET_CAP/GET_STATE/GET_VERSION must first
+ * pass check_access_permission().  The command itself runs under
+ * comm_ctx.io_mutex, which is released on the single exit path below.
+ *
+ * Returns 0 on success, -EBUSY if SETUP is attempted while profiling is
+ * active, -EFAULT on a failed user copy, a failed start or an unknown
+ * command, or the error reported by check_access_permission() /
+ * set_parameters().
+ */
+static long
+device_ioctl(struct file *file,
+            unsigned int ioctl_num,
+            unsigned long ioctl_param)
+{
+       long ret = 0;
+       int err;
+       struct quadd_parameters user_params;
+       struct quadd_comm_cap cap;
+       struct quadd_module_state state;
+       struct quadd_module_version versions;
+       unsigned long flags;
+       struct quadd_ring_buffer *rb = &comm_ctx.rb;
+       void __user *uarg = (void __user *)ioctl_param;
+
+       if (ioctl_num != IOCTL_SETUP &&
+           ioctl_num != IOCTL_GET_CAP &&
+           ioctl_num != IOCTL_GET_STATE &&
+           ioctl_num != IOCTL_GET_VERSION) {
+               err = check_access_permission();
+               if (err)
+                       return err;
+       }
+
+       mutex_lock(&comm_ctx.io_mutex);
+
+       /* Every "break" below leads to the common unlock at the end. */
+       switch (ioctl_num) {
+       case IOCTL_SETUP:
+               if (atomic_read(&comm_ctx.active)) {
+                       pr_err("error: tegra profiler is active\n");
+                       ret = -EBUSY;
+                       break;
+               }
+
+               if (copy_from_user(&user_params, uarg, sizeof(user_params))) {
+                       pr_err("setup failed\n");
+                       ret = -EFAULT;
+                       break;
+               }
+
+               err = comm_ctx.control->set_parameters(&user_params,
+                                                      &comm_ctx.debug_app_uid);
+               if (err) {
+                       pr_err("error: setup failed\n");
+                       ret = err;
+                       break;
+               }
+               comm_ctx.params_ok = 1;
+               comm_ctx.process_pid = user_params.pids[0];
+
+               pr_info("setup success: freq/mafreq: %u/%u, backtrace: %d, pid: %d\n",
+                       user_params.freq,
+                       user_params.ma_freq,
+                       user_params.backtrace,
+                       user_params.pids[0]);
+               break;
+
+       case IOCTL_GET_CAP:
+               /*
+                * Zero the reply first: padding or fields the callback does
+                * not fill would otherwise expose kernel stack contents.
+                */
+               memset(&cap, 0, sizeof(cap));
+               comm_ctx.control->get_capabilities(&cap);
+               if (copy_to_user(uarg, &cap, sizeof(cap))) {
+                       pr_err("error: get_capabilities failed\n");
+                       ret = -EFAULT;
+               }
+               break;
+
+       case IOCTL_GET_VERSION:
+               /* Zeroed so the bytes after each string NUL are not stack data. */
+               memset(&versions, 0, sizeof(versions));
+               strlcpy(versions.branch, QUADD_MODULE_BRANCH,
+                       sizeof(versions.branch));
+               strlcpy(versions.version, QUADD_MODULE_VERSION,
+                       sizeof(versions.version));
+
+               versions.samples_version = QUADD_SAMPLES_VERSION;
+               versions.io_version = QUADD_IO_VERSION;
+
+               if (copy_to_user(uarg, &versions, sizeof(versions))) {
+                       pr_err("error: get version failed\n");
+                       ret = -EFAULT;
+               }
+               break;
+
+       case IOCTL_GET_STATE:
+               /* Same reasoning as GET_CAP: never copy out unset bytes. */
+               memset(&state, 0, sizeof(state));
+               comm_ctx.control->get_state(&state);
+
+               state.buffer_size = QUADD_SIZE_RB_BUFFER;
+
+               spin_lock_irqsave(&rb->lock, flags);
+               state.buffer_fill_size =
+                       QUADD_SIZE_RB_BUFFER - rb_get_free_space(rb);
+               spin_unlock_irqrestore(&rb->lock, flags);
+
+               if (copy_to_user(uarg, &state, sizeof(state))) {
+                       pr_err("error: get_state failed\n");
+                       ret = -EFAULT;
+               }
+               break;
+
+       case IOCTL_START:
+               /* Only the caller that flips active 0 -> 1 performs the start. */
+               if (!atomic_cmpxchg(&comm_ctx.active, 0, 1)) {
+                       if (!comm_ctx.params_ok) {
+                               pr_err("error: params failed\n");
+                               atomic_set(&comm_ctx.active, 0);
+                               ret = -EFAULT;
+                               break;
+                       }
+
+                       if (comm_ctx.control->start()) {
+                               pr_err("error: start failed\n");
+                               atomic_set(&comm_ctx.active, 0);
+                               ret = -EFAULT;
+                               break;
+                       }
+                       pr_info("Start profiling success\n");
+               }
+               break;
+
+       case IOCTL_STOP:
+               /* Only the caller that flips active 1 -> 0 performs the stop. */
+               if (atomic_cmpxchg(&comm_ctx.active, 1, 0)) {
+                       comm_ctx.control->stop();
+                       pr_info("Stop profiling success\n");
+               }
+               break;
+
+       default:
+               pr_err("error: ioctl %u is unsupported in this version of module\n",
+                      ioctl_num);
+               ret = -EFAULT;
+               break;
+       }
+
+       mutex_unlock(&comm_ctx.io_mutex);
+       return ret;
+}
+
+/*
+ * Deregister the misc device stored in comm_ctx.misc_dev and free its
+ * descriptor.  comm_ctx.misc_dev is left pointing at the freed memory.
+ */
+static void unregister(void)
+{
+       misc_deregister(comm_ctx.misc_dev);
+       kfree(comm_ctx.misc_dev);
+}
+
+/* Tear down the sample ring buffer embedded in comm_ctx. */
+static void free_ctx(void)
+{
+       rb_deinit(&comm_ctx.rb);
+}
+
+/*
+ * File operations of the profiler misc device: samples are fetched with
+ * read(), everything else is driven through ioctl().
+ * NOTE(review): no .owner and no .compat_ioctl are set - confirm this is
+ * intended for the configurations the driver is built in.
+ */
+static const struct file_operations qm_fops = {
+       .read           = device_read,
+       .open           = device_open,
+       .release        = device_release,
+       .unlocked_ioctl = device_ioctl
+};
+
+/*
+ * comm_init - initialise comm_ctx and register the profiler misc device.
+ *
+ * All state reachable from the file operations (mutex, flags, ring buffer)
+ * is set up before misc_register() makes the device visible, so an early
+ * open/ioctl never observes uninitialised data.  On any failure everything
+ * acquired so far is released again, including the miscdevice descriptor.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure or the negative
+ * error code returned by misc_register().
+ */
+static int comm_init(void)
+{
+       int res;
+       struct miscdevice *misc_dev;
+       struct quadd_ring_buffer *rb = &comm_ctx.rb;
+
+       mutex_init(&comm_ctx.io_mutex);
+       atomic_set(&comm_ctx.active, 0);
+
+       comm_ctx.params_ok = 0;
+       comm_ctx.process_pid = 0;
+       comm_ctx.nr_users = 0;
+
+       if (rb_init(rb, QUADD_SIZE_RB_BUFFER)) {
+               free_ctx();
+               return -ENOMEM;
+       }
+
+       misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL);
+       if (!misc_dev) {
+               pr_err("Error: alloc error\n");
+               res = -ENOMEM;
+               goto err_free_ctx;
+       }
+
+       misc_dev->minor = MISC_DYNAMIC_MINOR;
+       misc_dev->name = QUADD_DEVICE_NAME;
+       misc_dev->fops = &qm_fops;
+
+       /* Publish the descriptor before the device can be opened. */
+       comm_ctx.misc_dev = misc_dev;
+
+       res = misc_register(misc_dev);
+       if (res < 0) {
+               pr_err("Error: misc_register %d\n", res);
+               goto err_free_dev;
+       }
+
+       return 0;
+
+err_free_dev:
+       comm_ctx.misc_dev = NULL;
+       kfree(misc_dev);
+err_free_ctx:
+       free_ctx();
+       return res;
+}
+
+/*
+ * quadd_comm_events_init - bring up the user-space communication layer.
+ * @control: callbacks used by the ioctl handler to drive the profiler
+ *
+ * The control interface is stored before comm_init() registers the device:
+ * once the device is visible an ioctl may dereference comm_ctx.control.
+ *
+ * Returns the data interface used to push samples, or NULL if comm_init()
+ * failed.
+ */
+struct quadd_comm_data_interface *
+quadd_comm_events_init(struct quadd_comm_control_interface *control)
+{
+       comm_ctx.control = control;
+
+       if (comm_init() < 0) {
+               comm_ctx.control = NULL;
+               return NULL;
+       }
+
+       return &comm_data;
+}
+
+/*
+ * Shut the communication layer down: with comm_ctx.io_mutex held, remove
+ * the misc device and then release the ring buffer.
+ */
+void quadd_comm_events_exit(void)
+{
+       mutex_lock(&comm_ctx.io_mutex);
+       unregister();
+       free_ctx();
+       mutex_unlock(&comm_ctx.io_mutex);
+}
diff --git a/drivers/misc/tegra-profiler/comm.h b/drivers/misc/tegra-profiler/comm.h
new file mode 100644 (file)
index 0000000..da8e918
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * drivers/misc/tegra-profiler/comm.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_COMM_H__
+#define __QUADD_COMM_H__
+
+#include <linux/types.h>
+
+struct quadd_record_data;
+struct quadd_comm_cap;
+struct quadd_module_state;
+struct miscdevice;
+
+/*
+ * Ring buffer holding sample records until user space reads them.
+ * The ioctl handler takes @lock with spin_lock_irqsave() before querying
+ * the free space.
+ * NOTE(review): field meanings below are inferred from their names only.
+ */
+struct quadd_ring_buffer {
+       char *buf;              /* backing storage */
+       spinlock_t lock;        /* protects the buffer state */
+
+       size_t size;            /* presumably capacity of buf in bytes */
+       size_t pos_read;        /* presumably read offset */
+       size_t pos_write;       /* presumably write offset */
+       size_t fill_count;      /* presumably bytes currently stored */
+};
+
+struct quadd_parameters;
+
+/*
+ * Callbacks the communication layer invokes on behalf of user-space
+ * ioctl requests.  start() and set_parameters() report failure through a
+ * non-zero return value.
+ */
+struct quadd_comm_control_interface {
+       int (*start)(void);
+       void (*stop)(void);
+       int (*set_parameters)(struct quadd_parameters *param,
+                             uid_t *debug_app_uid);
+       void (*get_capabilities)(struct quadd_comm_cap *cap);
+       void (*get_state)(struct quadd_module_state *state);
+};
+
+/*
+ * Interface handed back by quadd_comm_events_init(): put_sample() queues
+ * one record plus optional trailing extra data of extra_length bytes.
+ */
+struct quadd_comm_data_interface {
+       void (*put_sample)(struct quadd_record_data *data, char *extra_data,
+                          unsigned int extra_length);
+       void (*reset)(void);
+};
+
+/* Global state of the user-space communication layer. */
+struct quadd_comm_ctx {
+       struct quadd_comm_control_interface *control;   /* ioctl callbacks */
+       struct quadd_ring_buffer rb;    /* samples waiting to be read */
+
+       atomic_t active;        /* 1 while profiling is running */
+
+       struct mutex io_mutex;  /* serialises the file operations */
+       int nr_users;           /* presumably number of open handles */
+
+       int params_ok;          /* set once IOCTL_SETUP succeeded */
+       pid_t process_pid;      /* first pid of the accepted parameters */
+       uid_t debug_app_uid;    /* filled in by set_parameters() */
+
+       struct miscdevice *misc_dev;    /* heap-allocated device descriptor */
+};
+
+struct quadd_comm_data_interface *
+quadd_comm_events_init(struct quadd_comm_control_interface *control);
+void quadd_comm_events_exit(void);
+
+#endif /* __QUADD_COMM_H__ */
diff --git a/drivers/misc/tegra-profiler/debug.c b/drivers/misc/tegra-profiler/debug.c
new file mode 100644 (file)
index 0000000..e0270a3
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ * drivers/misc/tegra-profiler/debug.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <asm/irq_regs.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "debug.h"
+#include "hrt.h"
+#include "tegra.h"
+
+#ifdef QM_DEBUG_SAMPLES_ENABLE
+
+/*
+ * Fill the fields common to every debug record.  When @regs is NULL the
+ * registers of the interrupted context are used if there are any; without
+ * registers the CPU mode is recorded as "none".
+ */
+static inline void
+init_sample(struct quadd_record_data *record, struct pt_regs *regs)
+{
+       struct quadd_debug_data *dbg = &record->debug;
+       struct pt_regs *ctx_regs = regs ? regs : get_irq_regs();
+
+       record->magic = QUADD_RECORD_MAGIC;
+       record->record_type = QUADD_RECORD_TYPE_DEBUG;
+
+       if (ctx_regs) {
+               if (user_mode(ctx_regs))
+                       record->cpu_mode = QUADD_CPU_MODE_USER;
+               else
+                       record->cpu_mode = QUADD_CPU_MODE_KERNEL;
+       } else {
+               record->cpu_mode = QUADD_CPU_MODE_NONE;
+       }
+
+       dbg->cpu = quadd_get_processor_id();
+       dbg->pid = 0;
+       dbg->time = quadd_get_time();
+       dbg->timer_period = 0;
+
+       dbg->extra_value1 = 0;
+       dbg->extra_value2 = 0;
+       dbg->extra_value3 = 0;
+}
+
+/* Emit a debug record for one invocation of the sampling timer handler. */
+void qm_debug_handler_sample(struct pt_regs *regs)
+{
+       struct quadd_record_data rec;
+
+       init_sample(&rec, regs);
+       rec.debug.type = QM_DEBUG_SAMPLE_TYPE_TIMER_HANDLE;
+
+       quadd_put_sample(&rec, NULL, 0);
+}
+
+/* Emit a debug record noting that the timer was forwarded by @period. */
+void qm_debug_timer_forward(struct pt_regs *regs, u64 period)
+{
+       struct quadd_record_data rec;
+
+       init_sample(&rec, regs);
+       rec.debug.type = QM_DEBUG_SAMPLE_TYPE_TIMER_FORWARD;
+       rec.debug.timer_period = period;
+
+       quadd_put_sample(&rec, NULL, 0);
+}
+
+/* Emit a debug record noting that the timer was started with @period. */
+void qm_debug_timer_start(struct pt_regs *regs, u64 period)
+{
+       struct quadd_record_data rec;
+
+       init_sample(&rec, regs);
+       rec.debug.type = QM_DEBUG_SAMPLE_TYPE_TIMER_START;
+       rec.debug.timer_period = period;
+
+       quadd_put_sample(&rec, NULL, 0);
+}
+
+/* Emit a debug record noting that the timer was cancelled. */
+void qm_debug_timer_cancel(void)
+{
+       struct quadd_record_data rec;
+
+       init_sample(&rec, NULL);
+       rec.debug.type = QM_DEBUG_SAMPLE_TYPE_TIMER_CANCEL;
+
+       quadd_put_sample(&rec, NULL, 0);
+}
+
+/*
+ * Emit a debug record for a context switch: the outgoing pid, the
+ * incoming pid and the active-thread count before the switch go into
+ * the three extra values.
+ */
+void
+qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, int prev_nr_active)
+{
+       struct quadd_record_data rec;
+
+       init_sample(&rec, NULL);
+       rec.debug.type = QM_DEBUG_SAMPLE_TYPE_SCHED_IN;
+
+       rec.debug.extra_value1 = prev_pid;
+       rec.debug.extra_value2 = current_pid;
+       rec.debug.extra_value3 = prev_nr_active;
+
+       quadd_put_sample(&rec, NULL, 0);
+}
+
+/*
+ * Emit a debug record for a counter read: the event id, the previous
+ * value and the new value go into the three extra values.
+ */
+void qm_debug_read_counter(int event_id, u32 prev_val, u32 val)
+{
+       struct quadd_record_data rec;
+
+       init_sample(&rec, NULL);
+       rec.debug.type = QM_DEBUG_SAMPLE_TYPE_READ_COUNTER;
+
+       rec.debug.extra_value1 = event_id;
+       rec.debug.extra_value2 = prev_val;
+       rec.debug.extra_value3 = val;
+
+       quadd_put_sample(&rec, NULL, 0);
+}
+
+/* Emit a debug record noting that event source @source_type was started. */
+void qm_debug_start_source(int source_type)
+{
+       struct quadd_record_data rec;
+
+       init_sample(&rec, NULL);
+       rec.debug.type = QM_DEBUG_SAMPLE_TYPE_SOURCE_START;
+       rec.debug.extra_value1 = source_type;
+
+       quadd_put_sample(&rec, NULL, 0);
+}
+
+/* Emit a debug record noting that event source @source_type was stopped. */
+void qm_debug_stop_source(int source_type)
+{
+       struct quadd_record_data rec;
+
+       init_sample(&rec, NULL);
+       rec.debug.type = QM_DEBUG_SAMPLE_TYPE_SOURCE_STOP;
+       rec.debug.extra_value1 = source_type;
+
+       quadd_put_sample(&rec, NULL, 0);
+}
+
+#endif /* QM_DEBUG_SAMPLES_ENABLE */
diff --git a/drivers/misc/tegra-profiler/debug.h b/drivers/misc/tegra-profiler/debug.h
new file mode 100644 (file)
index 0000000..ff62919
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * drivers/misc/tegra-profiler/debug.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_DEBUG_H
+#define __QUADD_DEBUG_H
+
+#include <linux/tegra_profiler.h>
+
+/* #define QM_DEBUG_SAMPLES_ENABLE 1 */
+
+/*
+ * Debug-sample hooks.  With QM_DEBUG_SAMPLES_ENABLE defined they are real
+ * functions implemented in debug.c; otherwise they are empty inline stubs
+ * so call sites need no #ifdef of their own.
+ */
+#ifdef QM_DEBUG_SAMPLES_ENABLE
+void qm_debug_handler_sample(struct pt_regs *regs);
+void qm_debug_timer_forward(struct pt_regs *regs, u64 period);
+void qm_debug_timer_start(struct pt_regs *regs, u64 period);
+void qm_debug_timer_cancel(void);
+void qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid,
+                           int prev_nr_active);
+void qm_debug_read_counter(int event_id, u32 prev_val, u32 val);
+void qm_debug_start_source(int source_type);
+void qm_debug_stop_source(int source_type);
+#else
+/* No-op stubs used when debug samples are compiled out. */
+static inline void qm_debug_handler_sample(struct pt_regs *regs)
+{
+}
+static inline void qm_debug_timer_forward(struct pt_regs *regs, u64 period)
+{
+}
+static inline void qm_debug_timer_start(struct pt_regs *regs, u64 period)
+{
+}
+static inline void qm_debug_timer_cancel(void)
+{
+}
+static inline void
+qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, int prev_nr_active)
+{
+}
+static inline void qm_debug_read_counter(int event_id, u32 prev_val, u32 val)
+{
+}
+static inline void qm_debug_start_source(int source_type)
+{
+}
+static inline void qm_debug_stop_source(int source_type)
+{
+}
+#endif
+
+void quadd_test_delay(void);
+
+#define QM_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+/*
+ * quadd_get_event_str - map a QUADD_EVENT_TYPE_* value to a printable name.
+ * @event: event type identifier
+ *
+ * Returns the event name, or "invalid event" when @event is negative,
+ * beyond the table, or falls into a gap of the table (designated
+ * initializers leave unnamed slots NULL).  Never returns NULL.
+ */
+static inline char *
+quadd_get_event_str(int event)
+{
+       static char *str[] = {
+               [QUADD_EVENT_TYPE_CPU_CYCLES]           = "cpu-cycles",
+
+               [QUADD_EVENT_TYPE_INSTRUCTIONS]         = "instructions",
+               [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS]  = "branch_instruction",
+               [QUADD_EVENT_TYPE_BRANCH_MISSES]        = "branch_misses",
+               [QUADD_EVENT_TYPE_BUS_CYCLES]           = "bus-cycles",
+
+               [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES]        = "l1_d_read",
+               [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES]       = "l1_d_write",
+               [QUADD_EVENT_TYPE_L1_ICACHE_MISSES]             = "l1_i",
+
+               [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES]        = "l2_d_read",
+               [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES]       = "l2_d_write",
+               [QUADD_EVENT_TYPE_L2_ICACHE_MISSES]             = "l2_i",
+       };
+
+       /* Explicit range check instead of relying on int -> size_t wrap. */
+       if (event < 0 || event >= (int)QM_ARRAY_SIZE(str) || !str[event])
+               return "invalid event";
+
+       return str[event];
+}
+
+#endif /* __QUADD_DEBUG_H */
diff --git a/drivers/misc/tegra-profiler/hrt.c b/drivers/misc/tegra-profiler/hrt.c
new file mode 100644 (file)
index 0000000..56d8b84
--- /dev/null
@@ -0,0 +1,620 @@
+/*
+ * drivers/misc/tegra-profiler/hrt.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/sched.h>
+#include <asm/cputype.h>
+#include <linux/hrtimer.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/ratelimit.h>
+#include <asm/irq_regs.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "quadd.h"
+#include "hrt.h"
+#include "comm.h"
+#include "mmap.h"
+#include "ma.h"
+#include "power_clk.h"
+#include "tegra.h"
+#include "debug.h"
+
+static struct quadd_hrt_ctx hrt;
+
+static void read_all_sources(struct pt_regs *regs, pid_t pid);
+
+static void sample_time_prepare(void);
+static void sample_time_finish(void);
+static void sample_time_reset(struct quadd_cpu_context *cpu_ctx);
+
+/*
+ * Per-CPU sampling timer callback.  Stops re-arming as soon as profiling
+ * is inactive; otherwise reads every event source (only when interrupted
+ * registers are available) and re-arms itself one sample period ahead.
+ */
+static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer)
+{
+       struct pt_regs *irq_regs = get_irq_regs();
+
+       if (!hrt.active)
+               return HRTIMER_NORESTART;
+
+       qm_debug_handler_sample(irq_regs);
+
+       if (irq_regs != NULL) {
+               sample_time_prepare();
+               read_all_sources(irq_regs, -1);
+               sample_time_finish();
+       }
+
+       hrtimer_forward_now(hrtimer, ns_to_ktime(hrt.sample_period));
+       qm_debug_timer_forward(irq_regs, hrt.sample_period);
+
+       return HRTIMER_RESTART;
+}
+
+/*
+ * Reset the sample timestamps of @cpu_ctx and arm its sampling timer to
+ * fire one sample period from now, pinned (HRTIMER_MODE_REL_PINNED).
+ */
+static void start_hrtimer(struct quadd_cpu_context *cpu_ctx)
+{
+       u64 period = hrt.sample_period;
+
+       sample_time_reset(cpu_ctx);
+
+       hrtimer_start(&cpu_ctx->hrtimer, ns_to_ktime(period),
+                     HRTIMER_MODE_REL_PINNED);
+       qm_debug_timer_start(NULL, period);
+}
+
+/* Cancel the sampling timer of @cpu_ctx and log the event if debugging. */
+static void cancel_hrtimer(struct quadd_cpu_context *cpu_ctx)
+{
+       hrtimer_cancel(&cpu_ctx->hrtimer);
+       qm_debug_timer_cancel();
+}
+
+/*
+ * One-time setup of the sampling timer of @cpu_ctx: monotonic clock,
+ * relative mode, hrtimer_handler() as callback.  The timer is not started.
+ */
+static void init_hrtimer(struct quadd_cpu_context *cpu_ctx)
+{
+       sample_time_reset(cpu_ctx);
+
+       hrtimer_init(&cpu_ctx->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       cpu_ctx->hrtimer.function = hrtimer_handler;
+}
+
+/* Return the current monotonic clock reading in nanoseconds. */
+u64 quadd_get_time(void)
+{
+       struct timespec ts;
+
+       do_posix_clock_monotonic_gettime(&ts);
+       return timespec_to_ns(&ts);
+}
+
+/*
+ * Timestamp to store in a sample.  With QUADD_USE_CORRECT_SAMPLE_TS this
+ * is the per-CPU value computed by sample_time_prepare(); otherwise it is
+ * simply the current monotonic time.
+ */
+static u64 get_sample_time(void)
+{
+#ifndef QUADD_USE_CORRECT_SAMPLE_TS
+       return quadd_get_time();
+#else
+       struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+       return cpu_ctx->current_time;
+#endif
+}
+
+/*
+ * Compute this CPU's timestamp for the sample about to be taken: the
+ * clock reading for the first sample after a reset (prev_time ==
+ * ULLONG_MAX), afterwards the previous timestamp plus one sample period.
+ * Does nothing unless QUADD_USE_CORRECT_SAMPLE_TS is defined.
+ */
+static void sample_time_prepare(void)
+{
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+       struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+
+       if (cpu_ctx->prev_time == ULLONG_MAX)
+               cpu_ctx->current_time = quadd_get_time();
+       else
+               cpu_ctx->current_time = cpu_ctx->prev_time + hrt.sample_period;
+#endif
+}
+
+/*
+ * Remember the timestamp just used as the base for the next sample.
+ * Does nothing unless QUADD_USE_CORRECT_SAMPLE_TS is defined.
+ */
+static void sample_time_finish(void)
+{
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+       struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+       cpu_ctx->prev_time = cpu_ctx->current_time;
+#endif
+}
+
+/*
+ * Mark the timestamps of @cpu_ctx as unset (ULLONG_MAX) so the next
+ * sample takes its time from the clock again.
+ * Does nothing unless QUADD_USE_CORRECT_SAMPLE_TS is defined.
+ */
+static void sample_time_reset(struct quadd_cpu_context *cpu_ctx)
+{
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+       cpu_ctx->prev_time = ULLONG_MAX;
+       cpu_ctx->current_time = ULLONG_MAX;
+#endif
+}
+
+/*
+ * Queue the header record that describes the profiling session: sample
+ * format version, the relevant user parameters, the sampling periods and
+ * the power-clock settings.  It goes straight to the comm layer, so it is
+ * not counted in hrt.counter_samples.
+ * NOTE(review): @record lives on the stack and only the header fields are
+ * written - confirm put_sample() never copies the untouched bytes out.
+ */
+static void put_header(void)
+{
+       int power_rate_period;
+       struct quadd_record_data record;
+       struct quadd_header_data *hdr = &record.hdr;
+       struct quadd_parameters *param = &hrt.quadd_ctx->param;
+       struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
+
+       record.magic = QUADD_RECORD_MAGIC;
+       record.record_type = QUADD_RECORD_TYPE_HEADER;
+       record.cpu_mode = QUADD_CPU_MODE_NONE;
+
+       hdr->version = QUADD_SAMPLES_VERSION;
+
+       hdr->backtrace = param->backtrace;
+       hdr->use_freq = param->use_freq;
+       hdr->system_wide = param->system_wide;
+
+       /* TODO: dynamically */
+#ifdef QM_DEBUG_SAMPLES_ENABLE
+       hdr->debug_samples = 1;
+#else
+       hdr->debug_samples = 0;
+#endif
+
+       hdr->period = hrt.sample_period;
+       hdr->ma_period = hrt.ma_period;
+
+       hdr->power_rate = quadd_power_clk_is_enabled(&power_rate_period);
+       hdr->power_rate_period = power_rate_period;
+
+       comm->put_sample(&record, NULL, 0);
+}
+
+/*
+ * Hand one record (plus optional extra data) to the comm layer and count
+ * it.  Sample records whose period exceeds 0x7FFFFFFF are treated as
+ * bogus: they are reported once and dropped without being counted.
+ */
+void quadd_put_sample(struct quadd_record_data *data,
+                     char *extra_data, unsigned int extra_length)
+{
+       struct quadd_comm_data_interface *sink = hrt.quadd_ctx->comm;
+
+       if (data->record_type == QUADD_RECORD_TYPE_SAMPLE) {
+               struct quadd_sample_data *sample = &data->sample;
+
+               if (sample->period > 0x7FFFFFFF) {
+                       pr_err_once("very big period, sample id: %d\n",
+                                   sample->event_id);
+                       return;
+               }
+       }
+
+       sink->put_sample(data, extra_data, extra_length);
+       atomic64_inc(&hrt.counter_samples);
+}
+
+/*
+ * get_sample_data - convert one counter reading into a sample.
+ * @event:  counter reading (previous and current raw value)
+ * @regs:   registers of the interrupted context, source of the sample ip
+ * @sample: sample to fill in
+ *
+ * The sample period is the number of events since the previous reading.
+ * For PL310 events it is divided by the number of cores that currently
+ * have an active profiled thread, when that number is greater than one.
+ *
+ * Always returns 0.
+ */
+static int get_sample_data(struct event_data *event,
+                          struct pt_regs *regs,
+                          struct quadd_sample_data *sample)
+{
+       u32 period;
+       u32 prev_val, val;
+
+       prev_val = event->prev_val;
+       val = event->val;
+
+       sample->event_id = event->event_id;
+
+       sample->ip = instruction_pointer(regs);
+       sample->cpu = quadd_get_processor_id();
+       sample->time = get_sample_time();
+
+       /*
+        * u32 subtraction is modulo 2^32, so it yields the right delta even
+        * when the counter wrapped between the two readings (the former
+        * QUADD_U32_MAX - prev_val + val was one event short).
+        */
+       period = val - prev_val;
+
+       if (event->event_source == QUADD_EVENT_SOURCE_PL310) {
+               int nr_current_active = atomic_read(&hrt.nr_active_all_core);
+               if (nr_current_active > 1)
+                       period = period / nr_current_active;
+       }
+
+       sample->period = period;
+       return 0;
+}
+
+/*
+ * Thin wrapper: ask the mmap module for the mapping data that belongs to
+ * @regs, using the per-CPU context of the current CPU.
+ */
+static char *get_mmap_data(struct pt_regs *regs,
+                          struct quadd_mmap_data *sample,
+                          unsigned int *extra_length)
+{
+       struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+       return quadd_get_mmap(cpu_ctx, regs, sample, extra_length);
+}
+
+/*
+ * read_source - read all counters of one event source and queue a sample
+ * record per counter.
+ * @source: event source to read; NULL is silently ignored
+ * @regs:   registers of the interrupted context
+ * @pid:    pid to store in the samples; if not positive, the pid of this
+ *          CPU's active thread is used and samples are only queued while
+ *          the CPU has an active profiled thread
+ *
+ * When the interrupted context is user mode and backtraces are enabled,
+ * the user call chain is collected once and attached to every sample.
+ */
+static void read_source(struct quadd_event_source_interface *source,
+                       struct pt_regs *regs, pid_t pid)
+{
+       int nr_events, i;
+       struct event_data events[QUADD_MAX_COUNTERS];
+       struct quadd_record_data record_data;
+       struct quadd_thread_data *t_data;
+       char *extra_data = NULL;
+       unsigned int extra_length = 0, callchain_nr = 0;
+       struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+       struct quadd_callchain *callchain_data = &cpu_ctx->callchain_data;
+
+       if (!source)
+               return;
+
+       nr_events = source->read(events);
+
+       /* A negative count is as invalid as zero or an oversized one. */
+       if (nr_events <= 0 || nr_events > QUADD_MAX_COUNTERS) {
+               pr_err_once("Error number of counters: %d, source: %p\n",
+                               nr_events, source);
+               return;
+       }
+
+       if (user_mode(regs) && hrt.quadd_ctx->param.backtrace) {
+               callchain_nr = quadd_get_user_callchain(regs, callchain_data);
+               if (callchain_nr > 0) {
+                       extra_data = (char *)cpu_ctx->callchain_data.callchain;
+                       extra_length = callchain_nr * sizeof(u32);
+               }
+       }
+
+       for (i = 0; i < nr_events; i++) {
+               if (get_sample_data(&events[i], regs, &record_data.sample))
+                       return;
+
+               record_data.magic = QUADD_RECORD_MAGIC;
+               record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
+               record_data.cpu_mode = user_mode(regs) ?
+                       QUADD_CPU_MODE_USER : QUADD_CPU_MODE_KERNEL;
+
+               record_data.sample.callchain_nr = callchain_nr;
+
+               if (pid > 0) {
+                       record_data.sample.pid = pid;
+                       quadd_put_sample(&record_data, extra_data,
+                                        extra_length);
+               } else {
+                       t_data = &cpu_ctx->active_thread;
+
+                       /* Drop the sample if no profiled thread runs here. */
+                       if (atomic_read(&cpu_ctx->nr_active) > 0) {
+                               record_data.sample.pid = t_data->pid;
+                               quadd_put_sample(&record_data, extra_data,
+                                                extra_length);
+                       }
+               }
+       }
+}
+
+/*
+ * Take one sampling pass: first queue an mmap record if the mmap module
+ * returned a file name for the interrupted context, then read the PMU and
+ * the PL310 sources, each only if present and marked active.
+ */
+static void read_all_sources(struct pt_regs *regs, pid_t pid)
+{
+       struct quadd_ctx *qctx = hrt.quadd_ctx;
+       struct quadd_record_data rec;
+       unsigned int name_len;
+       char *name;
+
+       if (regs == NULL)
+               return;
+
+       name = get_mmap_data(regs, &rec.mmap, &name_len);
+       if (!name || name_len == 0) {
+               rec.mmap.filename_length = 0;
+       } else {
+               rec.magic = QUADD_RECORD_MAGIC;
+               rec.record_type = QUADD_RECORD_TYPE_MMAP;
+               rec.cpu_mode = QUADD_CPU_MODE_USER;
+
+               rec.mmap.filename_length = name_len;
+               /* Without an explicit pid fall back to the first profiled one. */
+               if (pid > 0)
+                       rec.mmap.pid = pid;
+               else
+                       rec.mmap.pid = qctx->param.pids[0];
+
+               quadd_put_sample(&rec, name, name_len);
+       }
+
+       if (qctx->pmu && qctx->pmu_info.active)
+               read_source(qctx->pmu, regs, pid);
+
+       if (qctx->pl310 && qctx->pl310_info.active)
+               read_source(qctx->pl310, regs, pid);
+}
+
+/* Return 1 if @pid is one of the pids selected for profiling, else 0. */
+static inline int is_profile_process(pid_t pid)
+{
+       struct quadd_parameters *param = &hrt.quadd_ctx->param;
+       int idx = 0;
+
+       while (idx < param->nr_pids) {
+               if (param->pids[idx] == pid)
+                       return 1;
+               idx++;
+       }
+
+       return 0;
+}
+
+/*
+ * Record @pid/@tgid as the thread active on this CPU.  Only done when the
+ * slot is free and the CPU has no active thread counted; otherwise a
+ * one-time warning is printed.
+ *
+ * Returns 1 if the thread was recorded, 0 if not.
+ */
+static int
+add_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid, pid_t tgid)
+{
+       struct quadd_thread_data *slot = &cpu_ctx->active_thread;
+
+       if (slot->pid <= 0 && atomic_read(&cpu_ctx->nr_active) <= 0) {
+               slot->pid = pid;
+               slot->tgid = tgid;
+               return 1;
+       }
+
+       pr_warn_once("Warning for thread: %d\n", (int)pid);
+       return 0;
+}
+
+/*
+ * Clear this CPU's active-thread slot if it currently holds @pid.
+ *
+ * Returns 1 if the slot was cleared, 0 if it was already empty or holds a
+ * different thread (the latter prints a one-time warning).
+ */
+static int remove_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid)
+{
+       struct quadd_thread_data *slot = &cpu_ctx->active_thread;
+
+       if (slot->pid < 0)
+               return 0;
+
+       if (slot->pid != pid) {
+               pr_warn_once("Warning for thread: %d\n", (int)pid);
+               return 0;
+       }
+
+       slot->pid = -1;
+       slot->tgid = -1;
+       return 1;
+}
+
+/*
+ * kprobe pre-handler installed on QUADD_HRT_SCHED_IN_FUNC.  It tracks
+ * whether a profiled thread runs on this CPU: when the per-CPU active
+ * count leaves zero the PMU and the sampling timer are started, when it
+ * returns to zero they are stopped.  Always returns 0.
+ *
+ * NOTE(review): the add path ignores the result of add_active_thread()
+ * and always increments nr_active, while the remove path subtracts the
+ * helper's result - confirm the asymmetry is intended.
+ */
+static int task_sched_in(struct kprobe *kp, struct pt_regs *regs)
+{
+       int n, prev_flag, current_flag;
+       struct task_struct *prev, *task;
+       int prev_nr_active, new_nr_active;
+       struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+       struct quadd_ctx *ctx = hrt.quadd_ctx;
+       struct event_data events[QUADD_MAX_COUNTERS];
+       /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
+
+       if (hrt.active == 0)
+               return 0;
+
+       /* presumably the probed function receives "prev" in r1 - verify */
+       prev = (struct task_struct *)regs->ARM_r1;
+       task = current;
+/*
+       if (__ratelimit(&ratelimit_state))
+               pr_info("cpu: %d, prev: %u (%u) \t--> curr: %u (%u)\n",
+                       quadd_get_processor_id(), (unsigned int)prev->pid,
+                       (unsigned int)prev->tgid, (unsigned int)task->pid,
+                       (unsigned int)task->tgid);
+*/
+       /* Sanity-check the pointer taken from the register before use. */
+       if (!prev || !prev->real_parent || !prev->group_leader ||
+               prev->group_leader->tgid != prev->tgid) {
+               pr_err_once("Warning\n");
+               return 0;
+       }
+
+       prev_flag = is_profile_process(prev->tgid);
+       current_flag = is_profile_process(task->tgid);
+
+       if (prev_flag || current_flag) {
+               prev_nr_active = atomic_read(&cpu_ctx->nr_active);
+               qm_debug_task_sched_in(prev->pid, task->pid, prev_nr_active);
+
+               if (prev_flag) {
+                       n = remove_active_thread(cpu_ctx, prev->pid);
+                       atomic_sub(n, &cpu_ctx->nr_active);
+               }
+               if (current_flag) {
+                       add_active_thread(cpu_ctx, task->pid, task->tgid);
+                       atomic_inc(&cpu_ctx->nr_active);
+               }
+
+               new_nr_active = atomic_read(&cpu_ctx->nr_active);
+               if (prev_nr_active != new_nr_active) {
+                       if (prev_nr_active == 0) {
+                               /* first profiled thread on this CPU */
+                               if (ctx->pmu)
+                                       ctx->pmu->start();
+
+                               /* result unused; presumably refreshes the
+                                * source's previous values - verify */
+                               if (ctx->pl310)
+                                       ctx->pl310->read(events);
+
+                               start_hrtimer(cpu_ctx);
+                               atomic_inc(&hrt.nr_active_all_core);
+                       } else if (new_nr_active == 0) {
+                               /* last profiled thread left this CPU */
+                               cancel_hrtimer(cpu_ctx);
+                               atomic_dec(&hrt.nr_active_all_core);
+
+                               if (ctx->pmu)
+                                       ctx->pmu->stop();
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * kprobe fault handler: report the probe address and symbol once.
+ * Always returns 0.
+ */
+static int handler_fault(struct kprobe *kp, struct pt_regs *regs, int trapnr)
+{
+       pr_err_once("addr: %p, symbol: %s\n", kp->addr, kp->symbol_name);
+       return 0;
+}
+
+/*
+ * Install the scheduler kprobe on QUADD_HRT_SCHED_IN_FUNC.
+ * Returns 0 on success or the error code from register_kprobe().
+ */
+static int start_instr(void)
+{
+       struct kprobe *probe = &hrt.kp_in;
+       int err;
+
+       /* Start from a clean descriptor every time the probe is installed. */
+       memset(probe, 0, sizeof(struct kprobe));
+
+       probe->pre_handler = task_sched_in;
+       probe->fault_handler = handler_fault;
+       probe->addr = NULL;
+       probe->symbol_name = QUADD_HRT_SCHED_IN_FUNC;
+
+       err = register_kprobe(probe);
+       if (err)
+               pr_err("register_kprobe error, symbol_name: %s\n",
+                       probe->symbol_name);
+
+       return err;
+}
+
+/* Remove the scheduler kprobe installed by start_instr(). */
+static void stop_instr(void)
+{
+       unregister_kprobe(&hrt.kp_in);
+}
+
+/*
+ * Check at init time that the scheduler kprobe can be installed: register
+ * it and remove it again straight away.
+ * Returns 0 on success or the error code from start_instr().
+ */
+static int init_instr(void)
+{
+       int err = start_instr();
+
+       if (!err) {
+               stop_instr();
+               return 0;
+       }
+
+       pr_err("Init instr failed\n");
+       return err;
+}
+
+/* Counterpart of init_instr(); there is nothing to undo. Returns 0. */
+static int deinit_instr(void)
+{
+       return 0;
+}
+
+/*
+ * Reset the per-CPU profiling state: no active thread, zero active count,
+ * sample timestamps unset.  Only possible CPUs are visited, because only
+ * they own a slot in the per-cpu allocation.
+ */
+static void reset_cpu_ctx(void)
+{
+       int cpu_id;
+       struct quadd_cpu_context *cpu_ctx;
+       struct quadd_thread_data *t_data;
+
+       for_each_possible_cpu(cpu_id) {
+               cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
+               t_data = &cpu_ctx->active_thread;
+
+               atomic_set(&cpu_ctx->nr_active, 0);
+
+               t_data->pid = -1;
+               t_data->tgid = -1;
+
+               sample_time_reset(cpu_ctx);
+       }
+}
+
+/*
+ * Start a profiling session: derive the sample and memory-analysis
+ * periods from the current parameters, reset the per-CPU state, install
+ * the scheduler kprobe, queue the header record and start the PL310 and
+ * memory-analysis sources.
+ *
+ * Returns 0 on success or the error from start_instr().
+ */
+int quadd_hrt_start(void)
+{
+       int err;
+       u64 period;
+       long freq;
+       struct quadd_ctx *ctx = hrt.quadd_ctx;
+
+       /* Clamping to QUADD_HRT_MIN_FREQ also rules out a division by zero. */
+       freq = ctx->param.freq;
+       freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
+       period = NSEC_PER_SEC / freq;
+       hrt.sample_period = period;
+
+       if (ctx->param.ma_freq > 0)
+               hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
+       else
+               hrt.ma_period = 0;
+
+       atomic64_set(&hrt.counter_samples, 0);
+
+       reset_cpu_ctx();
+
+       err = start_instr();
+       if (err) {
+               pr_err("error: start_instr is failed\n");
+               return err;
+       }
+
+       put_header();
+
+       if (ctx->pl310)
+               ctx->pl310->start();
+
+       quadd_ma_start(&hrt);
+
+       /* Set last: the kprobe and timer handlers bail out while it is 0. */
+       hrt.active = 1;
+
+       pr_info("Start hrt: freq/period: %ld/%llu\n", freq, period);
+       return 0;
+}
+
+/*
+ * Stop the profiling session: stop the PL310 and memory-analysis sources,
+ * mark the sampler inactive and remove the scheduler kprobe.
+ * NOTE(review): the per-CPU timers are not cancelled here (their handler
+ * stops re-arming once hrt.active is 0) and hrt.nr_active_all_core is not
+ * reset - confirm both are intended.
+ */
+void quadd_hrt_stop(void)
+{
+       struct quadd_ctx *ctx = hrt.quadd_ctx;
+
+       pr_info("Stop hrt, number of samples: %llu\n",
+               atomic64_read(&hrt.counter_samples));
+
+       if (ctx->pl310)
+               ctx->pl310->stop();
+
+       quadd_ma_stop(&hrt);
+
+       hrt.active = 0;
+       stop_instr();
+
+       atomic64_set(&hrt.counter_samples, 0);
+
+       /* reset_cpu_ctx(); */
+}
+
+/*
+ * Final teardown of the sampler: stop a running session, then release the
+ * per-CPU contexts.  Each context embeds an hrtimer that may still be
+ * queued (the handler only stops re-arming at its next expiry), so every
+ * timer is cancelled before the memory backing it is freed.
+ */
+void quadd_hrt_deinit(void)
+{
+       int cpu_id;
+
+       if (hrt.active)
+               quadd_hrt_stop();
+
+       deinit_instr();
+
+       if (!hrt.cpu_ctx)
+               return;
+
+       /* hrtimer_cancel() also waits for a handler that is running. */
+       for_each_possible_cpu(cpu_id)
+               hrtimer_cancel(&per_cpu_ptr(hrt.cpu_ctx, cpu_id)->hrtimer);
+
+       free_percpu(hrt.cpu_ctx);
+       hrt.cpu_ctx = NULL;
+}
+
+/*
+ * Report sampler statistics into @state: the number of samples queued so
+ * far; the skipped-sample count is always reported as 0.
+ */
+void quadd_hrt_get_state(struct quadd_module_state *state)
+{
+       state->nr_all_samples = atomic64_read(&hrt.counter_samples);
+       state->nr_skipped_samples = 0;
+}
+
+/*
+ * quadd_hrt_init - one-time initialisation of the sampler.
+ * @ctx: global profiler context
+ *
+ * Derives the initial periods from @ctx, allocates the per-CPU contexts,
+ * prepares one sampling timer per possible CPU and checks that the
+ * scheduler kprobe can be installed.
+ *
+ * Returns the sampler context, or NULL if the per-cpu allocation or the
+ * kprobe check failed; in the latter case the allocation is released.
+ */
+struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx)
+{
+       int cpu_id;
+       u64 period;
+       long freq;
+       struct quadd_cpu_context *cpu_ctx;
+
+       hrt.quadd_ctx = ctx;
+       hrt.active = 0;
+
+       /* Clamping to QUADD_HRT_MIN_FREQ also rules out a division by zero. */
+       freq = ctx->param.freq;
+       freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
+       period = NSEC_PER_SEC / freq;
+       hrt.sample_period = period;
+
+       if (ctx->param.ma_freq > 0)
+               hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
+       else
+               hrt.ma_period = 0;
+
+       atomic64_set(&hrt.counter_samples, 0);
+
+       hrt.cpu_ctx = alloc_percpu(struct quadd_cpu_context);
+       if (!hrt.cpu_ctx)
+               return NULL;
+
+       /* Only possible CPUs own a slot in the per-cpu allocation. */
+       for_each_possible_cpu(cpu_id) {
+               cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
+
+               atomic_set(&cpu_ctx->nr_active, 0);
+
+               cpu_ctx->active_thread.pid = -1;
+               cpu_ctx->active_thread.tgid = -1;
+
+               init_hrtimer(cpu_ctx);
+       }
+
+       if (init_instr()) {
+               /* Do not leak the per-cpu contexts on failure. */
+               free_percpu(hrt.cpu_ctx);
+               hrt.cpu_ctx = NULL;
+               return NULL;
+       }
+
+       return &hrt;
+}
diff --git a/drivers/misc/tegra-profiler/hrt.h b/drivers/misc/tegra-profiler/hrt.h
new file mode 100644 (file)
index 0000000..f113b88
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+ * drivers/misc/tegra-profiler/hrt.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_HRT_H
+#define __QUADD_HRT_H
+
+#define QUADD_MAX_STACK_DEPTH          64
+
+#ifdef __KERNEL__
+
+#include <linux/hrtimer.h>
+#include <linux/limits.h>
+#include <linux/kprobes.h>
+
+#include "backtrace.h"
+
+#define QUADD_USE_CORRECT_SAMPLE_TS    1
+
+/* Identifies a thread by its pid and its thread-group id. */
+struct quadd_thread_data {
+       pid_t pid;
+       pid_t tgid;
+};
+
+/* Per-CPU profiler state; one instance is allocated for every CPU. */
+struct quadd_cpu_context {
+       /* Set up by init_hrtimer() during quadd_hrt_init(). */
+       struct hrtimer hrtimer;
+
+       struct quadd_callchain callchain_data;
+       /* Receives the file/module name produced by quadd_get_mmap(). */
+       char mmap_filename[PATH_MAX];
+
+       /* pid/tgid are initialised to -1 by quadd_hrt_init(). */
+       struct quadd_thread_data active_thread;
+       /* Initialised to 0 by quadd_hrt_init(). */
+       atomic_t nr_active;
+
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+       u64 prev_time;
+       u64 current_time;
+#endif
+};
+
+/* Global state of the high-resolution-timer sampling module. */
+struct quadd_hrt_ctx {
+       /*
+        * Per-CPU contexts from alloc_percpu().  __percpu must qualify the
+        * pointed-to object (not the pointer variable) for sparse to check
+        * accesses correctly.
+        */
+       struct quadd_cpu_context __percpu *cpu_ctx;
+       /* NSEC_PER_SEC divided by the sampling frequency. */
+       u64 sample_period;
+
+       struct kprobe kp_in;
+       /* struct kinstr ki_out; */
+
+       /* Back-pointer to the owning profiler context. */
+       struct quadd_ctx *quadd_ctx;
+
+       /* Non-zero while sampling is running. */
+       int active;
+       atomic64_t counter_samples;
+       atomic_t nr_active_all_core;
+
+       /* Memory-analysis timer and its period in msec (0 = disabled). */
+       struct timer_list ma_timer;
+       unsigned int ma_period;
+
+       /* Last values reported by the memory-analysis timer, in pages. */
+       unsigned long vm_size_prev;
+       unsigned long rss_size_prev;
+};
+
+#define QUADD_HRT_MIN_FREQ     110
+
+#define QUADD_U32_MAX (~(__u32)0)
+
+struct quadd_hrt_ctx;
+struct quadd_record_data;
+struct quadd_module_state;
+
+/* Set up / tear down the HRT module; init returns NULL on failure. */
+struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx);
+void quadd_hrt_deinit(void);
+
+/* Start / stop sampling. */
+int quadd_hrt_start(void);
+void quadd_hrt_stop(void);
+
+/* Emit a record, optionally followed by extra_length bytes of extra_data. */
+void quadd_put_sample(struct quadd_record_data *data,
+                     char *extra_data, unsigned int extra_length);
+
+/* Report sample statistics into @state. */
+void quadd_hrt_get_state(struct quadd_module_state *state);
+/* Timestamp source used for records. */
+u64 quadd_get_time(void);
+
+#endif /* __KERNEL__ */
+
+#endif /* __QUADD_HRT_H */
diff --git a/drivers/misc/tegra-profiler/ma.c b/drivers/misc/tegra-profiler/ma.c
new file mode 100644 (file)
index 0000000..6967ea9
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * drivers/misc/tegra-profiler/ma.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/timer.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "ma.h"
+#include "quadd.h"
+#include "hrt.h"
+#include "comm.h"
+#include "debug.h"
+
+/*
+ * Build a memory-analysis record for @pid and pass it to the comm layer.
+ *
+ * @vm_size and @rss_size are stored in the record shifted left by
+ * PAGE_SHIFT; the record is timestamped with quadd_get_time().
+ */
+static void make_sample(struct quadd_hrt_ctx *hrt_ctx,
+                       pid_t pid, unsigned long vm_size,
+                       unsigned long rss_size)
+{
+       struct quadd_record_data rec;
+
+       /* Record header. */
+       rec.magic = QUADD_RECORD_MAGIC;
+       rec.record_type = QUADD_RECORD_TYPE_MA;
+       rec.cpu_mode = QUADD_CPU_MODE_NONE;
+
+       /* Memory-analysis payload. */
+       rec.ma.pid = pid;
+       rec.ma.time = quadd_get_time();
+       rec.ma.vm_size = vm_size << PAGE_SHIFT;
+       rec.ma.rss_size = rss_size << PAGE_SHIFT;
+
+       hrt_ctx->quadd_ctx->comm->put_sample(&rec, NULL, 0);
+}
+
+/*
+ * Sample the memory usage of the profiled process (param.pids[0]) and emit a
+ * memory-analysis record when its total_vm or RSS changed since the last
+ * report.  Does nothing if the task cannot be found or has no mm.
+ */
+static void check_ma(struct quadd_hrt_ctx *hrt_ctx)
+{
+       pid_t pid;
+       struct pid *pid_s;
+       struct task_struct *task;
+       struct mm_struct *mm;
+       struct quadd_ctx *quadd_ctx = hrt_ctx->quadd_ctx;
+       unsigned long vm_size, rss_size;
+
+       pid = quadd_ctx->param.pids[0];
+
+       /*
+        * No reference is taken on the task returned by pid_task(), so the
+        * RCU read-side critical section has to cover every dereference of
+        * it, not just the lookup.
+        */
+       rcu_read_lock();
+
+       pid_s = find_vpid(pid);
+       task = pid_s ? pid_task(pid_s, PIDTYPE_PID) : NULL;
+       if (!task)
+               goto out_unlock;
+
+       /*
+        * NOTE(review): task->mm is read without task_lock() and the mm is
+        * not pinned; confirm whether that is acceptable here.
+        */
+       mm = task->mm;
+       if (!mm)
+               goto out_unlock;
+
+       vm_size = mm->total_vm;
+       rss_size = get_mm_rss(mm);
+
+       if (vm_size != hrt_ctx->vm_size_prev ||
+           rss_size != hrt_ctx->rss_size_prev) {
+               make_sample(hrt_ctx, pid, vm_size, rss_size);
+               hrt_ctx->vm_size_prev = vm_size;
+               hrt_ctx->rss_size_prev = rss_size;
+       }
+
+out_unlock:
+       rcu_read_unlock();
+}
+
+/*
+ * Memory-analysis timer callback.  @data carries the quadd_hrt_ctx pointer.
+ * While the HRT context is active it takes one measurement and re-arms the
+ * timer ma_period milliseconds ahead; once inactive it lets the timer lapse.
+ */
+static void timer_interrupt(unsigned long data)
+{
+       struct quadd_hrt_ctx *hrt_ctx = (struct quadd_hrt_ctx *)data;
+
+       if (!hrt_ctx->active)
+               return;
+
+       check_ma(hrt_ctx);
+
+       hrt_ctx->ma_timer.expires =
+               jiffies + msecs_to_jiffies(hrt_ctx->ma_period);
+       add_timer(&hrt_ctx->ma_timer);
+}
+
+/*
+ * Start periodic memory analysis for @hrt_ctx.  A period of 0 means the
+ * feature is disabled and no timer is armed.
+ */
+void quadd_ma_start(struct quadd_hrt_ctx *hrt_ctx)
+{
+       struct timer_list *ma_timer;
+
+       if (!hrt_ctx->ma_period) {
+               pr_info("QuadD MA is disabled\n");
+               return;
+       }
+
+       pr_info("QuadD MA is started, interval: %u msec\n",
+               hrt_ctx->ma_period);
+
+       /* Forget the previous values so the first measurement is reported. */
+       hrt_ctx->rss_size_prev = 0;
+       hrt_ctx->vm_size_prev = 0;
+
+       ma_timer = &hrt_ctx->ma_timer;
+       init_timer(ma_timer);
+       ma_timer->data = (unsigned long)hrt_ctx;
+       ma_timer->function = timer_interrupt;
+       ma_timer->expires = jiffies + msecs_to_jiffies(hrt_ctx->ma_period);
+       add_timer(ma_timer);
+}
+
+/*
+ * Stop periodic memory analysis.  Nothing is done when the feature is
+ * disabled (ma_period == 0), since no timer was armed in that case.
+ */
+void quadd_ma_stop(struct quadd_hrt_ctx *hrt_ctx)
+{
+       if (hrt_ctx->ma_period == 0)
+               return;
+
+       pr_info("QuadD MA is stopped\n");
+       del_timer_sync(&hrt_ctx->ma_timer);
+}
diff --git a/drivers/misc/tegra-profiler/ma.h b/drivers/misc/tegra-profiler/ma.h
new file mode 100644 (file)
index 0000000..be892b6
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * drivers/misc/tegra-profiler/ma.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_MA_H
+#define __QUADD_MA_H
+
+struct quadd_hrt_ctx;
+
+/* Arm / cancel the periodic memory-analysis timer of @hrt_ctx. */
+void quadd_ma_start(struct quadd_hrt_ctx *hrt_ctx);
+void quadd_ma_stop(struct quadd_hrt_ctx *hrt_ctx);
+
+#endif /* __QUADD_MA_H */
diff --git a/drivers/misc/tegra-profiler/main.c b/drivers/misc/tegra-profiler/main.c
new file mode 100644 (file)
index 0000000..71b9554
--- /dev/null
@@ -0,0 +1,471 @@
+/*
+ * drivers/misc/tegra-profiler/main.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "quadd.h"
+#include "armv7_pmu.h"
+#include "hrt.h"
+#include "pl310.h"
+#include "comm.h"
+#include "mmap.h"
+#include "debug.h"
+#include "tegra.h"
+#include "power_clk.h"
+#include "auth.h"
+#include "version.h"
+
+/* The single, module-wide profiler context. */
+static struct quadd_ctx ctx;
+
+/*
+ * Load the built-in default profiling parameters into ctx.param.
+ * Always returns 0.
+ */
+static int get_default_properties(void)
+{
+       /* Sampling rates. */
+       ctx.param.freq = 100;
+       ctx.param.ma_freq = 50;
+       ctx.param.power_rate_freq = 0;
+
+       /* Feature switches. */
+       ctx.param.use_freq = 1;
+       ctx.param.backtrace = 1;
+       ctx.param.system_wide = 1;
+       ctx.param.debug_samples = 0;
+
+       /* A single target slot, holding pid 0. */
+       ctx.param.nr_pids = 1;
+       ctx.param.pids[0] = 0;
+
+       return 0;
+}
+
+/*
+ * Start profiling.
+ *
+ * Enables the PMU and PL310 sources (when present), resets the mmap hash
+ * table and the comm layer, then starts the power/clock and HRT modules.
+ * Calling it while profiling is already running is a no-op.
+ *
+ * Returns 0 on success or the error of the step that failed.  On failure
+ * every step that had already succeeded is undone and the "started" flag is
+ * cleared again, so a later start() can retry.
+ */
+static int start(void)
+{
+       int err;
+
+       /* Already running: nothing to do. */
+       if (atomic_cmpxchg(&ctx.started, 0, 1))
+               return 0;
+
+       if (ctx.pmu) {
+               err = ctx.pmu->enable();
+               if (err) {
+                       pr_err("error: pmu enable\n");
+                       goto out_clear_started;
+               }
+       }
+
+       if (ctx.pl310) {
+               err = ctx.pl310->enable();
+               if (err) {
+                       pr_err("error: pl310 enable\n");
+                       goto out_disable_pmu;
+               }
+       }
+
+       quadd_mmap_reset();
+       ctx.comm->reset();
+
+       err = quadd_power_clk_start();
+       if (err < 0) {
+               pr_err("error: power_clk start\n");
+               goto out_disable_pl310;
+       }
+
+       err = quadd_hrt_start();
+       if (err) {
+               pr_err("error: hrt start\n");
+               goto out_stop_power_clk;
+       }
+
+       return 0;
+
+out_stop_power_clk:
+       quadd_power_clk_stop();
+out_disable_pl310:
+       if (ctx.pl310)
+               ctx.pl310->disable();
+out_disable_pmu:
+       if (ctx.pmu)
+               ctx.pmu->disable();
+out_clear_started:
+       atomic_set(&ctx.started, 0);
+       return err;
+}
+
+/*
+ * Stop profiling.  Does nothing unless profiling is currently started;
+ * otherwise stops HRT sampling, resets the mmap hash table and the comm
+ * layer, stops the power/clock module and disables the event sources.
+ */
+static void stop(void)
+{
+       /* Only the caller that flips started 1 -> 0 performs the teardown. */
+       if (!atomic_cmpxchg(&ctx.started, 1, 0))
+               return;
+
+       quadd_hrt_stop();
+
+       quadd_mmap_reset();
+       ctx.comm->reset();
+
+       quadd_power_clk_stop();
+
+       if (ctx.pmu)
+               ctx.pmu->disable();
+
+       if (ctx.pl310)
+               ctx.pl310->disable();
+}
+
+/*
+ * Return 1 if @event is listed among the supported events of source @si,
+ * 0 otherwise.
+ */
+static inline int is_event_supported(struct source_info *si, int event)
+{
+       int idx;
+
+       for (idx = 0; idx < si->nr_supported_events; idx++)
+               if (si->supported_events[idx] == event)
+                       return 1;
+
+       return 0;
+}
+
+/*
+ * Apply the profiling parameters requested in @param.
+ *
+ * @param:         requested parameters.
+ * @debug_app_uid: written only when the caller lacks CAP_SYS_ADMIN, does not
+ *                 own the target task, and the auth service returned a
+ *                 positive uid for param->package_name.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported frequency, pid count or
+ * event selection, -ESRCH if the target process does not exist, -EACCES if
+ * the target application is not debuggable, or the error reported by the
+ * auth service or by an event source's set_events().
+ */
+static int set_parameters(struct quadd_parameters *param, uid_t *debug_app_uid)
+{
+       int i, err;
+       int pmu_events_id[QUADD_MAX_COUNTERS];
+       int pl310_events_id = 0;
+       int nr_pmu = 0, nr_pl310 = 0;
+       int uid = 0;
+       int same_owner;
+       struct task_struct *task;
+
+       /* Validate the requested frequency, not the one currently stored. */
+       if (param->freq != 100 && param->freq != 1000 &&
+           param->freq != 10000)
+               return -EINVAL;
+
+       ctx.param.freq = param->freq;
+       ctx.param.ma_freq = param->ma_freq;
+       ctx.param.backtrace = param->backtrace;
+       ctx.param.use_freq = param->use_freq;
+       ctx.param.system_wide = param->system_wide;
+       ctx.param.power_rate_freq = param->power_rate_freq;
+       ctx.param.debug_samples = param->debug_samples;
+
+       /* Currently only one process */
+       if (param->nr_pids != 1)
+               return -EINVAL;
+
+       /*
+        * No reference is taken on the task, so it may only be dereferenced
+        * inside the RCU read-side critical section.
+        */
+       rcu_read_lock();
+       task = pid_task(find_vpid(param->pids[0]), PIDTYPE_PID);
+       if (!task) {
+               rcu_read_unlock();
+               pr_err("Process not found: %u\n", param->pids[0]);
+               return -ESRCH;
+       }
+
+       pr_info("owner/task uids: %u/%u\n", current_fsuid(), task_uid(task));
+       same_owner = (current_fsuid() == task_uid(task));
+       rcu_read_unlock();
+
+       if (!capable(CAP_SYS_ADMIN) && !same_owner) {
+               uid = quadd_auth_check_debug_flag(param->package_name);
+               if (uid < 0) {
+                       pr_err("Error: QuadD security service\n");
+                       return uid;
+               } else if (uid == 0) {
+                       pr_err("Error: app is not debuggable\n");
+                       return -EACCES;
+               }
+
+               *debug_app_uid = uid;
+               pr_info("debug_app_uid: %u\n", uid);
+       }
+
+       for (i = 0; i < param->nr_pids; i++)
+               ctx.param.pids[i] = param->pids[i];
+
+       ctx.param.nr_pids = param->nr_pids;
+
+       /* Sort the requested events into PMU and PL310 events. */
+       for (i = 0; i < param->nr_events; i++) {
+               int event = param->events[i];
+
+               if (ctx.pmu && ctx.pmu_info.nr_supported_events > 0
+                       && is_event_supported(&ctx.pmu_info, event)) {
+                       /* Never write past the end of the on-stack array. */
+                       if (nr_pmu >= QUADD_MAX_COUNTERS) {
+                               pr_err("error: too many PMU events\n");
+                               return -EINVAL;
+                       }
+                       pmu_events_id[nr_pmu++] = event;
+
+                       pr_info("PMU active event: %s\n",
+                               quadd_get_event_str(event));
+               } else if (ctx.pl310 &&
+                          ctx.pl310_info.nr_supported_events > 0 &&
+                          is_event_supported(&ctx.pl310_info, event)) {
+                       pl310_events_id = event;
+
+                       pr_info("PL310 active event: %s\n",
+                               quadd_get_event_str(event));
+
+                       /* Only one PL310 event: reject the second one. */
+                       if (++nr_pl310 > 1) {
+                               pr_err("error: multiply pl310 events\n");
+                               return -EINVAL;
+                       }
+               } else {
+                       pr_err("Bad event: %s\n",
+                              quadd_get_event_str(event));
+                       return -EINVAL;
+               }
+       }
+
+       if (ctx.pmu) {
+               if (nr_pmu > 0) {
+                       err = ctx.pmu->set_events(pmu_events_id, nr_pmu);
+                       if (err) {
+                               pr_err("PMU set parameters: error\n");
+                               return err;
+                       }
+                       ctx.pmu_info.active = 1;
+               } else {
+                       ctx.pmu_info.active = 0;
+                       ctx.pmu->set_events(NULL, 0);
+               }
+       }
+
+       if (ctx.pl310) {
+               if (nr_pl310 == 1) {
+                       err = ctx.pl310->set_events(&pl310_events_id, 1);
+                       if (err) {
+                               pr_err("pl310 set_parameters: error\n");
+                               return err;
+                       }
+                       ctx.pl310_info.active = 1;
+               } else {
+                       ctx.pl310_info.active = 0;
+                       ctx.pl310->set_events(NULL, 0);
+               }
+       }
+       pr_info("New parameters have been applied\n");
+
+       return 0;
+}
+
+/*
+ * Describe in @cap what this build/hardware can profile: whether a PMU and
+ * an L2 cache event source exist, which individual events are supported,
+ * and a few fixed feature flags.
+ *
+ * An event id that the switch statements below do not know triggers BUG().
+ */
+static void get_capabilities(struct quadd_comm_cap *cap)
+{
+       int i, event;
+       struct quadd_events_cap *events_cap = &cap->events_cap;
+
+       cap->pmu = ctx.pmu ? 1 : 0;
+
+       /* Defaults for the case where no L2 event source is found. */
+       cap->l2_cache = 0;
+       cap->l2_multiple_events = 0;
+
+       if (ctx.pl310) {
+               cap->l2_cache = 1;
+               cap->l2_multiple_events = 0;
+       } else if (ctx.pmu) {
+               struct source_info *s = &ctx.pmu_info;
+               for (i = 0; i < s->nr_supported_events; i++) {
+                       event = s->supported_events[i];
+                       if (event == QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES ||
+                           event == QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES ||
+                           event == QUADD_EVENT_TYPE_L2_ICACHE_MISSES) {
+                               cap->l2_cache = 1;
+                               cap->l2_multiple_events = 1;
+                               break;
+                       }
+               }
+       }
+
+       events_cap->cpu_cycles = 0;
+       events_cap->l1_dcache_read_misses = 0;
+       events_cap->l1_dcache_write_misses = 0;
+       events_cap->l1_icache_misses = 0;
+
+       events_cap->instructions = 0;
+       events_cap->branch_instructions = 0;
+       events_cap->branch_misses = 0;
+       events_cap->bus_cycles = 0;
+
+       events_cap->l2_dcache_read_misses = 0;
+       events_cap->l2_dcache_write_misses = 0;
+       events_cap->l2_icache_misses = 0;
+
+       /* Events provided by the PL310 source (L2 events only). */
+       if (ctx.pl310) {
+               struct source_info *s = &ctx.pl310_info;
+               for (i = 0; i < s->nr_supported_events; i++) {
+                       event = s->supported_events[i];
+
+                       switch (event) {
+                       case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES:
+                               events_cap->l2_dcache_read_misses = 1;
+                               break;
+                       case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES:
+                               events_cap->l2_dcache_write_misses = 1;
+                               break;
+                       case QUADD_EVENT_TYPE_L2_ICACHE_MISSES:
+                               events_cap->l2_icache_misses = 1;
+                               break;
+
+                       default:
+                               BUG();
+                               break;
+                       }
+               }
+       }
+
+       /* Events provided by the PMU source. */
+       if (ctx.pmu) {
+               struct source_info *s = &ctx.pmu_info;
+               for (i = 0; i < s->nr_supported_events; i++) {
+                       event = s->supported_events[i];
+
+                       switch (event) {
+                       case QUADD_EVENT_TYPE_CPU_CYCLES:
+                               events_cap->cpu_cycles = 1;
+                               break;
+                       case QUADD_EVENT_TYPE_INSTRUCTIONS:
+                               events_cap->instructions = 1;
+                               break;
+                       case QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS:
+                               events_cap->branch_instructions = 1;
+                               break;
+                       case QUADD_EVENT_TYPE_BRANCH_MISSES:
+                               events_cap->branch_misses = 1;
+                               break;
+                       case QUADD_EVENT_TYPE_BUS_CYCLES:
+                               events_cap->bus_cycles = 1;
+                               break;
+
+                       case QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES:
+                               events_cap->l1_dcache_read_misses = 1;
+                               break;
+                       case QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES:
+                               events_cap->l1_dcache_write_misses = 1;
+                               break;
+                       case QUADD_EVENT_TYPE_L1_ICACHE_MISSES:
+                               events_cap->l1_icache_misses = 1;
+                               break;
+
+                       case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES:
+                               events_cap->l2_dcache_read_misses = 1;
+                               break;
+                       case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES:
+                               events_cap->l2_dcache_write_misses = 1;
+                               break;
+                       case QUADD_EVENT_TYPE_L2_ICACHE_MISSES:
+                               events_cap->l2_icache_misses = 1;
+                               break;
+
+                       default:
+                               BUG();
+                               break;
+                       }
+               }
+       }
+
+       cap->tegra_lp_cluster = quadd_is_cpu_with_lp_cluster();
+       cap->power_rate = 1;
+       cap->blocked_read = 0;
+}
+
+/* Control-interface hook: report the module state, taken from the HRT. */
+static void get_state(struct quadd_module_state *state)
+{
+       quadd_hrt_get_state(state);
+}
+
+/* Callbacks handed to the comm layer via quadd_comm_events_init(). */
+static struct quadd_comm_control_interface control = {
+       .start                  = start,
+       .stop                   = stop,
+       .set_parameters         = set_parameters,
+       .get_capabilities       = get_capabilities,
+       .get_state              = get_state,
+};
+
+/*
+ * Module entry point.
+ *
+ * Loads the default parameters, probes the PMU (mandatory) and the PL310
+ * (optional) event sources, then initialises the HRT, mmap, power/clock,
+ * comm and auth sub-modules in that order.
+ *
+ * Returns 0 on success or a negative errno.  On failure the sub-modules
+ * that were already initialised are torn down again in reverse order.
+ * NOTE(review): no teardown routine for the PMU/PL310 sources is visible
+ * here, so those are left as they are on the error paths.
+ */
+static int __init quadd_module_init(void)
+{
+       int i, nr_events, err;
+       int *events;
+
+       pr_info("Branch: %s\n", QUADD_MODULE_BRANCH);
+       pr_info("Version: %s\n", QUADD_MODULE_VERSION);
+       pr_info("Samples version: %d\n", QUADD_SAMPLES_VERSION);
+       pr_info("IO version: %d\n", QUADD_IO_VERSION);
+
+#ifdef QM_DEBUG_SAMPLES_ENABLE
+       pr_info("############## DEBUG VERSION! ##############\n");
+#endif
+       atomic_set(&ctx.started, 0);
+
+       get_default_properties();
+
+       ctx.pmu_info.active = 0;
+       ctx.pl310_info.active = 0;
+
+       ctx.pmu = quadd_armv7_pmu_init();
+       if (!ctx.pmu) {
+               pr_err("PMU init failed\n");
+               return -ENODEV;
+       }
+
+       events = ctx.pmu_info.supported_events;
+       nr_events = ctx.pmu->get_supported_events(events);
+       ctx.pmu_info.nr_supported_events = nr_events;
+
+       pr_info("PMU: amount of events: %d\n", nr_events);
+
+       for (i = 0; i < nr_events; i++)
+               pr_info("PMU event: %s\n",
+                       quadd_get_event_str(events[i]));
+
+       /* The PL310 source is optional. */
+       ctx.pl310 = quadd_l2x0_events_init();
+       if (ctx.pl310) {
+               events = ctx.pl310_info.supported_events;
+               nr_events = ctx.pl310->get_supported_events(events);
+               ctx.pl310_info.nr_supported_events = nr_events;
+
+               pr_info("pl310 success, amount of events: %d\n",
+                       nr_events);
+
+               for (i = 0; i < nr_events; i++)
+                       pr_info("pl310 event: %s\n",
+                               quadd_get_event_str(events[i]));
+       } else {
+               pr_info("PL310 not found\n");
+       }
+
+       ctx.hrt = quadd_hrt_init(&ctx);
+       if (!ctx.hrt) {
+               pr_err("error: HRT init failed\n");
+               return -ENODEV;
+       }
+
+       ctx.mmap = quadd_mmap_init(&ctx);
+       if (!ctx.mmap) {
+               pr_err("error: MMAP init failed\n");
+               err = -ENODEV;
+               goto out_hrt;
+       }
+
+       err = quadd_power_clk_init(&ctx);
+       if (err < 0) {
+               pr_err("error: POWER CLK init failed\n");
+               goto out_mmap;
+       }
+
+       ctx.comm = quadd_comm_events_init(&control);
+       if (!ctx.comm) {
+               pr_err("error: COMM init failed\n");
+               err = -ENODEV;
+               goto out_power_clk;
+       }
+
+       err = quadd_auth_init(&ctx);
+       if (err < 0) {
+               pr_err("error: auth failed\n");
+               goto out_comm;
+       }
+
+       return 0;
+
+out_comm:
+       quadd_comm_events_exit();
+out_power_clk:
+       quadd_power_clk_deinit();
+out_mmap:
+       quadd_mmap_deinit();
+out_hrt:
+       quadd_hrt_deinit();
+       return err;
+}
+
+/* Module exit point: tear down the HRT, mmap, power/clock, comm and auth
+ * sub-modules, in that order.
+ */
+static void __exit quadd_module_exit(void)
+{
+       pr_info("QuadD module exit\n");
+
+       quadd_hrt_deinit();
+       quadd_mmap_deinit();
+       quadd_power_clk_deinit();
+       quadd_comm_events_exit();
+       quadd_auth_deinit();
+}
+
+module_init(quadd_module_init);
+module_exit(quadd_module_exit);
+
+MODULE_LICENSE("GPL");
+
+MODULE_AUTHOR("Nvidia Ltd");
+MODULE_DESCRIPTION("Tegra profiler");
diff --git a/drivers/misc/tegra-profiler/mmap.c b/drivers/misc/tegra-profiler/mmap.c
new file mode 100644 (file)
index 0000000..a52b11f
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * drivers/misc/tegra-profiler/mmap.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/crc32.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "mmap.h"
+#include "hrt.h"
+#include "debug.h"
+
+/* Module-wide mmap bookkeeping: table of already reported hashes + lock. */
+static struct quadd_mmap_ctx mmap_ctx;
+
+/*
+ * Insert @key into @array, which holds @length elements and is expected to
+ * be sorted in ascending order, unless the key is already present.  The
+ * caller must guarantee room for one additional element.
+ *
+ * Returns 1 if @key was inserted, 0 if it was already in the array.
+ */
+static int binary_search_and_add(unsigned int *array,
+                       unsigned int length, unsigned int key)
+{
+       unsigned int lo = 0, hi = length, pivot;
+
+       if (length == 0) {
+               array[0] = key;
+               return 1;
+       }
+
+       if (length == 1 && array[0] == key)
+               return 0;
+
+       /* Smaller than the first element: shift all up, insert in front. */
+       if (key < array[0]) {
+               memmove(array + 1, array, length * sizeof(*array));
+               array[0] = key;
+               return 1;
+       }
+
+       /* Larger than the last element: append. */
+       if (key > array[length - 1]) {
+               array[length] = key;
+               return 1;
+       }
+
+       /* Lower-bound search: first slot whose value is >= key. */
+       while (lo < hi) {
+               pivot = lo + (hi - lo) / 2;
+
+               if (array[pivot] < key)
+                       lo = pivot + 1;
+               else
+                       hi = pivot;
+       }
+
+       if (array[hi] == key)
+               return 0;
+
+       /* Open a gap at the insertion point and drop the key in. */
+       memmove(&array[hi + 1], &array[hi], (length - hi) * sizeof(*array));
+       array[hi] = key;
+       return 1;
+}
+
+/*
+ * Record @key in the table of already reported hashes.
+ *
+ * Returns 0 if the key was not in the table and has been added, 1 if it was
+ * already present or the table is full.
+ */
+static int check_hash(u32 key)
+{
+       int seen = 1;
+       unsigned long flags;
+
+       spin_lock_irqsave(&mmap_ctx.lock, flags);
+
+       if (mmap_ctx.nr_hashes < QUADD_MMAP_SIZE_ARRAY &&
+           binary_search_and_add(mmap_ctx.hash_array,
+                                 mmap_ctx.nr_hashes, key) > 0) {
+               mmap_ctx.nr_hashes++;
+               seen = 0;
+       }
+
+       spin_unlock_irqrestore(&mmap_ctx.lock, flags);
+
+       return seen;
+}
+
+/*
+ * Describe the mapping that contains the sampled instruction pointer.
+ *
+ * For a user-mode sample the file backing the VMA around the instruction
+ * pointer is looked up; for a kernel-mode sample the module containing the
+ * address.  @sample receives the start address, length and file offset of
+ * that mapping.  Each distinct (name, addr, len) combination is reported
+ * only once until quadd_mmap_reset() is called.
+ *
+ * @cpu_ctx:      context whose mmap_filename buffer receives the name; it is
+ *                written without locking, so it is assumed to be private to
+ *                the caller (TODO confirm against the caller).
+ * @regs:         register state of the sample.
+ * @sample:       filled in when a mapping is found.
+ * @extra_length: set to the name length including the terminating NUL,
+ *                rounded up to a multiple of 8, or to 0 if nothing is
+ *                returned.
+ *
+ * Returns cpu_ctx->mmap_filename holding the NUL-terminated, zero-padded
+ * name, or NULL if there is nothing (new) to report.
+ */
+char *quadd_get_mmap(struct quadd_cpu_context *cpu_ctx,
+                    struct pt_regs *regs, struct quadd_mmap_data *sample,
+                    unsigned int *extra_length)
+{
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       char *buf = cpu_ctx->mmap_filename;
+       char *file_name = NULL;
+       int length, length_aligned;
+       u32 crc;
+       unsigned long ip;
+
+       *extra_length = 0;
+
+       if (!mm)
+               return NULL;
+
+       ip = instruction_pointer(regs);
+
+       if (user_mode(regs)) {
+               /*
+                * find_vma() returns the first VMA that ends above ip; it
+                * contains ip only if it also starts at or below it.
+                */
+               vma = find_vma(mm, ip);
+               if (vma && ip >= vma->vm_start && vma->vm_file) {
+                       /*
+                        * Build the path directly in the caller's buffer
+                        * rather than in the shared mmap_ctx.tmp_buf, which
+                        * is not protected against concurrent callers.
+                        * d_path() reports failure with an ERR_PTR() value,
+                        * never with NULL.
+                        */
+                       file_name = d_path(&vma->vm_file->f_path, buf,
+                                          sizeof(cpu_ctx->mmap_filename));
+                       if (IS_ERR(file_name))
+                               return NULL;
+
+                       sample->addr = vma->vm_start;
+                       sample->len = vma->vm_end - vma->vm_start;
+                       sample->pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT;
+               }
+       } else {
+               struct module *mod;
+
+               /*
+                * Keep preemption disabled until everything needed from
+                * the module has been copied.
+                */
+               preempt_disable();
+               mod = __module_address(ip);
+               if (mod) {
+                       strlcpy(buf, mod->name,
+                               sizeof(cpu_ctx->mmap_filename));
+                       file_name = buf;
+
+                       sample->addr = (u32) mod->module_core;
+                       sample->len = mod->core_size;
+                       sample->pgoff = 0;
+               }
+               preempt_enable();
+       }
+
+       if (!file_name)
+               return NULL;
+
+       length = strlen(file_name);
+       if (length >= PATH_MAX)
+               return NULL;
+
+       crc = crc32_le(~0, file_name, length);
+       crc = crc32_le(crc, (unsigned char *)&sample->addr,
+                      sizeof(sample->addr));
+       crc = crc32_le(crc, (unsigned char *)&sample->len,
+                      sizeof(sample->len));
+
+       /* Already reported (or the hash table is full): nothing to emit. */
+       if (check_hash(crc))
+               return NULL;
+
+       /* d_path() places the name at the end of buf; move it to the front. */
+       if (file_name != buf)
+               memmove(buf, file_name, length + 1);
+
+       /* Zero the alignment padding so no stale bytes are handed out. */
+       length_aligned = (length + 1 + 7) & (~7);
+       memset(buf + length + 1, 0, length_aligned - (length + 1));
+
+       *extra_length = length_aligned;
+       return buf;
+}
+
+/*
+ * Initialise the mmap bookkeeping for @quadd_ctx: allocate the hash table
+ * (QUADD_MMAP_SIZE_ARRAY entries) and the temporary path buffer, and set up
+ * the lock.
+ *
+ * Returns a pointer to the mmap context, or NULL if an allocation failed,
+ * in which case nothing stays allocated.
+ */
+struct quadd_mmap_ctx *quadd_mmap_init(struct quadd_ctx *quadd_ctx)
+{
+       u32 *hash;
+       char *tmp;
+
+       mmap_ctx.quadd_ctx = quadd_ctx;
+       mmap_ctx.nr_hashes = 0;
+       spin_lock_init(&mmap_ctx.lock);
+
+       hash = kcalloc(QUADD_MMAP_SIZE_ARRAY, sizeof(*hash), GFP_KERNEL);
+       if (!hash) {
+               pr_err("Alloc error\n");
+               return NULL;
+       }
+
+       tmp = kzalloc(PATH_MAX + sizeof(unsigned long long),
+                     GFP_KERNEL);
+       if (!tmp) {
+               pr_err("Alloc error\n");
+               /* Do not leak the hash table when the second alloc fails. */
+               kfree(hash);
+               return NULL;
+       }
+
+       /* Publish the buffers only once both allocations have succeeded. */
+       mmap_ctx.hash_array = hash;
+       mmap_ctx.tmp_buf = tmp;
+
+       return &mmap_ctx;
+}
+
+/*
+ * Forget all recorded hashes so that every mapping is reported again.
+ * Only the element count is reset; the table contents are left in place.
+ */
+void quadd_mmap_reset(void)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&mmap_ctx.lock, flags);
+       mmap_ctx.nr_hashes = 0;
+       spin_unlock_irqrestore(&mmap_ctx.lock, flags);
+}
+
+/*
+ * Release the hash table and the temporary path buffer.  Both pointers are
+ * cleared under the lock after being freed.
+ */
+void quadd_mmap_deinit(void)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&mmap_ctx.lock, flags);
+
+       kfree(mmap_ctx.hash_array);
+       mmap_ctx.hash_array = NULL;
+
+       kfree(mmap_ctx.tmp_buf);
+       mmap_ctx.tmp_buf = NULL;
+
+       spin_unlock_irqrestore(&mmap_ctx.lock, flags);
+}
diff --git a/drivers/misc/tegra-profiler/mmap.h b/drivers/misc/tegra-profiler/mmap.h
new file mode 100644 (file)
index 0000000..f12ec4d
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * drivers/misc/tegra-profiler/mmap.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_MMAP_H
+#define __QUADD_MMAP_H
+
+#include <linux/types.h>
+
+struct quadd_cpu_context;
+struct quadd_ctx;
+struct quadd_mmap_data;
+
+#define QUADD_MMAP_SIZE_ARRAY  4096
+
+/* State used to report each distinct mapping only once. */
+struct quadd_mmap_ctx {
+       /* Table of up to QUADD_MMAP_SIZE_ARRAY hashes already reported. */
+       u32 *hash_array;
+       /* Number of valid entries in hash_array. */
+       unsigned int nr_hashes;
+       /* Protects hash_array and nr_hashes. */
+       spinlock_t lock;
+
+       /* Scratch buffer allocated by quadd_mmap_init(). */
+       char *tmp_buf;
+
+       /* Back-pointer to the owning profiler context. */
+       struct quadd_ctx *quadd_ctx;
+};
+
+/*
+ * Look up the mapping containing the sampled instruction pointer; returns
+ * its name (or NULL) and sets *extra_length to the padded name length.
+ */
+char *quadd_get_mmap(struct quadd_cpu_context *cpu_ctx,
+                    struct pt_regs *regs, struct quadd_mmap_data *sample,
+                    unsigned int *extra_length);
+
+
+/* Set up / tear down the mmap bookkeeping; init returns NULL on failure. */
+struct quadd_mmap_ctx *quadd_mmap_init(struct quadd_ctx *quadd_ctx);
+void quadd_mmap_deinit(void);
+/* Forget all recorded hashes. */
+void quadd_mmap_reset(void);
+
+#endif  /* __QUADD_MMAP_H */
diff --git a/drivers/misc/tegra-profiler/pl310.c b/drivers/misc/tegra-profiler/pl310.c
new file mode 100644 (file)
index 0000000..0108308
--- /dev/null
@@ -0,0 +1,317 @@
+/*
+ * drivers/misc/tegra-profiler/pl310.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <asm/hardware/cache-l2x0.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "quadd.h"
+#include "pl310.h"
+#include "debug.h"
+
+DEFINE_PER_CPU(u32, pl310_prev_val);
+
+static struct l2x0_context l2x0_ctx;
+
+static void l2x0_enable_event_counters(u32 event0, u32 event1)
+{
+       u32 reg_val;
+       void __iomem *base = l2x0_ctx.l2x0_base;
+
+       /* configure counter0 */
+       reg_val = event0;
+       writel_relaxed(reg_val, base + L2X0_EVENT_CNT0_CFG);
+
+       /* configure counter1 */
+       reg_val = event1;
+       writel_relaxed(reg_val, base + L2X0_EVENT_CNT1_CFG);
+
+       /* enable event counting */
+       reg_val = L2X0_EVENT_CNT_ENABLE;
+       writel_relaxed(reg_val, base + L2X0_EVENT_CNT_CTRL);
+}
+
+static void __maybe_unused l2x0_disable_event_counters(void)
+{
+       u32 reg_val;
+       void __iomem *base = l2x0_ctx.l2x0_base;
+
+       /* disable event counting */
+       reg_val = 0;
+       writel_relaxed(reg_val, base + L2X0_EVENT_CNT_CTRL);
+}
+
+static void l2x0_stop_event_counters(void)
+{
+       void __iomem *base = l2x0_ctx.l2x0_base;
+
+       writel_relaxed(0, base + L2X0_EVENT_CNT_CTRL);
+
+       writel_relaxed(0, base + L2X0_EVENT_CNT0_CFG);
+       writel_relaxed(0, base + L2X0_EVENT_CNT1_CFG);
+}
+
+static void l2x0_reset_event_counters(void)
+{
+       u32 reg_val;
+       void __iomem *base = l2x0_ctx.l2x0_base;
+
+       reg_val = readl_relaxed(base + L2X0_EVENT_CNT_CTRL);
+       reg_val |= L2X0_EVENT_CNT_RESET_CNT0 | L2X0_EVENT_CNT_RESET_CNT1;
+       writel_relaxed(reg_val, base + L2X0_EVENT_CNT_CTRL);
+}
+
+static u32 l2x0_read_event_counter(enum quadd_l2x0_counter counter)
+{
+       u32 reg_val = 0;
+       void __iomem *base = l2x0_ctx.l2x0_base;
+
+       switch (counter) {
+       case QUADD_L2X0_COUNTER0:
+               reg_val = readl_relaxed(base + L2X0_EVENT_CNT0_VAL);
+               break;
+       case QUADD_L2X0_COUNTER1:
+               reg_val = readl_relaxed(base + L2X0_EVENT_CNT1_VAL);
+               break;
+       }
+
+       return reg_val;
+}
+
+static void l2x0_enable_perf_event(enum quadd_l2x0_event_type type)
+{
+       l2x0_reset_event_counters();
+
+       switch (type) {
+       case QUADD_L2X0_TYPE_DATA_READ_MISSES:
+               l2x0_enable_event_counters(L2X0_EVENT_CNT_CFG_DRREQ,
+                                          L2X0_EVENT_CNT_CFG_DRHIT);
+               break;
+       case QUADD_L2X0_TYPE_DATA_WRITE_MISSES:
+               l2x0_enable_event_counters(L2X0_EVENT_CNT_CFG_DWREQ,
+                                          L2X0_EVENT_CNT_CFG_DWHIT);
+               break;
+       case QUADD_L2X0_TYPE_INSTRUCTION_MISSES:
+               l2x0_enable_event_counters(L2X0_EVENT_CNT_CFG_IRREQ,
+                                          L2X0_EVENT_CNT_CFG_IRHIT);
+               break;
+       }
+}
+
+static u32 l2x0_read_perf_event(void)
+{
+       u32 count_req, count_hit, count_miss;
+
+       count_req = l2x0_read_event_counter(QUADD_L2X0_COUNTER0);
+       count_hit = l2x0_read_event_counter(QUADD_L2X0_COUNTER1);
+
+       count_miss = count_req - count_hit;
+       if (count_req < count_hit)
+               return 0;
+
+       return count_miss;
+}
+
+static void l2x0_clear_values(void)
+{
+       int cpu_id;
+       for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++)
+               per_cpu(pl310_prev_val, cpu_id) = 0;
+}
+
+static int l2x0_events_enable(void)
+{
+       return 0;
+}
+
+static void l2x0_events_disable(void)
+{
+}
+
+static void l2x0_events_start(void)
+{
+       unsigned long flags;
+
+       if (l2x0_ctx.l2x0_event_type < 0)
+               return;
+
+       spin_lock_irqsave(&l2x0_ctx.lock, flags);
+       l2x0_clear_values();
+       l2x0_enable_perf_event(l2x0_ctx.l2x0_event_type);
+       spin_unlock_irqrestore(&l2x0_ctx.lock, flags);
+
+       qm_debug_start_source(QUADD_EVENT_SOURCE_PL310);
+}
+
+static void l2x0_events_stop(void)
+{
+       unsigned long flags;
+
+       if (l2x0_ctx.l2x0_event_type < 0)
+               return;
+
+       spin_lock_irqsave(&l2x0_ctx.lock, flags);
+       l2x0_stop_event_counters();
+       l2x0_clear_values();
+       spin_unlock_irqrestore(&l2x0_ctx.lock, flags);
+
+       qm_debug_stop_source(QUADD_EVENT_SOURCE_PL310);
+}
+
+static int __maybe_unused l2x0_events_read(struct event_data *events)
+{
+       unsigned long flags;
+
+       if (l2x0_ctx.l2x0_event_type < 0) {
+               pr_err_once("pl310 value: %u\n", events[0].val);
+               return 0;
+       }
+
+       events[0].event_source = QUADD_EVENT_SOURCE_PL310;
+       events[0].event_id = l2x0_ctx.event_id;
+
+       spin_lock_irqsave(&l2x0_ctx.lock, flags);
+       events[0].val = l2x0_read_perf_event();
+       spin_unlock_irqrestore(&l2x0_ctx.lock, flags);
+
+       events[0].prev_val = __get_cpu_var(pl310_prev_val);
+
+       __get_cpu_var(pl310_prev_val) = events[0].val;
+
+       qm_debug_read_counter(l2x0_ctx.event_id, events[0].prev_val,
+                             events[0].val);
+
+       return 1;
+}
+
+static int __maybe_unused l2x0_events_read_emulate(struct event_data *events)
+{
+       static u32 val;
+
+       if (val > 100)
+               val = 0;
+
+       events[0].event_source = QUADD_EVENT_SOURCE_PL310;
+       events[0].event_id = QUADD_L2X0_TYPE_DATA_READ_MISSES;
+
+       events[0].val = val;
+       events[0].prev_val = __get_cpu_var(pl310_prev_val);
+
+       __get_cpu_var(pl310_prev_val) = val;
+
+       val += 10;
+
+       return 1;
+}
+
+static int l2x0_set_events(int *events, int size)
+{
+       if (!events || size == 0) {
+               l2x0_ctx.l2x0_event_type = -1;
+               l2x0_ctx.event_id = -1;
+               return 0;
+       }
+
+       if (size != 1) {
+               pr_err("Error: number of events more than one\n");
+               return -ENOSPC;
+       }
+
+       switch (*events) {
+       case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES:
+               l2x0_ctx.l2x0_event_type = QUADD_L2X0_TYPE_DATA_READ_MISSES;
+               break;
+       case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES:
+               l2x0_ctx.l2x0_event_type = QUADD_L2X0_TYPE_DATA_WRITE_MISSES;
+               break;
+       case QUADD_EVENT_TYPE_L2_ICACHE_MISSES:
+               l2x0_ctx.l2x0_event_type = QUADD_L2X0_TYPE_INSTRUCTION_MISSES;
+               break;
+       default:
+               pr_err("Error event: %s\n", quadd_get_event_str(*events));
+               return -EINVAL; /* negative errno, like -ENOSPC above */
+       }
+       l2x0_ctx.event_id = *events;
+
+       pr_info("Event has been added: id/l2x0: %s/%#x\n",
+               quadd_get_event_str(*events), l2x0_ctx.l2x0_event_type);
+       return 0;
+}
+
+static int get_supported_events(int *events)
+{
+       events[0] = QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES;
+       events[1] = QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES;
+       events[2] = QUADD_EVENT_TYPE_L2_ICACHE_MISSES;
+       return 3;
+}
+
+static struct quadd_event_source_interface l2x0_int = {
+       .enable                 = l2x0_events_enable,
+       .disable                = l2x0_events_disable,
+
+       .start                  = l2x0_events_start,
+       .stop                   = l2x0_events_stop,
+
+#ifndef QUADD_USE_EMULATE_COUNTERS
+       .read                   = l2x0_events_read,
+#else
+       .read                   = l2x0_events_read_emulate,
+#endif
+       .set_events             = l2x0_set_events,
+       .get_supported_events   = get_supported_events,
+};
+
+struct quadd_event_source_interface *quadd_l2x0_events_init(void)
+{
+       void __iomem *base;
+       unsigned long phys_addr;
+
+       l2x0_ctx.l2x0_event_type = -1;
+       l2x0_ctx.event_id = -1;
+
+       l2x0_ctx.l2x0_base = NULL;
+
+       phys_addr = quadd_get_pl310_phys_addr();
+       if (!phys_addr)
+               return NULL;
+
+       base = ioremap(phys_addr, SZ_4K);
+       if (base) {
+               u32 cache_id = readl(base + L2X0_CACHE_ID);
+
+               if ((cache_id & 0xff0003c0) != 0x410000c0) {
+                       iounmap(base);
+                       return NULL;
+               }
+       }
+
+       if (!base)
+               return NULL;
+
+       l2x0_ctx.l2x0_base = base;
+
+       l2x0_clear_values();
+       spin_lock_init(&l2x0_ctx.lock);
+
+       pr_debug("pl310 init success, l2x0_base: %p\n", base);
+       return &l2x0_int;
+}
diff --git a/drivers/misc/tegra-profiler/pl310.h b/drivers/misc/tegra-profiler/pl310.h
new file mode 100644 (file)
index 0000000..96e60bf
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * drivers/misc/tegra-profiler/pl310.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_PL310_H
+#define __QUADD_PL310_H
+
+/*
+ * l2x0 event type
+ */
+enum quadd_l2x0_event_type {
+       QUADD_L2X0_TYPE_DATA_READ_MISSES        = 0,
+       QUADD_L2X0_TYPE_DATA_WRITE_MISSES       = 1,
+       QUADD_L2X0_TYPE_INSTRUCTION_MISSES      = 2,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/io.h>
+
+#define L2X0_EVENT_CNT_ENABLE          (1 << 0)
+#define L2X0_EVENT_CNT_RESET_CNT0      (1 << 1)
+#define L2X0_EVENT_CNT_RESET_CNT1      (2 << 1)
+
+
+#define L2X0_EVENT_CNT_CFG_DRHIT       (2 << 2)
+#define L2X0_EVENT_CNT_CFG_DRREQ       (3 << 2)
+
+#define L2X0_EVENT_CNT_CFG_DWHIT       (4 << 2)
+#define L2X0_EVENT_CNT_CFG_DWREQ       (5 << 2)
+
+#define L2X0_EVENT_CNT_CFG_IRHIT       (7 << 2)
+#define L2X0_EVENT_CNT_CFG_IRREQ       (8 << 2)
+
+/*
+ * l2x0 counters
+ */
+enum quadd_l2x0_counter {
+       QUADD_L2X0_COUNTER1 = 0,
+       QUADD_L2X0_COUNTER0 = 1,
+};
+
+struct l2x0_context {
+       int l2x0_event_type;
+       int event_id;
+
+       void __iomem *l2x0_base;
+       spinlock_t lock;
+};
+
+struct quadd_event_source_interface;
+
+struct quadd_event_source_interface *quadd_l2x0_events_init(void);
+
+static inline unsigned long quadd_get_pl310_phys_addr(void)
+{
+       unsigned long phys_addr = 0;
+
+#if defined(CONFIG_ARCH_TEGRA)
+       phys_addr = 0x50043000;
+#endif
+       return phys_addr;
+}
+
+#endif  /* __KERNEL__ */
+
+#endif /* __QUADD_PL310_H */
diff --git a/drivers/misc/tegra-profiler/power_clk.c b/drivers/misc/tegra-profiler/power_clk.c
new file mode 100644 (file)
index 0000000..b5b73f4
--- /dev/null
@@ -0,0 +1,454 @@
+/*
+ * drivers/misc/tegra-profiler/power_clk.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/cpufreq.h>
+#include <linux/clk.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/timer.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "power_clk.h"
+#include "quadd.h"
+#include "hrt.h"
+#include "comm.h"
+#include "debug.h"
+
+#define POWER_CLK_MAX_VALUES   32
+
+typedef int (*notifier_call_ft)(struct notifier_block *, unsigned long, void *);
+
+struct power_clk_data {
+       unsigned long value;
+       unsigned long prev;
+};
+
+struct power_clk_source {
+       int type;
+
+       struct clk *clkp;
+       struct notifier_block nb;
+
+       int nr;
+       struct power_clk_data data[POWER_CLK_MAX_VALUES];
+
+       unsigned long long counter;
+       atomic_t active;
+
+       struct mutex lock;
+};
+
+struct power_clk_context_s {
+       struct power_clk_source cpu;
+       struct power_clk_source gpu;
+       struct power_clk_source emc;
+
+       struct timer_list timer;
+       unsigned int period;
+
+       struct quadd_ctx *quadd_ctx;
+};
+
+enum {
+       QUADD_POWER_CLK_CPU = 1,
+       QUADD_POWER_CLK_GPU,
+       QUADD_POWER_CLK_EMC,
+};
+
+static struct power_clk_context_s power_ctx;
+
+static void check_clks(void);
+
+static void read_source(struct power_clk_source *s)
+{
+       int i;
+
+       mutex_lock(&s->lock);
+
+       switch (s->type) {
+       case QUADD_POWER_CLK_CPU:
+               /* update cpu frequency */
+               for (i = 0; i < nr_cpu_ids; i++)
+                       s->data[i].value = cpufreq_get(i);
+               break;
+
+       case QUADD_POWER_CLK_GPU:
+               /* update gpu frequency; clk_get_sys() fails with ERR_PTR */
+               s->clkp = clk_get_sys("3d", NULL);
+               if (!IS_ERR_OR_NULL(s->clkp)) {
+                       s->data[0].value =
+                               clk_get_rate(s->clkp) / 1000;
+                       clk_put(s->clkp);
+               }
+               break;
+
+       case QUADD_POWER_CLK_EMC:
+               /* update emc frequency; clk_get_sys() fails with ERR_PTR */
+               s->clkp = clk_get_sys("cpu", "emc");
+               if (!IS_ERR_OR_NULL(s->clkp)) {
+                       s->data[0].value =
+                               clk_get_rate(s->clkp) / 1000;
+                       clk_put(s->clkp);
+               }
+               break;
+
+       default:
+               BUG();
+       }
+
+       s->counter++;   /* updated while s->lock is still held */
+       mutex_unlock(&s->lock);
+}
+
+static int
+gpu_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)
+{
+       read_source(&power_ctx.gpu);
+       check_clks();
+
+       return 0;
+}
+
+static int
+emc_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)
+{
+       read_source(&power_ctx.emc);
+       check_clks();
+
+       return 0;
+}
+
+static int
+cpu_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)
+{
+       read_source(&power_ctx.cpu);
+
+#ifndef CONFIG_COMMON_CLK
+       read_source(&power_ctx.gpu);
+       read_source(&power_ctx.emc);
+#endif
+
+       check_clks();
+
+       return 0;
+}
+
+static void make_sample(void)
+{
+       int i;
+       u32 extra_cpus[NR_CPUS];
+       struct power_clk_source *s;
+
+       struct quadd_record_data record;
+       struct quadd_power_rate_data *power_rate = &record.power_rate;
+       struct quadd_comm_data_interface *comm = power_ctx.quadd_ctx->comm;
+
+       record.magic = QUADD_RECORD_MAGIC;
+       record.record_type = QUADD_RECORD_TYPE_POWER_RATE;
+       record.cpu_mode = QUADD_CPU_MODE_NONE;
+
+       power_rate->time = quadd_get_time();
+
+       s = &power_ctx.cpu;
+       mutex_lock(&s->lock);
+       if (atomic_read(&s->active)) {
+               power_rate->nr_cpus = s->nr;
+               for (i = 0; i < s->nr; i++)
+                       extra_cpus[i] = s->data[i].value;
+       } else {
+               power_rate->nr_cpus = 0;
+       }
+       mutex_unlock(&s->lock);
+
+       s = &power_ctx.gpu;
+       mutex_lock(&s->lock);
+       if (atomic_read(&s->active))
+               power_rate->gpu = s->data[0].value;
+       else
+               power_rate->gpu = 0;
+
+       mutex_unlock(&s->lock);
+
+       s = &power_ctx.emc;
+       mutex_lock(&s->lock);
+       if (atomic_read(&s->active))
+               power_rate->emc = s->data[0].value;
+       else
+               power_rate->emc = 0;
+
+       mutex_unlock(&s->lock);
+/*
+       pr_debug("make_sample: cpu: %u/%u/%u/%u, gpu: %u, emc: %u\n",
+                extra_cpus[0], extra_cpus[1], extra_cpus[2], extra_cpus[3],
+                power_rate->gpu, power_rate->emc);
+*/
+       comm->put_sample(&record, (char *)extra_cpus,
+                        power_rate->nr_cpus * sizeof(extra_cpus[0]));
+}
+
+static inline int is_data_changed(struct power_clk_source *s)
+{
+       int i;
+
+       mutex_lock(&s->lock);
+       for (i = 0; i < s->nr; i++) {
+               if (s->data[i].value != s->data[i].prev) {
+                       mutex_unlock(&s->lock);
+                       return 1;
+               }
+       }
+       mutex_unlock(&s->lock);
+
+       return 0;
+}
+
+static inline void update_data(struct power_clk_source *s)
+{
+       int i;
+
+       mutex_lock(&s->lock);
+
+       for (i = 0; i < s->nr; i++)
+               s->data[i].prev = s->data[i].value;
+
+       mutex_unlock(&s->lock);
+}
+
+static void check_clks(void)
+{
+       int changed = 0;
+
+       if (is_data_changed(&power_ctx.cpu)) {
+               update_data(&power_ctx.cpu);
+               changed = 1;
+       }
+
+       if (is_data_changed(&power_ctx.gpu)) {
+               update_data(&power_ctx.gpu);
+               changed = 1;
+       }
+
+       if (is_data_changed(&power_ctx.emc)) {
+               update_data(&power_ctx.emc);
+               changed = 1;
+       }
+/*
+       pr_debug("cpu: %lu/%lu/%lu/%lu, gpu: %lu, emc: %lu, changed: %s\n",
+                power_ctx.cpu.data[0].value, power_ctx.cpu.data[1].value,
+                power_ctx.cpu.data[2].value, power_ctx.cpu.data[3].value,
+                power_ctx.gpu.data[0].value, power_ctx.emc.data[0].value,
+                changed ? "yes" : "no");
+*/
+       if (changed)
+               make_sample();
+}
+
+static void reset_data(struct power_clk_source *s)
+{
+       int i;
+
+       mutex_lock(&s->lock);
+       for (i = 0; i < s->nr; i++) {
+               s->data[i].value = 0;
+               s->data[i].prev = 0;
+       }
+       atomic_set(&s->active, 0);      /* the atomic_t flag, not *s */
+       mutex_unlock(&s->lock);
+}
+
+static void init_source(struct power_clk_source *s,
+                       notifier_call_ft notifier,
+                       int nr_values,
+                       int type)
+{
+       s->type = type;
+       s->nb.notifier_call = notifier;
+       s->nr = nr_values;
+
+       mutex_init(&s->lock);
+       reset_data(s);
+}
+
+static void
+power_clk_work_func(struct work_struct *dummy)
+{
+#ifndef CONFIG_COMMON_CLK
+       read_source(&power_ctx.gpu);
+       read_source(&power_ctx.emc);
+
+       check_clks();
+#endif
+}
+
+static DECLARE_WORK(power_clk_work, power_clk_work_func);
+
+static void power_clk_timer(unsigned long data)
+{
+       struct timer_list *timer = &power_ctx.timer;
+
+       schedule_work(&power_clk_work);
+       timer->expires = jiffies + msecs_to_jiffies(power_ctx.period);
+       add_timer(timer);
+}
+
+int quadd_power_clk_is_enabled(int *period)
+{
+       struct quadd_parameters *param = &power_ctx.quadd_ctx->param;
+
+       *period = power_ctx.period;
+
+       if (param->power_rate_freq == 0)
+               return 0;
+
+       return 1;
+}
+
+int quadd_power_clk_start(void)
+{
+       struct power_clk_source *s;
+       int status;
+       struct timer_list *timer = &power_ctx.timer;
+       struct quadd_parameters *param = &power_ctx.quadd_ctx->param;
+
+       if (param->power_rate_freq == 0) {
+               pr_info("power_clk is not started\n");
+               return 0;
+       }
+
+#ifdef CONFIG_COMMON_CLK
+       power_ctx.period = 0;
+#else
+       power_ctx.period = MSEC_PER_SEC / param->power_rate_freq;
+#endif
+       pr_info("power_clk: start, freq: %d\n",
+               param->power_rate_freq);
+
+       /* setup gpu frequency; clk_get_sys() fails with ERR_PTR, not NULL */
+       s = &power_ctx.gpu;
+       s->clkp = clk_get_sys("3d", NULL);
+       if (!IS_ERR_OR_NULL(s->clkp)) {
+#ifdef CONFIG_COMMON_CLK
+               status = clk_notifier_register(s->clkp, &s->nb);
+               if (status < 0) {
+                       pr_err("error: could not setup gpu freq\n");
+                       return status;
+               }
+               clk_put(s->clkp);
+#endif
+               reset_data(s);
+               atomic_set(&s->active, 1);
+       } else {
+               pr_err("error: could not setup gpu freq\n");
+               atomic_set(&s->active, 0);
+       }
+
+       /* setup emc frequency; clk_get_sys() fails with ERR_PTR, not NULL */
+       s = &power_ctx.emc;
+       s->clkp = clk_get_sys("cpu", "emc");
+       if (!IS_ERR_OR_NULL(s->clkp)) {
+#ifdef CONFIG_COMMON_CLK
+               status = clk_notifier_register(s->clkp, &s->nb);
+               if (status < 0) {
+                       pr_err("error: could not setup emc freq\n");
+                       return status;
+               }
+               clk_put(s->clkp);
+#endif
+               reset_data(s);
+               atomic_set(&s->active, 1);
+       } else {
+               pr_err("error: could not setup emc freq\n");
+               atomic_set(&s->active, 0);
+       }
+
+       /* setup cpu frequency notifier */
+       s = &power_ctx.cpu;
+       status = register_cpu_notifier(&s->nb);
+       if (status < 0) {
+               pr_err("error: could not setup cpu freq\n");
+               return status;
+       }
+       reset_data(s);
+
+       if (power_ctx.period > 0) {
+               init_timer(timer);
+               timer->function = power_clk_timer;
+               timer->expires = jiffies + msecs_to_jiffies(power_ctx.period);
+               timer->data = 0;
+               add_timer(timer);
+       }
+
+       atomic_set(&s->active, 1);
+
+       return 0;
+}
+
+void quadd_power_clk_stop(void)
+{
+       struct power_clk_source *s;
+
+       if (power_ctx.quadd_ctx->param.power_rate_freq == 0)
+               return;
+
+       if (power_ctx.period > 0)
+               del_timer_sync(&power_ctx.timer);
+
+       s = &power_ctx.gpu;
+       if (atomic_cmpxchg(&s->active, 1, 0)) {
+#ifdef CONFIG_COMMON_CLK
+               if (s->clkp)
+                       clk_notifier_unregister(s->clkp, &s->nb);
+#endif
+       }
+
+       s = &power_ctx.emc;
+       if (atomic_cmpxchg(&s->active, 1, 0)) {
+#ifdef CONFIG_COMMON_CLK
+               if (s->clkp)
+                       clk_notifier_unregister(s->clkp, &s->nb);
+#endif
+       }
+
+       s = &power_ctx.cpu;
+       if (atomic_cmpxchg(&s->active, 1, 0)) {
+               pr_info("power_clk: stop\n");
+               unregister_cpu_notifier(&s->nb);
+       }
+}
+
+int quadd_power_clk_init(struct quadd_ctx *quadd_ctx)
+{
+       pr_info("power_clk: init\n");
+
+       init_source(&power_ctx.cpu, cpu_notifier_call, nr_cpu_ids,
+                   QUADD_POWER_CLK_CPU);
+       init_source(&power_ctx.gpu, gpu_notifier_call, 1, QUADD_POWER_CLK_GPU);
+       init_source(&power_ctx.emc, emc_notifier_call, 1, QUADD_POWER_CLK_EMC);
+
+       power_ctx.quadd_ctx = quadd_ctx;
+
+       return 0;
+}
+
+void quadd_power_clk_deinit(void)
+{
+       pr_info("power_clk: deinit\n");
+       quadd_power_clk_stop();
+}
diff --git a/drivers/misc/tegra-profiler/power_clk.h b/drivers/misc/tegra-profiler/power_clk.h
new file mode 100644 (file)
index 0000000..6854687
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * drivers/misc/tegra-profiler/power_clk.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_POWER_CLK_H
+#define __QUADD_POWER_CLK_H
+
+struct quadd_ctx;
+
+void quadd_power_clk_deinit(void);
+int quadd_power_clk_init(struct quadd_ctx *quadd_ctx);
+
+int quadd_power_clk_start(void);
+void quadd_power_clk_stop(void);
+
+int quadd_power_clk_is_enabled(int *period);
+
+#endif /* __QUADD_POWER_CLK_H */
diff --git a/drivers/misc/tegra-profiler/quadd.h b/drivers/misc/tegra-profiler/quadd.h
new file mode 100644 (file)
index 0000000..743482d
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * drivers/misc/tegra-profiler/quadd.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_H
+#define __QUADD_H
+
+#include <linux/tegra_profiler.h>
+
+/* #define QUADD_USE_EMULATE_COUNTERS  1 */
+
+struct event_data;
+struct quadd_comm_data_interface;
+struct quadd_hrt_ctx;
+struct quadd_mmap_ctx;
+
+struct quadd_event_source_interface {
+       int (*enable)(void);
+       void (*disable)(void);
+       void (*start)(void);
+       void (*stop)(void);
+       int (*read)(struct event_data *events);
+       int (*set_events)(int *events, int size);
+       int (*get_supported_events)(int *events);
+};
+
+struct source_info {
+       int supported_events[QUADD_MAX_COUNTERS];
+       int nr_supported_events;
+
+       int active;
+};
+
+struct quadd_ctx {
+       struct quadd_parameters param;
+
+       struct quadd_event_source_interface *pmu;
+       struct source_info pmu_info;
+
+       struct quadd_event_source_interface *pl310;
+       struct source_info pl310_info;
+
+       struct quadd_comm_data_interface *comm;
+       struct quadd_hrt_ctx *hrt;
+       struct quadd_mmap_ctx *mmap;
+
+       atomic_t started;
+};
+
+#endif /* __QUADD_H */
diff --git a/drivers/misc/tegra-profiler/tegra.h b/drivers/misc/tegra-profiler/tegra.h
new file mode 100644 (file)
index 0000000..013c5ab
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * drivers/misc/tegra-profiler/tegra.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_TEGRA_H
+#define __QUADD_TEGRA_H
+
+#include <linux/smp.h>
+
+#ifdef CONFIG_TEGRA_CLUSTER_CONTROL
+#include <linux/io.h>
+#include <../../mach-tegra/pm.h>
+#endif
+
+static inline int quadd_get_processor_id(void)
+{
+       int cpu_id = smp_processor_id();
+
+#ifdef CONFIG_TEGRA_CLUSTER_CONTROL
+       if (is_lp_cluster())
+               cpu_id |= QM_TEGRA_POWER_CLUSTER_LP;
+#endif
+
+       return cpu_id;
+}
+
+static inline int quadd_is_cpu_with_lp_cluster(void)
+{
+#ifdef CONFIG_TEGRA_CLUSTER_CONTROL
+       return 1;
+#else
+       return 0;
+#endif
+}
+
+#endif  /* __QUADD_TEGRA_H */
diff --git a/drivers/misc/tegra-profiler/version.h b/drivers/misc/tegra-profiler/version.h
new file mode 100644 (file)
index 0000000..b5cf277
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * drivers/misc/tegra-profiler/version.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __QUADD_VERSION_H
+#define __QUADD_VERSION_H
+
+#define QUADD_MODULE_VERSION           "1.23"
+#define QUADD_MODULE_BRANCH            "Dev"
+
+#endif /* __QUADD_VERSION_H */
diff --git a/include/linux/tegra_profiler.h b/include/linux/tegra_profiler.h
new file mode 100644 (file)
index 0000000..609a78f
--- /dev/null
@@ -0,0 +1,302 @@
+/*
+ * include/linux/tegra_profiler.h
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __TEGRA_PROFILER_H
+#define __TEGRA_PROFILER_H
+
+#include <linux/ioctl.h>
+
+#define QUADD_SAMPLES_VERSION  16
+#define QUADD_IO_VERSION       4
+
+#define QUADD_MAX_COUNTERS     32      /* size of quadd_parameters.events[] */
+#define QUADD_MAX_PROCESS      64      /* size of quadd_parameters.pids[] */
+
+#define QUADD_DEVICE_NAME      "quadd"
+#define QUADD_AUTH_DEVICE_NAME "quadd_auth"
+
+#define QUADD_IOCTL    100     /* ioctl type number used by IOCTL_* below */
+
+/*
+ * Pass the profiling setup (frequency, etc.) in a struct quadd_parameters.
+ */
+#define IOCTL_SETUP _IOW(QUADD_IOCTL, 0, struct quadd_parameters)
+
+/*
+ * Start profiling. No argument data is encoded in the request.
+ */
+#define IOCTL_START _IO(QUADD_IOCTL, 1)
+
+/*
+ * Stop profiling. No argument data is encoded in the request.
+ */
+#define IOCTL_STOP _IO(QUADD_IOCTL, 2)
+
+/*
+ * Get the capabilities as a struct quadd_comm_cap (read direction).
+ */
+#define IOCTL_GET_CAP _IOR(QUADD_IOCTL, 3, struct quadd_comm_cap)
+
+/*
+ * Get the module state as a struct quadd_module_state (read direction).
+ */
+#define IOCTL_GET_STATE _IOR(QUADD_IOCTL, 4, struct quadd_module_state)
+
+/*
+ * Get the module version as a struct quadd_module_version (read direction).
+ */
+#define IOCTL_GET_VERSION _IOR(QUADD_IOCTL, 5, struct quadd_module_version)
+
+
+#define QUADD_HRT_SCHED_IN_FUNC                "finish_task_switch"
+
+#define QM_TEGRA_POWER_CLUSTER_LP      (1 << 29) /* LP CPU */
+
+enum quadd_events_id { /* same order as the bits of struct quadd_events_cap */
+       QUADD_EVENT_TYPE_CPU_CYCLES = 0,
+
+       QUADD_EVENT_TYPE_INSTRUCTIONS,                  /* 1 */
+       QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS,           /* 2 */
+       QUADD_EVENT_TYPE_BRANCH_MISSES,                 /* 3 */
+       QUADD_EVENT_TYPE_BUS_CYCLES,                    /* 4 */
+
+       QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES,         /* 5 */
+       QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES,        /* 6 */
+       QUADD_EVENT_TYPE_L1_ICACHE_MISSES,              /* 7 */
+
+       QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES,         /* 8 */
+       QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES,        /* 9 */
+       QUADD_EVENT_TYPE_L2_ICACHE_MISSES,              /* 10 */
+
+       QUADD_EVENT_TYPE_MAX,           /* 11: count of the ids above */
+};
+
+#pragma pack(push, 4)  /* cap member alignment at 4 bytes until the pop */
+
+struct event_data {
+       int event_source;       /* enum quadd_event_source? (confirm) */
+       int event_id;           /* enum quadd_events_id? (confirm) */
+
+       u32 val;
+       u32 prev_val;
+};
+
+enum quadd_record_type {
+       QUADD_RECORD_TYPE_SAMPLE = 1,
+       QUADD_RECORD_TYPE_MMAP,                 /* 2 */
+       QUADD_RECORD_TYPE_MA,                   /* 3 */
+       QUADD_RECORD_TYPE_COMM,         /* 4; no quadd_record_data union member */
+       QUADD_RECORD_TYPE_DEBUG,                /* 5 */
+       QUADD_RECORD_TYPE_HEADER,               /* 6 */
+       QUADD_RECORD_TYPE_POWER_RATE,           /* 7 */
+       QUADD_RECORD_TYPE_ADDITIONAL_SAMPLE,    /* 8 */
+};
+
+enum quadd_event_source {
+       QUADD_EVENT_SOURCE_PMU = 1,
+       QUADD_EVENT_SOURCE_PL310,       /* 2 */
+};
+
+enum quadd_cpu_mode {
+       QUADD_CPU_MODE_KERNEL = 1,
+       QUADD_CPU_MODE_USER,            /* 2 */
+       QUADD_CPU_MODE_NONE,            /* 3 */
+};
+
+struct quadd_sample_data {     /* 36 bytes under #pragma pack(4) */
+       u32 event_id;
+
+       u32 ip;
+       u32 pid;
+       u64 time;       /* offset 12: 4-byte aligned only */
+       u32 cpu;
+       u64 period;     /* offset 24 */
+
+       u32 callchain_nr;       /* presumably # of callchain entries - confirm */
+};
+
+struct quadd_mmap_data {       /* 28 bytes under #pragma pack(4) */
+       u32 pid;
+       u32 addr;
+       u64 len;
+       u64 pgoff;
+
+       u32 filename_length;    /* presumably length of a name that follows */
+};
+
+struct quadd_ma_data {         /* 28 bytes under #pragma pack(4) */
+       u32 pid;
+       u64 time;       /* offset 4: 4-byte aligned only */
+
+       u64 vm_size;
+       u64 rss_size;
+};
+
+struct quadd_power_rate_data { /* 20 bytes under #pragma pack(4) */
+       u64 time;
+
+       u32 nr_cpus;
+
+       u32 gpu;
+       u32 emc;
+};
+
+struct quadd_additional_sample {       /* 40 bytes */
+       u32 type;
+
+       u32 values[8];
+       u32 extra_length;       /* presumably size of trailing data - confirm */
+};
+
+enum { /* presumably the values of quadd_debug_data.type - confirm */
+       QM_DEBUG_SAMPLE_TYPE_SCHED_IN = 1,
+       QM_DEBUG_SAMPLE_TYPE_SCHED_OUT,         /* 2 */
+
+       QM_DEBUG_SAMPLE_TYPE_TIMER_HANDLE,      /* 3 */
+       QM_DEBUG_SAMPLE_TYPE_TIMER_START,       /* 4 */
+       QM_DEBUG_SAMPLE_TYPE_TIMER_CANCEL,      /* 5 */
+       QM_DEBUG_SAMPLE_TYPE_TIMER_FORWARD,     /* 6 */
+
+       QM_DEBUG_SAMPLE_TYPE_READ_COUNTER,      /* 7 */
+
+       QM_DEBUG_SAMPLE_TYPE_SOURCE_START,      /* 8 */
+       QM_DEBUG_SAMPLE_TYPE_SOURCE_STOP,       /* 9 */
+};
+
+struct quadd_debug_data {      /* 40 bytes under #pragma pack(4) */
+       u32 type;       /* presumably a QM_DEBUG_SAMPLE_TYPE_* value - confirm */
+
+       u32 cpu;
+       u32 pid;
+       u64 time;       /* offset 12: 4-byte aligned only */
+
+       u64 timer_period;
+
+       u32 extra_value1;
+       u32 extra_value2;
+       u32 extra_value3;
+};
+
+
+struct quadd_header_data {
+       u32 version;    /* presumably QUADD_SAMPLES_VERSION - confirm */
+
+       u32     backtrace:1,    /* five 1-bit flags sharing one u32 */
+               use_freq:1,
+               system_wide:1,
+               power_rate:1,   /* no such bit in struct quadd_parameters */
+               debug_samples:1;
+
+       u64 period;
+       u32 ma_period;
+       u32 power_rate_period;
+
+       u32 reserved[4];        /* reserved fields for future extensions */
+};
+
+#define QUADD_RECORD_MAGIC     0x33557799
+
+struct quadd_record_data {
+       u32 magic;      /* for debug; presumably QUADD_RECORD_MAGIC - confirm */
+       u32 record_type;        /* presumably enum quadd_record_type - confirm */
+       u32 cpu_mode;   /* presumably enum quadd_cpu_mode - confirm */
+
+       union {         /* anonymous union: payload members share storage */
+               struct quadd_sample_data        sample;
+               struct quadd_mmap_data          mmap;
+               struct quadd_ma_data            ma;
+               struct quadd_debug_data         debug;
+               struct quadd_header_data        hdr;
+               struct quadd_power_rate_data    power_rate;
+               struct quadd_additional_sample  additional_sample;
+       };
+};
+
+#define QUADD_MAX_PACKAGE_NAME 320
+
+struct quadd_parameters {      /* argument type of IOCTL_SETUP */
+       u32 freq;
+       u32 ma_freq;
+       u32 power_rate_freq;
+
+       u64     backtrace:1,    /* u64-typed here; quadd_header_data uses u32 */
+               use_freq:1,
+               system_wide:1,
+               debug_samples:1;
+
+       u32 pids[QUADD_MAX_PROCESS];
+       u32 nr_pids;    /* presumably # of valid pids[] entries - confirm */
+
+       u8 package_name[QUADD_MAX_PACKAGE_NAME];
+
+       u32 events[QUADD_MAX_COUNTERS];
+       u32 nr_events;  /* presumably # of valid events[] entries - confirm */
+
+       u32 reserved[16];       /* reserved fields for future extensions */
+};
+
+struct quadd_events_cap {      /* one bit per enum quadd_events_id entry */
+       u32     cpu_cycles:1,
+               instructions:1,
+               branch_instructions:1,
+               branch_misses:1,
+               bus_cycles:1,
+
+               l1_dcache_read_misses:1,
+               l1_dcache_write_misses:1,
+               l1_icache_misses:1,
+
+               l2_dcache_read_misses:1,
+               l2_dcache_write_misses:1,
+               l2_icache_misses:1;     /* 11 bits used in total */
+};
+
+struct quadd_comm_cap {        /* argument type of IOCTL_GET_CAP */
+       u32     pmu:1,          /* six 1-bit capability flags in one u32 */
+               power_rate:1,
+               l2_cache:1,
+               l2_multiple_events:1,
+               tegra_lp_cluster:1,
+               blocked_read:1;
+
+       struct quadd_events_cap events_cap;
+
+       u32 reserved[16];       /* reserved fields for future extensions */
+};
+
+struct quadd_module_state {    /* argument type of IOCTL_GET_STATE */
+       u64 nr_all_samples;
+       u64 nr_skipped_samples;
+
+       u32 buffer_size;        /* NOTE(review): units are not stated here */
+       u32 buffer_fill_size;
+
+       u32 reserved[16];       /* reserved fields for future extensions */
+};
+
+struct quadd_module_version {  /* argument type of IOCTL_GET_VERSION */
+       u8 branch[32];  /* presumably QUADD_MODULE_BRANCH - confirm */
+       u8 version[16]; /* presumably QUADD_MODULE_VERSION - confirm */
+
+       u32 samples_version;    /* presumably QUADD_SAMPLES_VERSION - confirm */
+       u32 io_version;         /* presumably QUADD_IO_VERSION - confirm */
+
+       u32 reserved[4];        /* reserved fields for future extensions */
+};
+
+#pragma pack(pop)
+
+#endif  /* __TEGRA_PROFILER_H */