misc: tegra-profiler: support raw hardware events
Igor Nabirushkin [Wed, 10 May 2017 06:48:52 +0000 (10:48 +0400)]
Support arbitrary raw hardware PMU events.
There are also a few minor changes in procfs output.

Bug 1923017

Change-Id: I490817d4dba10100d7450572835c45dcba8cac32
Signed-off-by: Igor Nabirushkin <inabirushkin@nvidia.com>
Reviewed-on: http://git-master/r/1478869
(cherry picked from commit 83d257b17e00e97b3c2fc93e8a2b142c7074c23f)

12 files changed:
drivers/misc/tegra-profiler/arm_pmu.h
drivers/misc/tegra-profiler/armv7_pmu.c
drivers/misc/tegra-profiler/armv8_pmu.c
drivers/misc/tegra-profiler/debug.c
drivers/misc/tegra-profiler/debug.h
drivers/misc/tegra-profiler/hrt.c
drivers/misc/tegra-profiler/main.c
drivers/misc/tegra-profiler/pl310.c
drivers/misc/tegra-profiler/quadd.h
drivers/misc/tegra-profiler/quadd_proc.c
drivers/misc/tegra-profiler/version.h
include/linux/tegra_profiler.h

index 6071c46..79d1eef 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/arm_pmu.h
  *
- * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
 
 #include <linux/list.h>
 
+#include <linux/tegra_profiler.h>
+
 #define QUADD_MAX_PMU_COUNTERS 32
 
 struct quadd_pmu_event_info {
-       int quadd_event_id;
+       struct quadd_event event;
        int hw_value;
 
        struct list_head list;
@@ -41,6 +43,7 @@ struct quadd_pmu_ctx {
        struct quadd_arch_info arch;
 
        u32 counters_mask;
+       u32 raw_event_mask;
 
        struct list_head used_events;
 
@@ -48,4 +51,11 @@ struct quadd_pmu_ctx {
        unsigned int *current_map;
 };
 
+static inline int
+is_cpu_cycles(const struct quadd_event *event)
+{
+       return event->type == QUADD_EVENT_TYPE_HARDWARE &&
+              event->id == QUADD_EVENT_HW_CPU_CYCLES;
+}
+
 #endif /* __ARM_PMU_H */
index 98bea24..6a5e73e 100644 (file)
@@ -55,52 +55,52 @@ struct quadd_cntrs_info {
 
 static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info);
 
-static unsigned quadd_armv7_a9_events_map[QUADD_EVENT_TYPE_MAX] = {
-       [QUADD_EVENT_TYPE_INSTRUCTIONS] =
+static unsigned quadd_armv7_a9_events_map[QUADD_EVENT_HW_MAX] = {
+       [QUADD_EVENT_HW_INSTRUCTIONS] =
                QUADD_ARMV7_A9_HW_EVENT_INST_OUT_OF_RENAME_STAGE,
-       [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
+       [QUADD_EVENT_HW_BRANCH_INSTRUCTIONS] =
                QUADD_ARMV7_HW_EVENT_PC_WRITE,
-       [QUADD_EVENT_TYPE_BRANCH_MISSES] =
+       [QUADD_EVENT_HW_BRANCH_MISSES] =
                QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
-       [QUADD_EVENT_TYPE_BUS_CYCLES] =
+       [QUADD_EVENT_HW_BUS_CYCLES] =
                QUADD_ARMV7_HW_EVENT_CLOCK_CYCLES,
 
-       [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
+       [QUADD_EVENT_HW_L1_DCACHE_READ_MISSES] =
                QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
-       [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
+       [QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES] =
                QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
-       [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
+       [QUADD_EVENT_HW_L1_ICACHE_MISSES] =
                QUADD_ARMV7_HW_EVENT_IFETCH_MISS,
 
-       [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
+       [QUADD_EVENT_HW_L2_DCACHE_READ_MISSES] =
                QUADD_ARMV7_UNSUPPORTED_EVENT,
-       [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
+       [QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES] =
                QUADD_ARMV7_UNSUPPORTED_EVENT,
-       [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
+       [QUADD_EVENT_HW_L2_ICACHE_MISSES] =
                QUADD_ARMV7_UNSUPPORTED_EVENT,
 };
 
-static unsigned quadd_armv7_a15_events_map[QUADD_EVENT_TYPE_MAX] = {
-       [QUADD_EVENT_TYPE_INSTRUCTIONS] =
+static unsigned quadd_armv7_a15_events_map[QUADD_EVENT_HW_MAX] = {
+       [QUADD_EVENT_HW_INSTRUCTIONS] =
                                QUADD_ARMV7_HW_EVENT_INSTR_EXECUTED,
-       [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
+       [QUADD_EVENT_HW_BRANCH_INSTRUCTIONS] =
                                QUADD_ARMV7_A15_HW_EVENT_SPEC_PC_WRITE,
-       [QUADD_EVENT_TYPE_BRANCH_MISSES] =
+       [QUADD_EVENT_HW_BRANCH_MISSES] =
                                QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
-       [QUADD_EVENT_TYPE_BUS_CYCLES] = QUADD_ARMV7_HW_EVENT_BUS_CYCLES,
+       [QUADD_EVENT_HW_BUS_CYCLES] = QUADD_ARMV7_HW_EVENT_BUS_CYCLES,
 
-       [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
+       [QUADD_EVENT_HW_L1_DCACHE_READ_MISSES] =
                                QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_REFILL,
-       [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
+       [QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES] =
                                QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_REFILL,
-       [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
+       [QUADD_EVENT_HW_L1_ICACHE_MISSES] =
                                QUADD_ARMV7_HW_EVENT_IFETCH_MISS,
 
-       [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
+       [QUADD_EVENT_HW_L2_DCACHE_READ_MISSES] =
                                QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_REFILL,
-       [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
+       [QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES] =
                                QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_REFILL,
-       [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
+       [QUADD_EVENT_HW_L2_ICACHE_MISSES] =
                                QUADD_ARMV7_UNSUPPORTED_EVENT,
 };
 
@@ -300,9 +300,14 @@ get_free_counters(unsigned long *bitmap, int nbits, int *ccntr)
        return bitmap_weight(bitmap, BITS_PER_BYTE * sizeof(u32)) - cc;
 }
 
-static u32 armv7_pmu_adjust_value(u32 value, int event_id)
+static u32
+armv7_pmu_adjust_value(u32 value, const struct quadd_event *event)
 {
-       struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
+       struct quadd_pmu_ctx *local_pmu_ctx = this_cpu_ptr(&pmu_ctx);
+
+       if (event->type != QUADD_EVENT_TYPE_HARDWARE)
+               return value;
+
        /*
        * Cortex A8/A9: l1 cache performance counters
        * don't differentiate between read and write data accesses/misses,
@@ -311,10 +316,11 @@ static u32 armv7_pmu_adjust_value(u32 value, int event_id)
        if (local_pmu_ctx->l1_cache_rw &&
            (local_pmu_ctx->arch.type == QUADD_ARM_CPU_TYPE_CORTEX_A8 ||
            local_pmu_ctx->arch.type == QUADD_ARM_CPU_TYPE_CORTEX_A9) &&
-           (event_id == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES ||
-           event_id == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)) {
+           (event->id == QUADD_EVENT_HW_L1_DCACHE_READ_MISSES ||
+           event->id == QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES)) {
                return value / 2;
        }
+
        return value;
 }
 
@@ -452,7 +458,7 @@ static void pmu_start(void)
 
                event = ei->hw_value;
 
-               if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
+               if (is_cpu_cycles(&ei->event)) {
                        if (!ccntr) {
                                pr_err_once("Error: cpu cycles counter is already occupied\n");
                                return;
@@ -523,7 +529,7 @@ pmu_read(struct event_data *events, int max_events)
        list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
                int index;
 
-               if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
+               if (is_cpu_cycles(&ei->event)) {
                        if (!test_bit(QUADD_ARMV7_CCNT_BIT, pi->used_cntrs)) {
                                pr_err_once("Error: ccntr is not used\n");
                                return 0;
@@ -541,17 +547,17 @@ pmu_read(struct event_data *events, int max_events)
                }
 
                val = read_counter(index);
-               val = armv7_pmu_adjust_value(val, ei->quadd_event_id);
+               val = armv7_pmu_adjust_value(val, &ei->event);
 
                events->event_source = QUADD_EVENT_SOURCE_PMU;
-               events->event_id = ei->quadd_event_id;
+               events->event = ei->event;
 
                events->val = val;
                events->prev_val = *prevp;
 
                *prevp = val;
 
-               qm_debug_read_counter(events->event_id, events->prev_val,
+               qm_debug_read_counter(&events->event, events->prev_val,
                                      events->val);
 
                if (++i >= max_events)
@@ -578,7 +584,7 @@ pmu_read_emulate(struct event_data *events, int max_events)
                if (val > 200)
                        val = 100;
 
-               events->event_id = *prevp;
+               events->event.id = *prevp;
                events->val = val;
 
                *prevp = val;
@@ -625,7 +631,8 @@ static void free_events(struct list_head *head)
        }
 }
 
-static int set_events(int cpuid, int *events, int size)
+static int
+set_events(int cpuid, const struct quadd_event *events, int size)
 {
        int free_pcntrs, err;
        int i, nr_l1_r = 0, nr_l1_w = 0;
@@ -656,10 +663,23 @@ static int set_events(int cpuid, int *events, int size)
                free_pcntrs, free_ci.ccntr);
 
        for (i = 0; i < size; i++) {
+               unsigned int type, id;
                struct quadd_pmu_event_info *ei;
 
-               if (events[i] > QUADD_EVENT_TYPE_MAX) {
-                       pr_err("Error event: %d\n", events[i]);
+               type = events[i].type;
+               id = events[i].id;
+
+               if (type == QUADD_EVENT_TYPE_HARDWARE) {
+                       if (id >= QUADD_EVENT_HW_MAX) {
+                               err = -EINVAL;
+                               goto out_free;
+                       }
+               } else if (type == QUADD_EVENT_TYPE_RAW) {
+                       if (id & ~local_pmu_ctx->raw_event_mask) {
+                               err = -EINVAL;
+                               goto out_free;
+                       }
+               } else {
                        err = -EINVAL;
                        goto out_free;
                }
@@ -673,7 +693,7 @@ static int set_events(int cpuid, int *events, int size)
                INIT_LIST_HEAD(&ei->list);
                list_add_tail(&ei->list, &local_pmu_ctx->used_events);
 
-               if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
+               if (is_cpu_cycles(&events[i])) {
                        ei->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT;
                        if (!free_ci.ccntr) {
                                pr_err("Error: cpu cycles counter is already occupied\n");
@@ -687,18 +707,21 @@ static int set_events(int cpuid, int *events, int size)
                                goto out_free;
                        }
 
-                       ei->hw_value = local_pmu_ctx->current_map[events[i]];
+                       ei->hw_value = (type == QUADD_EVENT_TYPE_RAW) ? id :
+                               local_pmu_ctx->current_map[id];
                }
 
-               ei->quadd_event_id = events[i];
+               ei->event = events[i];
 
-               if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
-                       nr_l1_r++;
-               else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
-                       nr_l1_w++;
+               if (type == QUADD_EVENT_TYPE_HARDWARE) {
+                       if (id == QUADD_EVENT_HW_L1_DCACHE_READ_MISSES)
+                               nr_l1_r++;
+                       else if (id == QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES)
+                               nr_l1_w++;
+               }
 
-               pr_info("Event has been added: id/pmu value: %s/%#x\n",
-                       quadd_get_event_str(events[i]),
+               pr_info("[%d] Event has been added: id: %#x (%s), hw value: %#x\n",
+                       cpuid, id, type == QUADD_EVENT_TYPE_RAW ? "raw" : "hw",
                        ei->hw_value);
        }
 
@@ -712,7 +735,9 @@ out_free:
        return err;
 }
 
-static int get_supported_events(int cpuid, int *events, int max_events)
+static int
+get_supported_events(int cpuid, struct quadd_event *events,
+                    int max_events, unsigned int *raw_event_mask)
 {
        int i, nr_events = 0;
        struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
@@ -720,26 +745,33 @@ static int get_supported_events(int cpuid, int *events, int max_events)
        if (!local_pmu_ctx->current_map)
                return 0;
 
-       max_events = min_t(int, QUADD_EVENT_TYPE_MAX, max_events);
+       max_events = min_t(int, QUADD_EVENT_HW_MAX, max_events);
 
        for (i = 0; i < max_events; i++) {
                unsigned int event = local_pmu_ctx->current_map[i];
 
-               if (event != QUADD_ARMV7_UNSUPPORTED_EVENT)
-                       events[nr_events++] = i;
+               if (event != QUADD_ARMV7_UNSUPPORTED_EVENT) {
+                       events[nr_events].type = QUADD_EVENT_TYPE_HARDWARE;
+                       events[nr_events].id = i;
+
+                       nr_events++;
+               }
        }
 
+       *raw_event_mask = local_pmu_ctx->raw_event_mask;
+
        return nr_events;
 }
 
-static int get_current_events(int cpuid, int *events, int max_events)
+static int
+get_current_events(int cpuid, struct quadd_event *events, int max_events)
 {
        int i = 0;
        struct quadd_pmu_event_info *ei;
        struct quadd_pmu_ctx *local_pmu_ctx = &__get_cpu_var(pmu_ctx);
 
        list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
-               events[i++] = ei->quadd_event_id;
+               events[i++] = ei->event;
 
                if (i >= max_events)
                        break;
@@ -806,6 +838,8 @@ static int quadd_armv7_pmu_init_for_cpu(int cpu)
 
                        local_pmu_ctx->counters_mask =
                                QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9;
+                       local_pmu_ctx->raw_event_mask =
+                               QUADD_ARMV7_EVTSEL_MASK;
                        local_pmu_ctx->current_map = quadd_armv7_a9_events_map;
                        break;
 
@@ -818,6 +852,8 @@ static int quadd_armv7_pmu_init_for_cpu(int cpu)
 
                        local_pmu_ctx->counters_mask =
                                QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15;
+                       local_pmu_ctx->raw_event_mask =
+                               QUADD_ARMV7_EVTSEL_MASK;
                        local_pmu_ctx->current_map = quadd_armv7_a15_events_map;
                        break;
 
index f4645a2..c42adb8 100644 (file)
@@ -52,80 +52,80 @@ static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info);
 static DEFINE_PER_CPU(struct quadd_pmu_ctx, pmu_ctx);
 
 static unsigned
-quadd_armv8_pmuv3_arm_events_map[QUADD_EVENT_TYPE_MAX] = {
-       [QUADD_EVENT_TYPE_INSTRUCTIONS] =
+quadd_armv8_pmuv3_arm_events_map[QUADD_EVENT_HW_MAX] = {
+       [QUADD_EVENT_HW_INSTRUCTIONS] =
                QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
-       [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
+       [QUADD_EVENT_HW_BRANCH_INSTRUCTIONS] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
-       [QUADD_EVENT_TYPE_BRANCH_MISSES] =
+       [QUADD_EVENT_HW_BRANCH_MISSES] =
                QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
-       [QUADD_EVENT_TYPE_BUS_CYCLES] =
+       [QUADD_EVENT_HW_BUS_CYCLES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
 
-       [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
+       [QUADD_EVENT_HW_L1_DCACHE_READ_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
-       [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
+       [QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
-       [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
+       [QUADD_EVENT_HW_L1_ICACHE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,
 
-       [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
+       [QUADD_EVENT_HW_L2_DCACHE_READ_MISSES] =
                QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
-       [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
+       [QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L2_CACHE_REFILL,
-       [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
+       [QUADD_EVENT_HW_L2_ICACHE_MISSES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
 };
 
 static unsigned
-quadd_armv8_pmuv3_a57_events_map[QUADD_EVENT_TYPE_MAX] = {
-       [QUADD_EVENT_TYPE_INSTRUCTIONS] =
+quadd_armv8_pmuv3_a57_events_map[QUADD_EVENT_HW_MAX] = {
+       [QUADD_EVENT_HW_INSTRUCTIONS] =
                QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
-       [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
+       [QUADD_EVENT_HW_BRANCH_INSTRUCTIONS] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
-       [QUADD_EVENT_TYPE_BRANCH_MISSES] =
+       [QUADD_EVENT_HW_BRANCH_MISSES] =
                QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
-       [QUADD_EVENT_TYPE_BUS_CYCLES] =
+       [QUADD_EVENT_HW_BUS_CYCLES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
 
-       [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
+       [QUADD_EVENT_HW_L1_DCACHE_READ_MISSES] =
                QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_LD,
-       [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
+       [QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_A57_HW_EVENT_L1D_CACHE_REFILL_ST,
-       [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
+       [QUADD_EVENT_HW_L1_ICACHE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,
 
-       [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
+       [QUADD_EVENT_HW_L2_DCACHE_READ_MISSES] =
                QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_LD,
-       [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
+       [QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_A57_HW_EVENT_L2D_CACHE_REFILL_ST,
-       [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
+       [QUADD_EVENT_HW_L2_ICACHE_MISSES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
 };
 
 static unsigned
-quadd_armv8_pmuv3_denver_events_map[QUADD_EVENT_TYPE_MAX] = {
-       [QUADD_EVENT_TYPE_INSTRUCTIONS] =
+quadd_armv8_pmuv3_denver_events_map[QUADD_EVENT_HW_MAX] = {
+       [QUADD_EVENT_HW_INSTRUCTIONS] =
                QUADD_ARMV8_HW_EVENT_INSTR_EXECUTED,
-       [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
+       [QUADD_EVENT_HW_BRANCH_INSTRUCTIONS] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
-       [QUADD_EVENT_TYPE_BRANCH_MISSES] =
+       [QUADD_EVENT_HW_BRANCH_MISSES] =
                QUADD_ARMV8_HW_EVENT_PC_BRANCH_MIS_PRED,
-       [QUADD_EVENT_TYPE_BUS_CYCLES] =
+       [QUADD_EVENT_HW_BUS_CYCLES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
 
-       [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
+       [QUADD_EVENT_HW_L1_DCACHE_READ_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
-       [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
+       [QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_DCACHE_REFILL,
-       [QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
+       [QUADD_EVENT_HW_L1_ICACHE_MISSES] =
                QUADD_ARMV8_HW_EVENT_L1_ICACHE_REFILL,
 
-       [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
+       [QUADD_EVENT_HW_L2_DCACHE_READ_MISSES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
-       [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
+       [QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
-       [QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
+       [QUADD_EVENT_HW_L2_ICACHE_MISSES] =
                QUADD_ARMV8_UNSUPPORTED_EVENT,
 };
 
@@ -503,7 +503,7 @@ static void pmu_start(void)
 
                event = ei->hw_value;
 
-               if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
+               if (is_cpu_cycles(&ei->event)) {
                        if (!ccntr) {
                                pr_err_once("Error: cpu cycles counter is already occupied\n");
                                return;
@@ -574,7 +574,7 @@ pmu_read(struct event_data *events, int max_events)
        list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
                int index;
 
-               if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
+               if (is_cpu_cycles(&ei->event)) {
                        if (!test_bit(QUADD_ARMV8_CCNT_BIT, pi->used_cntrs)) {
                                pr_err_once("Error: ccntr is not used\n");
                                return 0;
@@ -594,14 +594,14 @@ pmu_read(struct event_data *events, int max_events)
                val = read_counter(index);
 
                events->event_source = QUADD_EVENT_SOURCE_PMU;
-               events->event_id = ei->quadd_event_id;
+               events->event = ei->event;
 
                events->val = val;
                events->prev_val = *prevp;
 
                *prevp = val;
 
-               qm_debug_read_counter(events->event_id, events->prev_val,
+               qm_debug_read_counter(&events->event, events->prev_val,
                                      events->val);
 
                if (++i >= max_events)
@@ -629,7 +629,7 @@ pmu_read_emulate(struct event_data *events, int max_events)
                if (val > 200)
                        val = 100;
 
-               events->event_id = *prevp;
+               events->event.id = *prevp;
                events->val = val;
 
                *prevp = val;
@@ -676,17 +676,13 @@ static void free_events(struct list_head *head)
        }
 }
 
-static int set_events(int cpuid, int *events, int size)
+static int
+set_events(int cpuid, const struct quadd_event *events, int size)
 {
-       int free_pcntrs, err;
-       int i, nr_l1_r = 0, nr_l1_w = 0;
+       int i, free_pcntrs, err;
        struct quadd_cntrs_info free_ci;
-
        struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
 
-
-       local_pmu_ctx->l1_cache_rw = 0;
-
        free_events(&local_pmu_ctx->used_events);
 
        if (!events || !size)
@@ -707,14 +703,24 @@ static int set_events(int cpuid, int *events, int size)
        pr_info("free counters: pcntrs/ccntr: %d/%d\n",
                free_pcntrs, free_ci.ccntr);
 
-       pr_info("event identification register: %#x\n",
-               armv8_pmu_pmceid_read());
-
        for (i = 0; i < size; i++) {
+               unsigned int type, id;
                struct quadd_pmu_event_info *ei;
 
-               if (events[i] > QUADD_EVENT_TYPE_MAX) {
-                       pr_err("error event: %d\n", events[i]);
+               type = events[i].type;
+               id = events[i].id;
+
+               if (type == QUADD_EVENT_TYPE_HARDWARE) {
+                       if (id >= QUADD_EVENT_HW_MAX) {
+                               err = -EINVAL;
+                               goto out_free;
+                       }
+               } else if (type == QUADD_EVENT_TYPE_RAW) {
+                       if (id & ~local_pmu_ctx->raw_event_mask) {
+                               err = -EINVAL;
+                               goto out_free;
+                       }
+               } else {
                        err = -EINVAL;
                        goto out_free;
                }
@@ -728,7 +734,7 @@ static int set_events(int cpuid, int *events, int size)
                INIT_LIST_HEAD(&ei->list);
                list_add_tail(&ei->list, &local_pmu_ctx->used_events);
 
-               if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
+               if (is_cpu_cycles(&events[i])) {
                        ei->hw_value = QUADD_ARMV8_CPU_CYCLE_EVENT;
                        if (!free_ci.ccntr) {
                                pr_err("error: cpu cycles counter is already occupied\n");
@@ -742,24 +748,17 @@ static int set_events(int cpuid, int *events, int size)
                                goto out_free;
                        }
 
-                       ei->hw_value = local_pmu_ctx->current_map[events[i]];
+                       ei->hw_value = (type == QUADD_EVENT_TYPE_RAW) ? id :
+                               local_pmu_ctx->current_map[id];
                }
 
-               ei->quadd_event_id = events[i];
-
-               if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
-                       nr_l1_r++;
-               else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
-                       nr_l1_w++;
+               ei->event = events[i];
 
-               pr_info("Event has been added: id/pmu value: %s/%#x\n",
-                       quadd_get_event_str(events[i]),
+               pr_info("[%d] Event has been added: id: %#x (%s), hw value: %#x\n",
+                       cpuid, id, type == QUADD_EVENT_TYPE_RAW ? "raw" : "hw",
                        ei->hw_value);
        }
 
-       if (nr_l1_r > 0 && nr_l1_w > 0)
-               local_pmu_ctx->l1_cache_rw = 1;
-
        return 0;
 
 out_free:
@@ -767,7 +766,9 @@ out_free:
        return err;
 }
 
-static int get_supported_events(int cpuid, int *events, int max_events)
+static int
+get_supported_events(int cpuid, struct quadd_event *events,
+                    int max_events, unsigned int *raw_event_mask)
 {
        int i, nr_events = 0;
 
@@ -776,28 +777,33 @@ static int get_supported_events(int cpuid, int *events, int max_events)
        if (!local_pmu_ctx->current_map)
                return 0;
 
-       max_events = min_t(int, QUADD_EVENT_TYPE_MAX, max_events);
+       max_events = min_t(int, QUADD_EVENT_HW_MAX, max_events);
 
        for (i = 0; i < max_events; i++) {
                unsigned int event = local_pmu_ctx->current_map[i];
 
-               if (event != QUADD_ARMV8_UNSUPPORTED_EVENT)
-                       events[nr_events++] = i;
+               if (event != QUADD_ARMV8_UNSUPPORTED_EVENT) {
+                       events[nr_events].type = QUADD_EVENT_TYPE_HARDWARE;
+                       events[nr_events].id = i;
+
+                       nr_events++;
+               }
        }
 
+       *raw_event_mask = local_pmu_ctx->raw_event_mask;
+
        return nr_events;
 }
 
-static int get_current_events(int cpuid, int *events, int max_events)
+static int
+get_current_events(int cpuid, struct quadd_event *events, int max_events)
 {
        int i = 0;
        struct quadd_pmu_event_info *ei;
-
        struct quadd_pmu_ctx *local_pmu_ctx = &per_cpu(pmu_ctx, cpuid);
 
-
        list_for_each_entry(ei, &local_pmu_ctx->used_events, list) {
-               events[i++] = ei->quadd_event_id;
+               events[i++] = ei->event;
 
                if (i >= max_events)
                        break;
@@ -871,6 +877,8 @@ static int quadd_armv8_pmu_init_for_cpu(int cpuid)
 
                local_pmu_ctx->counters_mask =
                        QUADD_ARMV8_COUNTERS_MASK_PMUV3;
+               local_pmu_ctx->raw_event_mask =
+                       QUADD_ARMV8_EVTSEL_MASK;
                local_pmu_ctx->current_map =
                        quadd_armv8_pmuv3_arm_events_map;
 
index 50c3d1d..da4fa51 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/debug.c
  *
- * Copyright (c) 2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -127,7 +127,7 @@ qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, int prev_nr_active)
        quadd_put_sample_this_cpu(&record, &vec, 1);
 }
 
-void qm_debug_read_counter(int event_id, u32 prev_val, u32 val)
+void qm_debug_read_counter(struct quadd_event *event, u32 prev_val, u32 val)
 {
        struct quadd_iovec vec;
        struct quadd_record_data record;
@@ -137,7 +137,7 @@ void qm_debug_read_counter(int event_id, u32 prev_val, u32 val)
 
        s->type = QM_DEBUG_SAMPLE_TYPE_READ_COUNTER;
 
-       s->extra_value[0] = event_id;
+       s->extra_value[0] = event->id;
        s->extra_value[1] = prev_val;
 
        vec.base = &val;
index 656438d..16f645c 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/debug.h
  *
- * Copyright (c) 2013-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2013-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -28,7 +28,7 @@ void qm_debug_timer_start(struct pt_regs *regs, u64 period);
 void qm_debug_timer_cancel(void);
 void qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid,
                            int prev_nr_active);
-void qm_debug_read_counter(int event_id, u32 prev_val, u32 val);
+void qm_debug_read_counter(struct quadd_event *event, u32 prev_val, u32 val);
 void qm_debug_start_source(int source_type);
 void qm_debug_stop_source(int source_type);
 #else
@@ -48,7 +48,8 @@ static inline void
 qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, int prev_nr_active)
 {
 }
-static inline void qm_debug_read_counter(int event_id, u32 prev_val, u32 val)
+static inline void
+qm_debug_read_counter(struct quadd_event *event, u32 prev_val, u32 val)
 {
 }
 static inline void qm_debug_start_source(int source_type)
@@ -63,23 +64,23 @@ void quadd_test_delay(void);
 
 #define QM_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
 static inline const char *
-quadd_get_event_str(int event)
+quadd_get_hw_event_str(int event)
 {
        static const char * const str[] = {
-               [QUADD_EVENT_TYPE_CPU_CYCLES]           = "cpu-cycles",
+               [QUADD_EVENT_HW_CPU_CYCLES]             = "cpu-cycles",
 
-               [QUADD_EVENT_TYPE_INSTRUCTIONS]         = "instructions",
-               [QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS]  = "branch_instruction",
-               [QUADD_EVENT_TYPE_BRANCH_MISSES]        = "branch_misses",
-               [QUADD_EVENT_TYPE_BUS_CYCLES]           = "bus-cycles",
+               [QUADD_EVENT_HW_INSTRUCTIONS]           = "instructions",
+               [QUADD_EVENT_HW_BRANCH_INSTRUCTIONS]    = "branch_instruction",
+               [QUADD_EVENT_HW_BRANCH_MISSES]          = "branch_misses",
+               [QUADD_EVENT_HW_BUS_CYCLES]             = "bus-cycles",
 
-               [QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES]        = "l1_d_read",
-               [QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES]       = "l1_d_write",
-               [QUADD_EVENT_TYPE_L1_ICACHE_MISSES]             = "l1_i",
+               [QUADD_EVENT_HW_L1_DCACHE_READ_MISSES]  = "l1_d_read",
+               [QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES] = "l1_d_write",
+               [QUADD_EVENT_HW_L1_ICACHE_MISSES]       = "l1_i",
 
-               [QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES]        = "l2_d_read",
-               [QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES]       = "l2_d_write",
-               [QUADD_EVENT_TYPE_L2_ICACHE_MISSES]             = "l2_i",
+               [QUADD_EVENT_HW_L2_DCACHE_READ_MISSES]  = "l2_d_read",
+               [QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES] = "l2_d_write",
+               [QUADD_EVENT_HW_L2_ICACHE_MISSES]       = "l2_i",
        };
        return (event < QM_ARRAY_SIZE(str)) ? str[event] : "invalid event";
 }
index 748d243..5da8500 100644 (file)
@@ -46,7 +46,7 @@ static void
 read_all_sources(struct pt_regs *regs, struct task_struct *task, int is_sched);
 
 struct hrt_event_value {
-       int event_id;
+       struct quadd_event event;
        u32 value;
 };
 
@@ -155,7 +155,7 @@ quadd_put_sample(struct quadd_record_data *data,
 static void put_header(int cpuid)
 {
        int nr_events = 0, max_events = QUADD_MAX_COUNTERS;
-       int events[QUADD_MAX_COUNTERS];
+       struct quadd_event events[QUADD_MAX_COUNTERS];
        struct quadd_record_data record;
        struct quadd_header_data *hdr = &record.hdr;
        struct quadd_parameters *param = &hrt.quadd_ctx->param;
@@ -318,7 +318,7 @@ static int read_source(struct quadd_event_source_interface *source,
                                res_val /= nr_active;
                }
 
-               events_vals[i].event_id = s->event_id;
+               events_vals[i].event = s->event;
                events_vals[i].value = res_val;
        }
 
index 2939173..a5fd5ea 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/main.c
  *
- * Copyright (c) 2013-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2013-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -172,16 +172,26 @@ static void stop(void)
        }
 }
 
-static inline int is_event_supported(struct source_info *si, int event)
+static inline int
+is_event_supported(struct source_info *si, const struct quadd_event *event)
 {
-       int i;
-       int nr = si->nr_supported_events;
-       int *events = si->supported_events;
+       unsigned int type, id;
+       int i, nr = si->nr_supp_events;
+       struct quadd_event *events = si->supp_events;
+
+       type = event->type;
+       id = event->id;
+
+       if (type == QUADD_EVENT_TYPE_RAW)
+               return (id & ~si->raw_event_mask) == 0;
 
-       for (i = 0; i < nr; i++) {
-               if (event == events[i])
-                       return 1;
+       if (type == QUADD_EVENT_TYPE_HARDWARE) {
+               for (i = 0; i < nr; i++) {
+                       if (id == events[i].id)
+                               return 1;
+               }
        }
+
        return 0;
 }
 
@@ -194,34 +204,36 @@ validate_freq(unsigned int freq)
 static int
 set_parameters_for_cpu(struct quadd_pmu_setup_for_cpu *params)
 {
-       int i;
-       int err;
-       int nr_pmu = 0;
+       int i, err, nr_pmu = 0;
        int cpuid = params->cpuid;
 
        struct source_info *pmu_info = &per_cpu(ctx_pmu_info, cpuid);
-       int pmu_events_id[QUADD_MAX_COUNTERS];
+       struct quadd_event pmu_events[QUADD_MAX_COUNTERS];
 
        if (!pmu_info->is_present)
                return -ENODEV;
 
+       if (pmu_info->nr_supp_events == 0)
+               return -ENODEV;
+
        for (i = 0; i < params->nr_events; i++) {
-               int event = params->events[i];
-
-               if (pmu_info->nr_supported_events > 0
-                       && is_event_supported(pmu_info, event)) {
-                       pmu_events_id[nr_pmu++] = event;
-                       pr_info("PMU active event for cpu %d: %s\n",
-                                       cpuid,
-                                       quadd_get_event_str(event));
+               struct quadd_event *event = &params->events[i];
+
+               if (is_event_supported(pmu_info, event)) {
+                       pmu_events[nr_pmu++] = *event;
+                       pr_info("[%d] PMU active event: %#x (%s)\n",
+                               cpuid, event->id,
+                               event->type == QUADD_EVENT_TYPE_RAW ?
+                               "raw" : "hw");
                } else {
-                       pr_err("Bad event: %s\n",
-                              quadd_get_event_str(event));
+                       pr_err("[%d] Bad event: %#x (%s)\n", cpuid, event->id,
+                              event->type == QUADD_EVENT_TYPE_RAW ?
+                              "raw" : "hw");
                        return -EINVAL;
                }
        }
 
-       err = ctx.pmu->set_events(cpuid, pmu_events_id, nr_pmu);
+       err = ctx.pmu->set_events(cpuid, pmu_events, nr_pmu);
        if (err) {
                pr_err("PMU set parameters: error\n");
                return err;
@@ -236,7 +248,7 @@ set_parameters(struct quadd_parameters *p)
 {
        int i, err, uid = 0;
        uid_t task_uid, current_uid;
-       int pl310_events_id;
+       struct quadd_event *pl310_events;
        int nr_pl310 = 0;
        struct task_struct *task;
        u64 *low_addr_p;
@@ -291,15 +303,22 @@ set_parameters(struct quadd_parameters *p)
        }
 
        for (i = 0; i < p->nr_events; i++) {
-               int event = p->events[i];
+               unsigned int type, id;
+               struct quadd_event *event = &p->events[i];
+
+               type = event->type;
+               id = event->id;
+
+               if (type != QUADD_EVENT_TYPE_HARDWARE)
+                       return -EINVAL;
 
                if (ctx.pl310 &&
-                   ctx.pl310_info.nr_supported_events > 0 &&
+                   ctx.pl310_info.nr_supp_events > 0 &&
                    is_event_supported(&ctx.pl310_info, event)) {
-                       pl310_events_id = p->events[i];
+                       pl310_events = event;
 
                        pr_info("PL310 active event: %s\n",
-                               quadd_get_event_str(event));
+                               quadd_get_hw_event_str(id));
 
                        if (nr_pl310++ > 1) {
                                pr_err("error: multiply pl310 events\n");
@@ -307,7 +326,7 @@ set_parameters(struct quadd_parameters *p)
                        }
                } else {
                        pr_err("Bad event: %s\n",
-                              quadd_get_event_str(event));
+                              quadd_get_hw_event_str(id));
                        return -EINVAL;
                }
        }
@@ -316,7 +335,7 @@ set_parameters(struct quadd_parameters *p)
                int cpuid = 0; /* We don't need cpuid for pl310.  */
 
                if (nr_pl310 == 1) {
-                       err = ctx.pl310->set_events(cpuid, &pl310_events_id, 1);
+                       err = ctx.pl310->set_events(cpuid, pl310_events, 1);
                        if (err) {
                                pr_info("pl310 set_parameters: error\n");
                                return err;
@@ -344,16 +363,21 @@ set_parameters(struct quadd_parameters *p)
 static void
 get_capabilities_for_cpu(int cpuid, struct quadd_comm_cap_for_cpu *cap)
 {
-       int i;
+       int i, id;
        struct quadd_events_cap *events_cap;
        struct source_info *s = &per_cpu(ctx_pmu_info, cpuid);
 
        if (!s->is_present)
                return;
 
+       cap->cpuid = cpuid;
+       cap->l2_cache = 0;
+       cap->l2_multiple_events = 0;
+
        events_cap = &cap->events_cap;
 
-       cap->cpuid = cpuid;
+       events_cap->raw_event_mask = s->raw_event_mask;
+
        events_cap->cpu_cycles = 0;
        events_cap->l1_dcache_read_misses = 0;
        events_cap->l1_dcache_write_misses = 0;
@@ -368,51 +392,52 @@ get_capabilities_for_cpu(int cpuid, struct quadd_comm_cap_for_cpu *cap)
        events_cap->l2_dcache_write_misses = 0;
        events_cap->l2_icache_misses = 0;
 
-       for (i = 0; i < s->nr_supported_events; i++) {
-               int event = s->supported_events[i];
+       for (i = 0; i < s->nr_supp_events; i++) {
+               struct quadd_event *event = &s->supp_events[i];
+
+               id = event->id;
 
-               if (event == QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES ||
-                   event == QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES ||
-                   event == QUADD_EVENT_TYPE_L2_ICACHE_MISSES) {
+               if (id == QUADD_EVENT_HW_L2_DCACHE_READ_MISSES ||
+                   id == QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES ||
+                   id == QUADD_EVENT_HW_L2_ICACHE_MISSES) {
                        cap->l2_cache = 1;
                        cap->l2_multiple_events = 1;
                }
 
-
-               switch (event) {
-               case QUADD_EVENT_TYPE_CPU_CYCLES:
+               switch (id) {
+               case QUADD_EVENT_HW_CPU_CYCLES:
                        events_cap->cpu_cycles = 1;
                        break;
-               case QUADD_EVENT_TYPE_INSTRUCTIONS:
+               case QUADD_EVENT_HW_INSTRUCTIONS:
                        events_cap->instructions = 1;
                        break;
-               case QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS:
+               case QUADD_EVENT_HW_BRANCH_INSTRUCTIONS:
                        events_cap->branch_instructions = 1;
                        break;
-               case QUADD_EVENT_TYPE_BRANCH_MISSES:
+               case QUADD_EVENT_HW_BRANCH_MISSES:
                        events_cap->branch_misses = 1;
                        break;
-               case QUADD_EVENT_TYPE_BUS_CYCLES:
+               case QUADD_EVENT_HW_BUS_CYCLES:
                        events_cap->bus_cycles = 1;
                        break;
 
-               case QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES:
+               case QUADD_EVENT_HW_L1_DCACHE_READ_MISSES:
                        events_cap->l1_dcache_read_misses = 1;
                        break;
-               case QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES:
+               case QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES:
                        events_cap->l1_dcache_write_misses = 1;
                        break;
-               case QUADD_EVENT_TYPE_L1_ICACHE_MISSES:
+               case QUADD_EVENT_HW_L1_ICACHE_MISSES:
                        events_cap->l1_icache_misses = 1;
                        break;
 
-               case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES:
+               case QUADD_EVENT_HW_L2_DCACHE_READ_MISSES:
                        events_cap->l2_dcache_read_misses = 1;
                        break;
-               case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES:
+               case QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES:
                        events_cap->l2_dcache_write_misses = 1;
                        break;
-               case QUADD_EVENT_TYPE_L2_ICACHE_MISSES:
+               case QUADD_EVENT_HW_L2_ICACHE_MISSES:
                        events_cap->l2_icache_misses = 1;
                        break;
 
@@ -475,19 +500,23 @@ get_capabilities(struct quadd_comm_cap *cap)
        events_cap->l2_icache_misses = 0;
 
        if (ctx.pl310) {
+               unsigned int type, id;
                struct source_info *s = &ctx.pl310_info;
 
-               for (i = 0; i < s->nr_supported_events; i++) {
-                       int event = s->supported_events[i];
+               for (i = 0; i < s->nr_supp_events; i++) {
+                       struct quadd_event *event = &s->supp_events[i];
+
+                       type = event->type;
+                       id = event->id;
 
-                       switch (event) {
-                       case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES:
+                       switch (id) {
+                       case QUADD_EVENT_HW_L2_DCACHE_READ_MISSES:
                                events_cap->l2_dcache_read_misses = 1;
                                break;
-                       case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES:
+                       case QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES:
                                events_cap->l2_dcache_write_misses = 1;
                                break;
-                       case QUADD_EVENT_TYPE_L2_ICACHE_MISSES:
+                       case QUADD_EVENT_HW_L2_ICACHE_MISSES:
                                events_cap->l2_icache_misses = 1;
                                break;
 
@@ -573,7 +602,8 @@ static struct quadd_comm_control_interface control = {
 static int __init quadd_module_init(void)
 {
        int i, nr_events, err;
-       int *events;
+       unsigned int raw_event_mask;
+       struct quadd_event *events;
        int cpuid;
 
        pr_info("Branch: %s\n", QUADD_MODULE_BRANCH);
@@ -620,19 +650,21 @@ static int __init quadd_module_init(void)
                pmu_info = &per_cpu(ctx_pmu_info, cpuid);
                pmu_info->is_present = 1;
 
-               events = pmu_info->supported_events;
+               events = pmu_info->supp_events;
                nr_events =
                    ctx.pmu->get_supported_events(cpuid, events,
-                                                 QUADD_MAX_COUNTERS);
+                                                 QUADD_MAX_COUNTERS,
+                                                 &raw_event_mask);
 
-               pmu_info->nr_supported_events = nr_events;
+               pmu_info->nr_supp_events = nr_events;
+               pmu_info->raw_event_mask = raw_event_mask;
 
-               pr_debug("CPU: %d PMU: amount of events: %d\n",
-                        cpuid, nr_events);
+               pr_debug("CPU: %d PMU: amount of events: %d, raw mask: %#x\n",
+                        cpuid, nr_events, raw_event_mask);
 
                for (i = 0; i < nr_events; i++)
                        pr_debug("CPU: %d PMU event: %s\n", cpuid,
-                                quadd_get_event_str(events[i]));
+                                quadd_get_hw_event_str(events[i].id));
        }
 
 #ifdef CONFIG_CACHE_L2X0
@@ -641,17 +673,18 @@ static int __init quadd_module_init(void)
        ctx.pl310 = NULL;
 #endif
        if (ctx.pl310) {
-               events = ctx.pl310_info.supported_events;
+               events = ctx.pl310_info.supp_events;
                nr_events = ctx.pl310->get_supported_events(0, events,
-                                                           QUADD_MAX_COUNTERS);
-               ctx.pl310_info.nr_supported_events = nr_events;
+                                                           QUADD_MAX_COUNTERS,
+                                                           &raw_event_mask);
+               ctx.pl310_info.nr_supp_events = nr_events;
 
                pr_info("pl310 success, amount of events: %d\n",
                        nr_events);
 
                for (i = 0; i < nr_events; i++)
                        pr_info("pl310 event: %s\n",
-                               quadd_get_event_str(events[i]));
+                               quadd_get_hw_event_str(events[i].id));
        } else {
                pr_debug("PL310 not found\n");
        }
index 0172bb1..0eaca61 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/pl310.c
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -230,8 +230,11 @@ l2x0_events_read_emulate(int cpuid, struct event_data *events, int max_events)
        return 1;
 }
 
-static int __maybe_unused l2x0_set_events(int cpuid, int *events, int size)
+static int __maybe_unused
+l2x0_set_events(int cpuid, const struct quadd_event *events, int size)
 {
+       unsigned int id, type;
+
        if (!events || size == 0) {
                l2x0_ctx.l2x0_event_type = -1;
                l2x0_ctx.event_id = -1;
@@ -243,35 +246,50 @@ static int __maybe_unused l2x0_set_events(int cpuid, int *events, int size)
                return -ENOSPC;
        }
 
-       switch (*events) {
-       case QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES:
+       type = events->type;
+       id = events->id;
+
+       if (type != QUADD_EVENT_TYPE_HARDWARE)
+               return -EINVAL;
+
+       switch (id) {
+       case QUADD_EVENT_HW_L2_DCACHE_READ_MISSES:
                l2x0_ctx.l2x0_event_type = QUADD_L2X0_TYPE_DATA_READ_MISSES;
                break;
-       case QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES:
+       case QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES:
                l2x0_ctx.l2x0_event_type = QUADD_L2X0_TYPE_DATA_WRITE_MISSES;
                break;
-       case QUADD_EVENT_TYPE_L2_ICACHE_MISSES:
+       case QUADD_EVENT_HW_L2_ICACHE_MISSES:
                l2x0_ctx.l2x0_event_type = QUADD_L2X0_TYPE_INSTRUCTION_MISSES;
                break;
        default:
-               pr_err("Error event: %s\n", quadd_get_event_str(*events));
+               pr_err("Error event: %s\n", quadd_get_hw_event_str(id));
                return 1;
        }
-       l2x0_ctx.event_id = *events;
+       l2x0_ctx.event_id = id;
 
        pr_info("Event has been added: id/l2x0: %s/%#x\n",
-               quadd_get_event_str(*events), l2x0_ctx.l2x0_event_type);
+               quadd_get_hw_event_str(id), l2x0_ctx.l2x0_event_type);
        return 0;
 }
 
-static int get_supported_events(int cpuid, int *events, int max_events)
+static int
+get_supported_events(int cpuid, struct quadd_event *events,
+                    int max_events, unsigned int *raw_event_mask)
 {
        if (max_events < 3)
                return 0;
 
-       events[0] = QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES;
-       events[1] = QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES;
-       events[2] = QUADD_EVENT_TYPE_L2_ICACHE_MISSES;
+       events[0].type = QUADD_EVENT_TYPE_HARDWARE;
+       events[0].id = QUADD_EVENT_HW_L2_DCACHE_READ_MISSES;
+
+       events[1].type = QUADD_EVENT_TYPE_HARDWARE;
+       events[1].id = QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES;
+
+       events[2].type = QUADD_EVENT_TYPE_HARDWARE;
+       events[2].id = QUADD_EVENT_HW_L2_ICACHE_MISSES;
+
+       *raw_event_mask = 0;
 
        return 3;
 }
index 1cd4cf6..80fd39f 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/quadd.h
  *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
 
 /* #define QUADD_USE_EMULATE_COUNTERS  1 */
 
-struct event_data;
 struct quadd_comm_data_interface;
 struct quadd_hrt_ctx;
 struct quadd_module_state;
 struct quadd_arch_info;
 
+struct event_data {
+       int event_source;
+       struct quadd_event event;
+
+       u32 val;
+       u32 prev_val;
+};
+
 struct quadd_event_source_interface {
        int (*enable)(void);
        void (*disable)(void);
        void (*start)(void);
        void (*stop)(void);
        int (*read)(struct event_data *events, int max_events);
-       int (*set_events)(int cpuid, int *events, int size);
-       int (*get_supported_events)(int cpuid, int *events, int max_events);
-       int (*get_current_events)(int cpuid, int *events, int max_events);
+       int (*set_events)(int cpuid, const struct quadd_event *events,
+                         int size);
+       int (*get_supported_events)(int cpuid, struct quadd_event *events,
+                                   int max_events,
+                                   unsigned int *raw_event_mask);
+       int (*get_current_events)(int cpuid, struct quadd_event *events,
+                                 int max_events);
        struct quadd_arch_info * (*get_arch)(int cpuid);
 };
 
 struct source_info {
-       int supported_events[QUADD_MAX_COUNTERS];
-       int nr_supported_events;
+       struct quadd_event supp_events[QUADD_MAX_COUNTERS];
+       int nr_supp_events;
+
+       unsigned int raw_event_mask;
 
        int is_present;
        int active;
index bf9e339..a58b3e9 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/quadd_proc.c
  *
- * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -67,29 +67,8 @@ static int show_capabilities(struct seq_file *f, void *offset)
                   YES_NO(cap->tegra_lp_cluster));
        seq_printf(f, "power rate samples:                    %s\n",
                   YES_NO(cap->power_rate));
-
-       seq_printf(f, "support polling mode:                  %s\n",
-                  YES_NO(cap->blocked_read));
-       seq_printf(f, "backtrace from the kernel ctx:         %s\n",
-                  YES_NO(extra & QUADD_COMM_CAP_EXTRA_BT_KERNEL_CTX));
-       seq_printf(f, "send mmap regions at the start:        %s\n",
-                  YES_NO(extra & QUADD_COMM_CAP_EXTRA_GET_MMAP));
-       seq_printf(f, "group samples:                         %s\n",
-                  YES_NO(extra & QUADD_COMM_CAP_EXTRA_GROUP_SAMPLES));
-       seq_printf(f, "unwinding based on ex-handling tables: %s\n",
-                  YES_NO(extra & QUADD_COMM_CAP_EXTRA_BT_UNWIND_TABLES));
-       seq_printf(f, "support AArch64 architecture:          %s\n",
-                  YES_NO(extra & QUADD_COMM_CAP_EXTRA_SUPPORT_AARCH64));
-       seq_printf(f, "support special architecture mappings: %s\n",
-                  YES_NO(extra & QUADD_COMM_CAP_EXTRA_SPECIAL_ARCH_MMAP));
-       seq_printf(f, "support mixed unwinding mode:          %s\n",
-                  YES_NO(extra & QUADD_COMM_CAP_EXTRA_UNWIND_MIXED));
-       seq_printf(f, "information about unwind entry:        %s\n",
-                  YES_NO(extra & QUADD_COMM_CAP_EXTRA_UNW_ENTRY_TYPE));
        seq_printf(f, "arch timer is available:               %s\n",
                   YES_NO(extra & QUADD_COMM_CAP_EXTRA_ARCH_TIMER));
-       seq_printf(f, "ring buffer mmap operation:            %s\n",
-                  YES_NO(extra & QUADD_COMM_CAP_EXTRA_RB_MMAP_OP));
 
        seq_puts(f, "\n");
 
@@ -142,6 +121,9 @@ static int show_capabilities(struct seq_file *f, void *offset)
                                   YES_NO(event->l2_dcache_write_misses));
                        seq_printf(f, "  l2_icache_misses:               %s\n",
                                   YES_NO(event->l2_icache_misses));
+
+                       seq_printf(f, "raw_event_mask:                   %#x\n",
+                                  event->raw_event_mask);
                }
        }
 
index d30186e..aee99ef 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/version.h
  *
- * Copyright (c) 2013-2016, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2013-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,7 +18,7 @@
 #ifndef __QUADD_VERSION_H
 #define __QUADD_VERSION_H
 
-#define QUADD_MODULE_VERSION           "1.109"
+#define QUADD_MODULE_VERSION           "1.110"
 #define QUADD_MODULE_BRANCH            "Dev"
 
 #endif /* __QUADD_VERSION_H */
index 8405fa4..d71917f 100644 (file)
@@ -19,8 +19,8 @@
 
 #include <linux/ioctl.h>
 
-#define QUADD_SAMPLES_VERSION  37
-#define QUADD_IO_VERSION       21
+#define QUADD_SAMPLES_VERSION  38
+#define QUADD_IO_VERSION       22
 
 #define QUADD_IO_VERSION_DYNAMIC_RB            5
 #define QUADD_IO_VERSION_RB_MAX_FILL_COUNT     6
@@ -39,6 +39,7 @@
 #define QUADD_IO_VERSION_PER_CPU_SETUP         19
 #define QUADD_IO_VERSION_TRACE_ALL_TASKS       20
 #define QUADD_IO_VERSION_CB_POWER_OF_2         21
+#define QUADD_IO_VERSION_RAW_EVENTS            22
 
 #define QUADD_SAMPLE_VERSION_THUMB_MODE_FLAG   17
 #define QUADD_SAMPLE_VERSION_GROUP_SAMPLES     18
@@ -59,6 +60,7 @@
 #define QUADD_SAMPLE_VERSION_PER_CPU_SETUP     35
 #define QUADD_SAMPLE_VERSION_REPORT_TGID       36
 #define QUADD_SAMPLE_VERSION_MMAP_TS           37
+#define QUADD_SAMPLE_VERSION_RAW_EVENTS                38
 
 #define QUADD_MMAP_HEADER_VERSION              1
 
 #define QUADD_CPUMODE_THUMB                    (1 << 30)       /* thumb mode */
 
 enum quadd_events_id {
-       QUADD_EVENT_TYPE_CPU_CYCLES = 0,
+       QUADD_EVENT_HW_CPU_CYCLES = 0,
 
-       QUADD_EVENT_TYPE_INSTRUCTIONS,
-       QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS,
-       QUADD_EVENT_TYPE_BRANCH_MISSES,
-       QUADD_EVENT_TYPE_BUS_CYCLES,
+       QUADD_EVENT_HW_INSTRUCTIONS,
+       QUADD_EVENT_HW_BRANCH_INSTRUCTIONS,
+       QUADD_EVENT_HW_BRANCH_MISSES,
+       QUADD_EVENT_HW_BUS_CYCLES,
 
-       QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES,
-       QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES,
-       QUADD_EVENT_TYPE_L1_ICACHE_MISSES,
+       QUADD_EVENT_HW_L1_DCACHE_READ_MISSES,
+       QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES,
+       QUADD_EVENT_HW_L1_ICACHE_MISSES,
 
-       QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES,
-       QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES,
-       QUADD_EVENT_TYPE_L2_ICACHE_MISSES,
+       QUADD_EVENT_HW_L2_DCACHE_READ_MISSES,
+       QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES,
+       QUADD_EVENT_HW_L2_ICACHE_MISSES,
 
-       QUADD_EVENT_TYPE_MAX,
-};
-
-struct event_data {
-       int event_source;
-       int event_id;
-
-       u32 val;
-       u32 prev_val;
+       QUADD_EVENT_HW_MAX,
 };
 
 enum quadd_record_type {
@@ -414,6 +408,18 @@ enum {
 #define QUADD_PARAM_EXTRA_BT_DWARF             (1 << 7)
 #define QUADD_PARAM_EXTRA_PER_PMU_SETUP                (1 << 8)
 
+enum {
+       QUADD_EVENT_TYPE_RAW            = 0,
+       QUADD_EVENT_TYPE_HARDWARE       = 1,
+
+       QUADD_EVENT_TYPE_MAX,
+};
+
+struct quadd_event {
+       u32 type;
+       u32 id;
+};
+
 struct quadd_parameters {
        u32 freq;
        u32 ma_freq;
@@ -430,7 +436,7 @@ struct quadd_parameters {
 
        u8 package_name[QUADD_MAX_PACKAGE_NAME];
 
-       u32 events[QUADD_MAX_COUNTERS];
+       struct quadd_event events[QUADD_MAX_COUNTERS];
        u32 nr_events;
 
        u32 reserved[16];       /* reserved fields for future extensions */
@@ -438,7 +444,8 @@ struct quadd_parameters {
 
 struct quadd_pmu_setup_for_cpu {
        u32 cpuid;
-       u32 events[QUADD_MAX_COUNTERS];
+
+       struct quadd_event events[QUADD_MAX_COUNTERS];
        u32 nr_events;
 
        u32 reserved[16];
@@ -458,6 +465,8 @@ struct quadd_events_cap {
                l2_dcache_read_misses:1,
                l2_dcache_write_misses:1,
                l2_icache_misses:1;
+
+       u32 raw_event_mask;
 };
 
 enum {
@@ -493,7 +502,8 @@ struct quadd_comm_cap {
 struct quadd_comm_cap_for_cpu {
        u32     l2_cache:1,
                l2_multiple_events:1;
-       int cpuid;
+
+       u8 cpuid;
        struct quadd_events_cap events_cap;
 };