tegra-profiler: non-disruptive counter tracking
Igor Nabirushkin [Mon, 27 Jan 2014 08:42:30 +0000 (12:42 +0400)]
Tegra Profiler: do not affect the already used counters

Bug 1447839

Change-Id: I4da319e6c2bc853a63c9e1ae4210be9b5a60cc3b
Signed-off-by: Igor Nabirushkin <inabirushkin@nvidia.com>
Reviewed-on: http://git-master/r/360296
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Tested-by: Bharat Nihalani <bnihalani@nvidia.com>

drivers/misc/tegra-profiler/armv7_pmu.c
drivers/misc/tegra-profiler/armv7_pmu.h
drivers/misc/tegra-profiler/main.c
drivers/misc/tegra-profiler/version.h

index 63abd0f..5c94dba 100644 (file)
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+
 #include <asm/cputype.h>
 #include <asm/pmu.h>
 
 
 static struct armv7_pmu_ctx pmu_ctx;
 
-DEFINE_PER_CPU(u32[QUADD_MAX_PMU_COUNTERS], pmu_prev_val);
+struct quadd_pmu_info {
+       DECLARE_BITMAP(used_cntrs, QUADD_MAX_PMU_COUNTERS);
+       u32 prev_vals[QUADD_MAX_PMU_COUNTERS];
+       int is_already_active;
+};
+
+struct quadd_cntrs_info {
+       int pcntrs;
+       int ccntr;
+
+       spinlock_t lock;
+};
+
+static DEFINE_PER_CPU(struct quadd_pmu_info, cpu_pmu_info);
 
 static unsigned quadd_armv7_a9_events_map[QUADD_EVENT_TYPE_MAX] = {
        [QUADD_EVENT_TYPE_INSTRUCTIONS] =
@@ -79,138 +95,238 @@ static unsigned quadd_armv7_a15_events_map[QUADD_EVENT_TYPE_MAX] = {
                                QUADD_ARMV7_UNSUPPORTED_EVENT,
 };
 
-static u32 armv7_pmu_pmnc_read(void)
+static inline u32
+armv7_pmu_pmnc_read(void)
 {
        u32 val;
        asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
        return val;
 }
 
-static void armv7_pmu_pmnc_write(u32 val)
+static inline void
+armv7_pmu_pmnc_write(u32 val)
 {
-       val &= QUADD_ARMV7_PMNC_MASK;
-       asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
+       /* Write Performance MoNitor Control (PMNC) register */
+       asm volatile("mcr p15, 0, %0, c9, c12, 0" : :
+                    "r"(val & QUADD_ARMV7_PMNC_MASK));
 }
 
-static void armv7_pmu_pmnc_enable_counter(int index)
+static inline u32
+armv7_pmu_cntens_read(void)
 {
        u32 val;
 
-       if (index == QUADD_ARMV7_CYCLE_COUNTER)
-               val = QUADD_ARMV7_CCNT;
-       else
-               val = 1 << index;
+       /* Read CouNT ENable Set (CNTENS) register */
+       asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(val));
+       return val;
+}
 
+static inline void
+armv7_pmu_cntens_write(u32 val)
+{
+       /* Write CouNT ENable Set (CNTENS) register */
        asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
 }
 
-static void armv7_pmu_select_counter(unsigned int idx)
+static inline void
+armv7_pmu_cntenc_write(u32 val)
 {
-       u32 val;
+       /* Write CouNT ENable Clear (CNTENC) register */
+       asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
+}
 
-       val = idx & QUADD_ARMV7_SELECT_MASK;
-       asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+static inline void
+armv7_pmu_pmnxsel_write(u32 val)
+{
+       /* Write Performance Counter SELection (PMNXSEL) register */
+       asm volatile("mcr p15, 0, %0, c9, c12, 5" : :
+                    "r" (val & QUADD_ARMV7_SELECT_MASK));
 }
 
-static u32 armv7_pmu_adjust_value(u32 value, int event_id)
+static inline u32
+armv7_pmu_ccnt_read(void)
 {
-       /*
-       * Cortex A8/A9: l1 cache performance counters
-       * don't differentiate between read and write data accesses/misses,
-       * so currently we are devided by two
-       */
-       if (pmu_ctx.l1_cache_rw &&
-           (pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A8 ||
-           pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A9) &&
-           (event_id == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES ||
-           event_id == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)) {
-               return value / 2;
-       }
-       return value;
+       u32 val;
+
+       /* Read Cycle CouNT (CCNT) register */
+       asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r"(val));
+       return val;
 }
 
-static u32 armv7_pmu_read_counter(int idx)
+static inline void
+armv7_pmu_ccnt_write(u32 val)
 {
-       u32 val = 0;
+       /* Write Cycle CouNT (CCNT) register */
+       asm volatile ("mcr p15, 0, %0, c9, c13, 0" : : "r"(val));
+}
 
-       if (idx == QUADD_ARMV7_CYCLE_COUNTER) {
-               /* Cycle count register (PMCCNTR) reading */
-               asm volatile ("MRC p15, 0, %0, c9, c13, 0" : "=r"(val));
-       } else {
-               /* counter selection*/
-               armv7_pmu_select_counter(idx);
-               /* event count register reading */
-               asm volatile ("MRC p15, 0, %0, c9, c13, 2" : "=r"(val));
-       }
+static inline u32
+armv7_pmu_pmcnt_read(void)
+{
+       u32 val;
 
+       /* Read Performance Monitor CouNT (PMCNTx) registers */
+       asm volatile ("mrc p15, 0, %0, c9, c13, 2" : "=r"(val));
        return val;
 }
 
-static __attribute__((unused)) void armv7_pmu_write_counter(int idx, u32 value)
+static inline void
+armv7_pmu_pmcnt_write(u32 val)
 {
-       if (idx == QUADD_ARMV7_CYCLE_COUNTER) {
-               /* Cycle count register (PMCCNTR) writing */
-               asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
-       } else {
-               /* counter selection*/
-               armv7_pmu_select_counter(idx);
-               /* event count register writing */
-               asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
-       }
+       /* Write Performance Monitor CouNT (PMCNTx) registers */
+       asm volatile ("mcr p15, 0, %0, c9, c13, 2" : : "r"(val));
 }
 
-static void armv7_pmu_event_select(u32 event)
+static inline void
+armv7_pmu_evtsel_write(u32 event)
 {
-       event &= QUADD_ARMV7_EVTSEL_MASK;
-       asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (event));
+       /* Write Event SELection (EVTSEL) register */
+       asm volatile("mcr p15, 0, %0, c9, c13, 1" : :
+                    "r" (event & QUADD_ARMV7_EVTSEL_MASK));
 }
 
-static __attribute__((unused)) void armv7_pmnc_enable_interrupt(int idx)
+static inline u32
+armv7_pmu_intens_read(void)
 {
        u32 val;
 
-       if (idx == QUADD_ARMV7_CYCLE_COUNTER)
-               val = QUADD_ARMV7_CCNT;
-       else
-               val = 1 << idx;
+       /* Read INTerrupt ENable Set (INTENS) register */
+       asm volatile ("mrc p15, 0, %0, c9, c14, 1" : "=r"(val));
+       return val;
+}
+
+static inline void
+armv7_pmu_intens_write(u32 val)
+{
+       /* Write INTerrupt ENable Set (INTENS) register */
+       asm volatile ("mcr p15, 0, %0, c9, c14, 1" : : "r"(val));
+}
+
+static inline void
+armv7_pmu_intenc_write(u32 val)
+{
+       /* Write INTerrupt ENable Clear (INTENC) register */
+       asm volatile ("mcr p15, 0, %0, c9, c14, 2" : : "r"(val));
+}
 
-       asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
+static void enable_counter(int idx)
+{
+       armv7_pmu_cntens_write(1UL << idx);
 }
 
-static __attribute__((unused)) void armv7_pmnc_disable_interrupt(int idx)
+static void disable_counter(int idx)
+{
+       armv7_pmu_cntenc_write(1UL << idx);
+}
+
+static void select_counter(unsigned int counter)
+{
+       armv7_pmu_pmnxsel_write(counter);
+}
+
+static int is_pmu_enabled(void)
+{
+       u32 pmnc = armv7_pmu_pmnc_read();
+
+       if (pmnc & QUADD_ARMV7_PMNC_E) {
+               u32 cnten = armv7_pmu_cntens_read();
+               cnten &= pmu_ctx.counters_mask | QUADD_ARMV7_CCNT;
+               return cnten ? 1 : 0;
+       }
+
+       return 0;
+}
+
+static u32 read_counter(int idx)
 {
        u32 val;
 
-       if (idx == QUADD_ARMV7_CYCLE_COUNTER)
-               val = QUADD_ARMV7_CCNT;
-       else
-               val = 1 << idx;
+       if (idx == QUADD_ARMV7_CCNT_BIT) {
+               val = armv7_pmu_ccnt_read();
+       } else {
+               select_counter(idx);
+               val = armv7_pmu_pmcnt_read();
+       }
 
-       asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
+       return val;
 }
 
-static void armv7_pmnc_disable_all_interrupts(void)
+static void write_counter(int idx, u32 value)
 {
-       u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask;
+       if (idx == QUADD_ARMV7_CCNT_BIT) {
+               armv7_pmu_ccnt_write(value);
+       } else {
+               select_counter(idx);
+               armv7_pmu_pmcnt_write(value);
+       }
+}
+
+static int
+get_free_counters(unsigned long *bitmap, int nbits, int *ccntr)
+{
+       int cc;
+       u32 cntens;
+
+       cntens = armv7_pmu_cntens_read();
+       cntens = ~cntens & (pmu_ctx.counters_mask | QUADD_ARMV7_CCNT);
+
+       bitmap_zero(bitmap, nbits);
+       bitmap_copy(bitmap, (unsigned long *)&cntens,
+                   BITS_PER_BYTE * sizeof(u32));
+
+       cc = (cntens & QUADD_ARMV7_CCNT) ? 1 : 0;
+
+       if (ccntr)
+               *ccntr = cc;
+
+       return bitmap_weight(bitmap, BITS_PER_BYTE * sizeof(u32)) - cc;
+}
+
+static u32 armv7_pmu_adjust_value(u32 value, int event_id)
+{
+       /*
+       * Cortex A8/A9: l1 cache performance counters
+       * don't differentiate between read and write data accesses/misses,
+       * so currently the value is divided by two
+       */
+       if (pmu_ctx.l1_cache_rw &&
+           (pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A8 ||
+           pmu_ctx.arch == QUADD_ARM_CPU_TYPE_CORTEX_A9) &&
+           (event_id == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES ||
+           event_id == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)) {
+               return value / 2;
+       }
+       return value;
+}
 
-       asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
+static void __maybe_unused
+disable_interrupt(int idx)
+{
+       armv7_pmu_intenc_write(1UL << idx);
 }
 
-static void armv7_pmnc_reset_overflow_flags(void)
+static void
+disable_all_interrupts(void)
 {
        u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask;
+       armv7_pmu_intenc_write(val);
+}
 
+static void
+armv7_pmnc_reset_overflow_flags(void)
+{
+       u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask;
        asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
 }
 
-static inline void select_event(unsigned int idx, unsigned int event)
+static void
+select_event(unsigned int idx, unsigned int event)
 {
-       /* counter selection */
-       armv7_pmu_select_counter(idx);
-       armv7_pmu_event_select(event);
+       select_counter(idx);
+       armv7_pmu_evtsel_write(event);
 }
 
-static inline void disable_all_counters(void)
+static void disable_all_counters(void)
 {
        u32 val;
 
@@ -218,9 +334,11 @@ static inline void disable_all_counters(void)
        val = armv7_pmu_pmnc_read();
        if (val & QUADD_ARMV7_PMNC_E)
                armv7_pmu_pmnc_write(val & ~QUADD_ARMV7_PMNC_E);
+
+       armv7_pmu_cntenc_write(QUADD_ARMV7_CCNT | pmu_ctx.counters_mask);
 }
 
-static inline void enable_all_counters(void)
+static void enable_all_counters(void)
 {
        u32 val;
 
@@ -230,13 +348,7 @@ static inline void enable_all_counters(void)
        armv7_pmu_pmnc_write(val);
 }
 
-static inline void quadd_init_pmu(void)
-{
-       armv7_pmnc_reset_overflow_flags();
-       armv7_pmnc_disable_all_interrupts();
-}
-
-static inline void reset_all_counters(void)
+static void reset_all_counters(void)
 {
        u32 val;
 
@@ -245,48 +357,123 @@ static inline void reset_all_counters(void)
        armv7_pmu_pmnc_write(val);
 }
 
+static void quadd_init_pmu(void)
+{
+       armv7_pmnc_reset_overflow_flags();
+       disable_all_interrupts();
+}
+
 static int pmu_enable(void)
 {
        return 0;
 }
 
+static void __pmu_disable(void *arg)
+{
+       struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
+
+       if (!pi->is_already_active) {
+               pr_info("[%d] reset all counters\n",
+                       smp_processor_id());
+
+               disable_all_counters();
+               reset_all_counters();
+       } else {
+               int idx;
+
+               for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
+                       pr_info("[%d] reset counter: %d\n",
+                               smp_processor_id(), idx);
+
+                       disable_counter(idx);
+                       write_counter(idx, 0);
+               }
+       }
+}
+
 static void pmu_disable(void)
 {
+       on_each_cpu(__pmu_disable, NULL, 1);
 }
 
 static void pmu_start(void)
 {
-       int i, idx;
+       int idx = 0, pcntrs, ccntr;
        u32 event;
-       u32 *prevp = __get_cpu_var(pmu_prev_val);
+       DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
+       struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
+       u32 *prevp = pi->prev_vals;
+       struct quadd_pmu_event_info *ei;
 
-       disable_all_counters();
-       quadd_init_pmu();
+       bitmap_zero(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS);
 
-       for (i = 0; i < pmu_ctx.nr_used_counters; i++) {
-               struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];
+       if (is_pmu_enabled()) {
+               pi->is_already_active = 1;
+       } else {
+               disable_all_counters();
+               quadd_init_pmu();
+
+               pi->is_already_active = 0;
+       }
+
+       pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);
+
+       list_for_each_entry(ei, &pmu_ctx.used_events, list) {
+               int index;
 
-               prevp[i] = 0;
+               *prevp++ = 0;
 
-               event = pmu_event->hw_value;
-               idx = pmu_event->counter_idx;
+               event = ei->hw_value;
 
-               if (idx != QUADD_ARMV7_CYCLE_COUNTER)
-                       select_event(idx, event);
+               if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
+                       if (!ccntr) {
+                               pr_err_once("Error: cpu cycles counter is already occupied\n");
+                               return;
+                       }
+                       index = QUADD_ARMV7_CCNT_BIT;
+               } else {
+                       if (!pcntrs--) {
+                               pr_err_once("Error: too many performance events\n");
+                               return;
+                       }
+
+                       index = find_next_bit(free_bitmap,
+                                             QUADD_MAX_PMU_COUNTERS, idx);
+                       if (index >= QUADD_MAX_PMU_COUNTERS) {
+                               pr_err_once("Error: too many events\n");
+                               return;
+                       }
+                       idx = index + 1;
+                       select_event(index, event);
+               }
+               set_bit(index, pi->used_cntrs);
 
-               armv7_pmu_pmnc_enable_counter(idx);
+               write_counter(index, 0);
+               enable_counter(index);
        }
 
-       reset_all_counters();
-       enable_all_counters();
+       if (!pi->is_already_active) {
+               reset_all_counters();
+               enable_all_counters();
+       }
 
        qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
 }
 
 static void pmu_stop(void)
 {
-       reset_all_counters();
-       disable_all_counters();
+       int idx;
+       struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
+
+       if (!pi->is_already_active) {
+               disable_all_counters();
+               reset_all_counters();
+       } else {
+               for_each_set_bit(idx, pi->used_cntrs, QUADD_MAX_PMU_COUNTERS) {
+                       disable_counter(idx);
+                       write_counter(idx, 0);
+               }
+       }
 
        qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
 }
@@ -294,104 +481,185 @@ static void pmu_stop(void)
 static int __maybe_unused
 pmu_read(struct event_data *events, int max_events)
 {
-       int idx, i, nr;
        u32 val;
-       u32 *prevp = __get_cpu_var(pmu_prev_val);
+       int idx = 0, i = 0;
+       struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
+       u32 *prevp = pi->prev_vals;
+       struct quadd_pmu_event_info *ei;
 
-       if (pmu_ctx.nr_used_counters == 0) {
-               pr_warn_once("error: counters were not initialized\n");
+       if (bitmap_empty(pi->used_cntrs, QUADD_MAX_PMU_COUNTERS)) {
+               pr_err_once("Error: counters were not initialized\n");
                return 0;
        }
 
-       nr = min_t(int, pmu_ctx.nr_used_counters, max_events);
+       list_for_each_entry(ei, &pmu_ctx.used_events, list) {
+               int index;
+
+               if (ei->quadd_event_id == QUADD_EVENT_TYPE_CPU_CYCLES) {
+                       if (!test_bit(QUADD_ARMV7_CCNT_BIT, pi->used_cntrs)) {
+                               pr_err_once("Error: ccntr is not used\n");
+                               return 0;
+                       }
+                       index = QUADD_ARMV7_CCNT_BIT;
+               } else {
+                       index = find_next_bit(pi->used_cntrs,
+                                             QUADD_MAX_PMU_COUNTERS, idx);
+                       idx = index + 1;
+
+                       if (index >= QUADD_MAX_PMU_COUNTERS) {
+                               pr_err_once("Error: perf counter is not used\n");
+                               return 0;
+                       }
+               }
 
-       for (i = 0; i < nr; i++) {
-               struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];
+               val = read_counter(index);
+               val = armv7_pmu_adjust_value(val, ei->quadd_event_id);
 
-               idx = pmu_event->counter_idx;
+               events->event_source = QUADD_EVENT_SOURCE_PMU;
+               events->event_id = ei->quadd_event_id;
 
-               val = armv7_pmu_read_counter(idx);
-               val = armv7_pmu_adjust_value(val, pmu_event->quadd_event_id);
+               events->val = val;
+               events->prev_val = *prevp;
 
-               events[i].event_source = QUADD_EVENT_SOURCE_PMU;
-               events[i].event_id = pmu_event->quadd_event_id;
+               *prevp = val;
 
-               events[i].val = val;
-               events[i].prev_val = prevp[i];
+               qm_debug_read_counter(events->event_id, events->prev_val,
+                                     events->val);
 
-               prevp[i] = val;
+               if (++i >= max_events)
+                       break;
 
-               qm_debug_read_counter(events[i].event_id, events[i].prev_val,
-                                     events[i].val);
+               events++;
+               prevp++;
        }
 
-       return nr;
+       return i;
 }
 
 static int __maybe_unused
 pmu_read_emulate(struct event_data *events, int max_events)
 {
-       int i, nr;
+       int i = 0;
        static u32 val = 100;
-       u32 *prevp = __get_cpu_var(pmu_prev_val);
+       struct quadd_pmu_info *pi = &__get_cpu_var(cpu_pmu_info);
+       u32 *prevp = pi->prev_vals;
+       struct quadd_pmu_event_info *ei;
 
-       nr = min_t(int, pmu_ctx.nr_used_counters, max_events);
-
-       for (i = 0; i < nr; i++) {
+       list_for_each_entry(ei, &pmu_ctx.used_events, list) {
                if (val > 200)
                        val = 100;
 
-               events[i].event_id = prevp[i];
-               events[i].val = val;
+               events->event_id = *prevp;
+               events->val = val;
 
+               *prevp = val;
                val += 5;
+
+               if (++i >= max_events)
+                       break;
+
+               events++;
+               prevp++;
        }
 
-       return nr;
+       return i;
+}
+
+static void __get_free_counters(void *arg)
+{
+       int pcntrs, ccntr;
+       DECLARE_BITMAP(free_bitmap, QUADD_MAX_PMU_COUNTERS);
+       struct quadd_cntrs_info *ci = arg;
+
+       pcntrs = get_free_counters(free_bitmap, QUADD_MAX_PMU_COUNTERS, &ccntr);
+
+       spin_lock(&ci->lock);
+
+       ci->pcntrs = min_t(int, pcntrs, ci->pcntrs);
+
+       if (!ccntr)
+               ci->ccntr = 0;
+
+       pr_info("[%d] pcntrs/ccntr: %d/%d, free_bitmap: %#lx\n",
+               smp_processor_id(), pcntrs, ccntr, free_bitmap[0]);
+
+       spin_unlock(&ci->lock);
+}
+
+static void free_events(struct list_head *head)
+{
+       struct quadd_pmu_event_info *entry, *next;
+
+       list_for_each_entry_safe(entry, next, head, list) {
+               list_del(&entry->list);
+               kfree(entry);
+       }
 }
 
 static int set_events(int *events, int size)
 {
-       int i, nr_l1_r = 0, nr_l1_w = 0, curr_idx = 0;
+       int free_pcntrs, err;
+       int i, nr_l1_r = 0, nr_l1_w = 0;
+       struct quadd_cntrs_info free_ci;
 
        pmu_ctx.l1_cache_rw = 0;
-       pmu_ctx.nr_used_counters = 0;
 
-       if (!events || size == 0)
-               return 0;
+       free_events(&pmu_ctx.used_events);
 
-       if (size > QUADD_MAX_PMU_COUNTERS) {
-               pr_err("Too many events (> %d)\n", QUADD_MAX_PMU_COUNTERS);
-               return -ENOSPC;
-       }
+       if (!events || !size)
+               return 0;
 
        if (!pmu_ctx.current_map) {
                pr_err("Invalid current_map\n");
                return -ENODEV;
        }
 
+       spin_lock_init(&free_ci.lock);
+       free_ci.pcntrs = QUADD_MAX_PMU_COUNTERS;
+       free_ci.ccntr = 1;
+
+       on_each_cpu(__get_free_counters, &free_ci, 1);
+
+       free_pcntrs = free_ci.pcntrs;
+       pr_info("free counters: pcntrs/ccntr: %d/%d\n",
+               free_pcntrs, free_ci.ccntr);
+
        for (i = 0; i < size; i++) {
-               struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];
+               struct quadd_pmu_event_info *ei;
 
                if (events[i] > QUADD_EVENT_TYPE_MAX) {
                        pr_err("Error event: %d\n", events[i]);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto out_free;
                }
 
-               if (curr_idx >= pmu_ctx.nr_counters) {
-                       pr_err("Too many events (> %d)\n",
-                              pmu_ctx.nr_counters);
-                       return -ENOSPC;
+               ei = kzalloc(sizeof(*ei), GFP_KERNEL);
+               if (!ei) {
+                       err = -ENOMEM;
+                       goto out_free;
                }
 
+               INIT_LIST_HEAD(&ei->list);
+               list_add_tail(&ei->list, &pmu_ctx.used_events);
+
                if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
-                       pmu_event->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT;
-                       pmu_event->counter_idx = QUADD_ARMV7_CYCLE_COUNTER;
+                       ei->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT;
+                       if (!free_ci.ccntr) {
+                               pr_err("Error: cpu cycles counter is already occupied\n");
+                               err = -EBUSY;
+                               goto out_free;
+                       }
                } else {
-                       pmu_event->hw_value = pmu_ctx.current_map[events[i]];
-                       pmu_event->counter_idx = curr_idx++;
+                       if (!free_pcntrs--) {
+                               pr_err("Error: too many performance events\n");
+                               err = -ENOSPC;
+                               goto out_free;
+                       }
+
+                       ei->hw_value = pmu_ctx.current_map[events[i]];
                }
-               pmu_event->quadd_event_id = events[i];
+
+               ei->quadd_event_id = events[i];
 
                if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
                        nr_l1_r++;
@@ -400,14 +668,17 @@ static int set_events(int *events, int size)
 
                pr_info("Event has been added: id/pmu value: %s/%#x\n",
                        quadd_get_event_str(events[i]),
-                       pmu_event->hw_value);
+                       ei->hw_value);
        }
-       pmu_ctx.nr_used_counters = size;
 
        if (nr_l1_r > 0 && nr_l1_w > 0)
                pmu_ctx.l1_cache_rw = 1;
 
        return 0;
+
+out_free:
+       free_events(&pmu_ctx.used_events);
+       return err;
 }
 
 static int get_supported_events(int *events, int max_events)
@@ -425,14 +696,17 @@ static int get_supported_events(int *events, int max_events)
 
 static int get_current_events(int *events, int max_events)
 {
-       int i;
+       int i = 0;
+       struct quadd_pmu_event_info *ei;
 
-       max_events = min_t(int, pmu_ctx.nr_used_counters, max_events);
+       list_for_each_entry(ei, &pmu_ctx.used_events, list) {
+               events[i++] = ei->quadd_event_id;
 
-       for (i = 0; i < max_events; i++)
-               events[i] = pmu_ctx.pmu_events[i].quadd_event_id;
+               if (i >= max_events)
+                       break;
+       }
 
-       return max_events;
+       return i;
 }
 
 static struct quadd_event_source_interface pmu_armv7_int = {
@@ -461,22 +735,20 @@ struct quadd_event_source_interface *quadd_armv7_pmu_init(void)
        cpu_implementer = cpu_id >> 24;
        part_number = cpu_id & 0xFFF0;
 
-       if (cpu_implementer == QUADD_ARM_CPU_IMPLEMENTER) {
+       if (cpu_implementer == ARM_CPU_IMP_ARM) {
                switch (part_number) {
-               case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9:
+               case ARM_CPU_PART_CORTEX_A9:
                        pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A9;
                        strcpy(pmu_ctx.arch_name, "Cortex A9");
-                       pmu_ctx.nr_counters = 6;
                        pmu_ctx.counters_mask =
                                QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9;
                        pmu_ctx.current_map = quadd_armv7_a9_events_map;
                        pmu = &pmu_armv7_int;
                        break;
 
-               case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15:
+               case ARM_CPU_PART_CORTEX_A15:
                        pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A15;
                        strcpy(pmu_ctx.arch_name, "Cortex A15");
-                       pmu_ctx.nr_counters = 6;
                        pmu_ctx.counters_mask =
                                QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15;
                        pmu_ctx.current_map = quadd_armv7_a15_events_map;
@@ -486,13 +758,19 @@ struct quadd_event_source_interface *quadd_armv7_pmu_init(void)
                default:
                        pmu_ctx.arch = QUADD_ARM_CPU_TYPE_UNKNOWN;
                        strcpy(pmu_ctx.arch_name, "Unknown");
-                       pmu_ctx.nr_counters = 0;
                        pmu_ctx.current_map = NULL;
                        break;
                }
        }
 
-       pr_info("arch: %s, number of counters: %d\n",
-               pmu_ctx.arch_name, pmu_ctx.nr_counters);
+       INIT_LIST_HEAD(&pmu_ctx.used_events);
+
+       pr_info("arch: %s\n", pmu_ctx.arch_name);
+
        return pmu;
 }
+
+void quadd_armv7_pmu_deinit(void)
+{
+       free_events(&pmu_ctx.used_events);
+}
index 827fe42..dd38e53 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/armv7_pmu.h
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -17,7 +17,7 @@
 #ifndef __ARMV7_PMU_H
 #define __ARMV7_PMU_H
 
-#define QUADD_ARM_CPU_IMPLEMENTER 0x41
+#include <linux/list.h>
 
 enum {
        QUADD_ARM_CPU_TYPE_UNKNOWN,
@@ -27,37 +27,32 @@ enum {
        QUADD_ARM_CPU_TYPE_CORTEX_A15,
 };
 
-#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A5    0xC050
-#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A8    0xC080
-#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9    0xC090
-#define QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15   0xC0F0
-
-
 #define QUADD_MAX_PMU_COUNTERS 32
 
 struct quadd_pmu_event_info {
        int quadd_event_id;
        int hw_value;
-       int counter_idx;
+
+       struct list_head list;
 };
 
 struct armv7_pmu_ctx {
        int arch;
        char arch_name[32];
 
-       int nr_counters;
        u32 counters_mask;
 
-       struct quadd_pmu_event_info pmu_events[QUADD_MAX_PMU_COUNTERS];
-       int nr_used_counters;
+       struct list_head used_events;
 
        int l1_cache_rw;
        int *current_map;
 };
 
+
 struct quadd_event_source_interface;
 
 extern struct quadd_event_source_interface *quadd_armv7_pmu_init(void);
+extern void quadd_armv7_pmu_deinit(void);
 
 /*
  * PMNC Register
@@ -80,10 +75,9 @@ extern struct quadd_event_source_interface *quadd_armv7_pmu_init(void);
 /* Mask for writable bits */
 #define        QUADD_ARMV7_PMNC_MASK           0x3f
 
-
-#define QUADD_ARMV7_CCNT               (1 << 31)       /* Cycle counter */
-
-#define QUADD_ARMV7_CYCLE_COUNTER      -1
+/* Cycle counter */
+#define QUADD_ARMV7_CCNT_BIT           31
+#define QUADD_ARMV7_CCNT               (1 << QUADD_ARMV7_CCNT_BIT)
 
 /*
  * CNTENS: counters enable reg
index 01544d0..c28748b 100644 (file)
@@ -518,6 +518,7 @@ static void __exit quadd_module_exit(void)
        quadd_comm_events_exit();
        quadd_auth_deinit();
        quadd_proc_deinit();
+       quadd_armv7_pmu_deinit();
 }
 
 module_init(quadd_module_init);
index 7260267..d4a6b60 100644 (file)
@@ -18,7 +18,7 @@
 #ifndef __QUADD_VERSION_H
 #define __QUADD_VERSION_H
 
-#define QUADD_MODULE_VERSION           "1.45"
+#define QUADD_MODULE_VERSION           "1.46"
 #define QUADD_MODULE_BRANCH            "Dev"
 
 #endif /* __QUADD_VERSION_H */