misc: tegra-profiler: add group samples
Igor Nabirushkin [Sun, 26 Jan 2014 13:59:25 +0000 (17:59 +0400)]
Group CPU-cycle and cache-miss samples.
To reduce the amount of data passed from the target to the host,
group samples that share most of their information into a single record.

Bug 1447582

Change-Id: I9b16bf4f18455ff6219fd58373eceba4cb71e352
Signed-off-by: Igor Nabirushkin <inabirushkin@nvidia.com>
Reviewed-on: http://git-master/r/365849
Reviewed-by: Automatic_Commit_Validation_User
Tested-by: Daniel Horowitz <dhorowitz@nvidia.com>
Reviewed-by: Gabby Lee <galee@nvidia.com>

19 files changed:
drivers/misc/tegra-profiler/armv7_pmu.c
drivers/misc/tegra-profiler/backtrace.c
drivers/misc/tegra-profiler/backtrace.h
drivers/misc/tegra-profiler/comm.c
drivers/misc/tegra-profiler/comm.h
drivers/misc/tegra-profiler/debug.c
drivers/misc/tegra-profiler/hrt.c
drivers/misc/tegra-profiler/hrt.h
drivers/misc/tegra-profiler/ma.c
drivers/misc/tegra-profiler/main.c
drivers/misc/tegra-profiler/mmap.c
drivers/misc/tegra-profiler/pl310.c
drivers/misc/tegra-profiler/power_clk.c
drivers/misc/tegra-profiler/power_clk.h
drivers/misc/tegra-profiler/quadd.h
drivers/misc/tegra-profiler/quadd_proc.c
drivers/misc/tegra-profiler/tegra.h
drivers/misc/tegra-profiler/version.h
include/linux/tegra_profiler.h

index 04436f8..9ca4e53 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/armv7_pmu.c
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -301,9 +301,10 @@ static void pmu_stop(void)
        qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
 }
 
-static int __maybe_unused pmu_read(struct event_data *events)
+static int __maybe_unused
+pmu_read(struct event_data *events, int max_events)
 {
-       int idx, i;
+       int idx, i, nr;
        u32 val;
        u32 *prevp = __get_cpu_var(pmu_prev_val);
 
@@ -312,7 +313,9 @@ static int __maybe_unused pmu_read(struct event_data *events)
                return 0;
        }
 
-       for (i = 0; i < pmu_ctx.nr_used_counters; i++) {
+       nr = min_t(int, pmu_ctx.nr_used_counters, max_events);
+
+       for (i = 0; i < nr; i++) {
                struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];
 
                idx = pmu_event->counter_idx;
@@ -332,16 +335,19 @@ static int __maybe_unused pmu_read(struct event_data *events)
                                      events[i].val);
        }
 
-       return pmu_ctx.nr_used_counters;
+       return nr;
 }
 
-static int __maybe_unused pmu_read_emulate(struct event_data *events)
+static int __maybe_unused
+pmu_read_emulate(struct event_data *events, int max_events)
 {
-       int i;
+       int i, nr;
        static u32 val = 100;
        u32 *prevp = __get_cpu_var(pmu_prev_val);
 
-       for (i = 0; i < pmu_ctx.nr_used_counters; i++) {
+       nr = min_t(int, pmu_ctx.nr_used_counters, max_events);
+
+       for (i = 0; i < nr; i++) {
                if (val > 200)
                        val = 100;
 
@@ -351,7 +357,7 @@ static int __maybe_unused pmu_read_emulate(struct event_data *events)
                val += 5;
        }
 
-       return pmu_ctx.nr_used_counters;
+       return nr;
 }
 
 static int set_events(int *events, int size)
@@ -414,17 +420,31 @@ static int set_events(int *events, int size)
        return 0;
 }
 
-static int get_supported_events(int *events)
+static int get_supported_events(int *events, int max_events)
 {
        int i, nr_events = 0;
 
-       for (i = 0; i < QUADD_EVENT_TYPE_MAX; i++) {
+       max_events = min_t(int, QUADD_EVENT_TYPE_MAX, max_events);
+
+       for (i = 0; i < max_events; i++) {
                if (pmu_ctx.current_map[i] != QUADD_ARMV7_UNSUPPORTED_EVENT)
                        events[nr_events++] = i;
        }
        return nr_events;
 }
 
+static int get_current_events(int *events, int max_events)
+{
+       int i;
+
+       max_events = min_t(int, pmu_ctx.nr_used_counters, max_events);
+
+       for (i = 0; i < max_events; i++)
+               events[i] = pmu_ctx.pmu_events[i].quadd_event_id;
+
+       return max_events;
+}
+
 static struct quadd_event_source_interface pmu_armv7_int = {
        .enable                 = pmu_enable,
        .disable                = pmu_disable,
@@ -439,6 +459,7 @@ static struct quadd_event_source_interface pmu_armv7_int = {
 #endif
        .set_events             = set_events,
        .get_supported_events   = get_supported_events,
+       .get_current_events     = get_current_events,
 };
 
 struct quadd_event_source_interface *quadd_armv7_pmu_init(void)
index 3191def..ce02f82 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/backtrace.c
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
 #define QUADD_USER_SPACE_MIN_ADDR      0x8000
 
 static inline void
-quadd_callchain_store(struct quadd_callchain *callchain_data, u32 ip)
+quadd_callchain_store(struct quadd_callchain *callchain_data,
+                     quadd_bt_addr_t ip)
 {
-       if (callchain_data->nr < QUADD_MAX_STACK_DEPTH) {
-               /* pr_debug("[%d] Add entry: %#llx\n",
-                           callchain_data->nr, ip); */
+       if (callchain_data->nr < QUADD_MAX_STACK_DEPTH)
                callchain_data->callchain[callchain_data->nr++] = ip;
-       }
 }
 
 static int
index 82b55db..ce76082 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/backtrace.h
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -23,7 +23,7 @@
 
 struct quadd_callchain {
        int nr;
-       u32 callchain[QUADD_MAX_STACK_DEPTH];
+       quadd_bt_addr_t callchain[QUADD_MAX_STACK_DEPTH];
 };
 
 unsigned int
index 35a4a8e..8d15fb1 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/comm.c
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -24,6 +24,7 @@
 #include <linux/miscdevice.h>
 #include <linux/sched.h>
 #include <linux/poll.h>
+#include <linux/bitops.h>
 
 #include <linux/tegra_profiler.h>
 
@@ -136,10 +137,10 @@ rb_write(struct quadd_ring_buffer *rb, char *data, size_t length)
        return length;
 }
 
-static size_t rb_read_undo(struct quadd_ring_buffer *rb, size_t length)
+static ssize_t rb_read_undo(struct quadd_ring_buffer *rb, size_t length)
 {
        if (rb_get_free_space(rb) < length)
-               return 0;
+               return -EIO;
 
        if (rb->pos_read > length)
                rb->pos_read -= length;
@@ -174,7 +175,7 @@ static size_t rb_read(struct quadd_ring_buffer *rb, char *data, size_t length)
        return length;
 }
 
-static size_t
+static ssize_t
 rb_read_user(struct quadd_ring_buffer *rb, char __user *data, size_t length)
 {
        size_t new_pos_read, chunk1;
@@ -186,23 +187,17 @@ rb_read_user(struct quadd_ring_buffer *rb, char __user *data, size_t length)
 
        if (new_pos_read < rb->pos_read) {
                chunk1 = rb->size - rb->pos_read;
-               if (copy_to_user(data, rb->buf + rb->pos_read, chunk1)) {
-                       pr_err_once("Error: copy_to_user\n");
-                       return 0;
-               }
+               if (copy_to_user(data, rb->buf + rb->pos_read, chunk1))
+                       return -EFAULT;
 
                if (new_pos_read > 0) {
                        if (copy_to_user(data + chunk1, rb->buf,
-                                        new_pos_read)) {
-                               pr_err_once("Error: copy_to_user\n");
-                               return 0;
-                       }
+                                        new_pos_read))
+                               return -EFAULT;
                }
        } else {
-               if (copy_to_user(data, rb->buf + rb->pos_read, length)) {
-                       pr_err_once("Error: copy_to_user\n");
-                       return 0;
-               }
+               if (copy_to_user(data, rb->buf + rb->pos_read, length))
+                       return -EFAULT;
        }
 
        rb->pos_read = new_pos_read;
@@ -212,17 +207,22 @@ rb_read_user(struct quadd_ring_buffer *rb, char __user *data, size_t length)
 }
 
 static void
-write_sample(struct quadd_record_data *sample, void *extra_data,
-            size_t extra_length)
+write_sample(struct quadd_record_data *sample,
+            struct quadd_iovec *vec, int vec_count)
 {
+       int i;
        unsigned long flags;
        struct quadd_ring_buffer *rb = &comm_ctx.rb;
-       int length_sample = sizeof(struct quadd_record_data) + extra_length;
+       size_t length_sample;
+
+       length_sample = sizeof(struct quadd_record_data);
+       for (i = 0; i < vec_count; i++)
+               length_sample += vec[i].len;
 
        spin_lock_irqsave(&rb->lock, flags);
 
        if (length_sample > rb_get_free_space(rb)) {
-               pr_err_once("Error: Buffer overflowed, skip sample\n");
+               pr_err_once("Error: Buffer has been overflowed\n");
                spin_unlock_irqrestore(&rb->lock, flags);
                return;
        }
@@ -232,10 +232,10 @@ write_sample(struct quadd_record_data *sample, void *extra_data,
                return;
        }
 
-       if (extra_data && extra_length > 0) {
-               if (!rb_write(rb, extra_data, extra_length)) {
-                       pr_err_once("Buffer overflowed, skip sample\n");
+       for (i = 0; i < vec_count; i++) {
+               if (!rb_write(rb, vec[i].base, vec[i].len)) {
                        spin_unlock_irqrestore(&rb->lock, flags);
+                       pr_err_once("%s: error: ring buffer\n", __func__);
                        return;
                }
        }
@@ -248,55 +248,59 @@ write_sample(struct quadd_record_data *sample, void *extra_data,
        wake_up_interruptible(&comm_ctx.read_wait);
 }
 
-static int read_sample(char __user *buffer, size_t max_length)
+static ssize_t read_sample(char __user *buffer, size_t max_length)
 {
+       int retval = -EIO;
        unsigned long flags;
        struct quadd_ring_buffer *rb = &comm_ctx.rb;
        struct quadd_record_data record;
-       size_t length_extra = 0;
+       size_t length_extra = 0, nr_events;
+       struct quadd_sample_data *sample;
 
        spin_lock_irqsave(&rb->lock, flags);
 
        if (rb_is_empty(rb)) {
-               spin_unlock_irqrestore(&rb->lock, flags);
-               return 0;
+               retval = 0;
+               goto out;
        }
 
-       if (rb->fill_count < sizeof(struct quadd_record_data)) {
-               pr_err_once("Error: data\n");
-               spin_unlock_irqrestore(&rb->lock, flags);
-               return 0;
-       }
+       if (rb->fill_count < sizeof(struct quadd_record_data))
+               goto out;
 
-       if (!rb_read(rb, (char *)&record, sizeof(struct quadd_record_data))) {
-               pr_err_once("Error: read sample\n");
-               spin_unlock_irqrestore(&rb->lock, flags);
-               return 0;
-       }
+       if (!rb_read(rb, (char *)&record, sizeof(struct quadd_record_data)))
+               goto out;
 
        if (record.magic != QUADD_RECORD_MAGIC) {
-               pr_err_once("Bad magic: %#x\n", record.magic);
-               spin_unlock_irqrestore(&rb->lock, flags);
-               return 0;
+               pr_err("Error: bad magic: %#x\n", record.magic);
+               goto out;
        }
 
        switch (record.record_type) {
        case QUADD_RECORD_TYPE_SAMPLE:
-               length_extra = record.sample.callchain_nr *
-                                       sizeof(record.sample.ip);
+               sample = &record.sample;
+               length_extra = sample->callchain_nr * sizeof(quadd_bt_addr_t);
+
+               nr_events = __sw_hweight32(record.sample.events_flags);
+               length_extra += nr_events * sizeof(u32);
                break;
 
        case QUADD_RECORD_TYPE_MMAP:
                if (record.mmap.filename_length > 0) {
                        length_extra = record.mmap.filename_length;
                } else {
-                       length_extra = 0;
-                       pr_err_once("Error: filename\n");
+                       pr_err("Error: filename is empty\n");
+                       goto out;
                }
                break;
 
-       case QUADD_RECORD_TYPE_DEBUG:
        case QUADD_RECORD_TYPE_HEADER:
+               length_extra = record.hdr.nr_events * sizeof(u32);
+               break;
+
+       case QUADD_RECORD_TYPE_DEBUG:
+               length_extra = record.debug.extra_length;
+               break;
+
        case QUADD_RECORD_TYPE_MA:
                length_extra = 0;
                break;
@@ -310,50 +314,49 @@ static int read_sample(char __user *buffer, size_t max_length)
                break;
 
        default:
-               pr_err_once("Error: Unknown sample: %u\n", record.record_type);
-               spin_unlock_irqrestore(&rb->lock, flags);
-               return 0;
+               goto out;
        }
 
        if (sizeof(struct quadd_record_data) + length_extra > max_length) {
-               if (!rb_read_undo(rb, sizeof(struct quadd_record_data)))
-                       pr_err_once("Error: rb_read_undo\n");
-               spin_unlock_irqrestore(&rb->lock, flags);
-               return 0;
-       }
+               retval = rb_read_undo(rb, sizeof(struct quadd_record_data));
+               if (retval < 0)
+                       goto out;
 
-       if (length_extra > rb_get_free_space(rb)) {
-               pr_err_once("Error: Incompleted sample\n");
-               spin_unlock_irqrestore(&rb->lock, flags);
-               return 0;
+               retval = 0;
+               goto out;
        }
 
-       if (copy_to_user(buffer, &record, sizeof(struct quadd_record_data))) {
-               pr_err_once("Error: copy_to_user\n");
-               spin_unlock_irqrestore(&rb->lock, flags);
-               return 0;
-       }
+       if (length_extra > rb->fill_count)
+               goto out;
+
+       if (copy_to_user(buffer, &record, sizeof(struct quadd_record_data)))
+               goto out_fault_error;
 
        if (length_extra > 0) {
-               if (!rb_read_user(rb, buffer + sizeof(struct quadd_record_data),
-                                 length_extra)) {
-                       pr_err_once("Error: copy_to_user\n");
-                       spin_unlock_irqrestore(&rb->lock, flags);
-                       return 0;
-               }
+               retval = rb_read_user(rb, buffer + sizeof(record),
+                                     length_extra);
+               if (retval <= 0)
+                       goto out;
        }
 
        spin_unlock_irqrestore(&rb->lock, flags);
        return sizeof(struct quadd_record_data) + length_extra;
+
+out_fault_error:
+       retval = -EFAULT;
+
+out:
+       spin_unlock_irqrestore(&rb->lock, flags);
+       return retval;
 }
 
-static void put_sample(struct quadd_record_data *data, char *extra_data,
-                      unsigned int extra_length)
+static void put_sample(struct quadd_record_data *data,
+                      struct quadd_iovec *vec, int vec_count)
 {
        if (!atomic_read(&comm_ctx.active))
                return;
 
-       write_sample(data, extra_data, extra_length);
+       write_sample(data, vec, vec_count);
 }
 
 static void comm_reset(void)
@@ -460,11 +463,17 @@ device_read(struct file *filp,
 
        if (!atomic_read(&comm_ctx.active)) {
                mutex_unlock(&comm_ctx.io_mutex);
-               return -1;
+               return -EPIPE;
        }
 
        while (was_read + sizeof(struct quadd_record_data) < length) {
                res = read_sample(buffer + was_read, length - was_read);
+               if (res < 0) {
+                       mutex_unlock(&comm_ctx.io_mutex);
+                       pr_err("Error: data is corrupted\n");
+                       return res;
+               }
+
                if (res == 0)
                        break;
 
index a19319d..1bed2d9 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/comm.h
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -35,6 +35,11 @@ struct quadd_ring_buffer {
        size_t max_fill_count;
 };
 
+struct quadd_iovec {
+       void *base;
+       size_t len;
+};
+
 struct quadd_parameters;
 
 struct quadd_comm_control_interface {
@@ -47,8 +52,8 @@ struct quadd_comm_control_interface {
 };
 
 struct quadd_comm_data_interface {
-       void (*put_sample)(struct quadd_record_data *data, char *extra_data,
-                          unsigned int extra_length);
+       void (*put_sample)(struct quadd_record_data *data,
+                          struct quadd_iovec *vec, int vec_count);
        void (*reset)(void);
        int (*is_active)(void);
 };
index fbe95b5..b7acd94 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/debug.c
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
 #include "debug.h"
 #include "hrt.h"
 #include "tegra.h"
+#include "comm.h"
 
 #ifdef QM_DEBUG_SAMPLES_ENABLE
 
 static inline void
 init_sample(struct quadd_record_data *record, struct pt_regs *regs)
 {
+       unsigned int flags;
        struct quadd_debug_data *s = &record->debug;
 
        record->magic = QUADD_RECORD_MAGIC;
@@ -37,19 +39,24 @@ init_sample(struct quadd_record_data *record, struct pt_regs *regs)
                regs = get_irq_regs();
 
        if (!regs)
-               record->cpu_mode = QUADD_CPU_MODE_NONE;
+               s->user_mode = 0;
        else
-               record->cpu_mode = user_mode(regs) ?
-                       QUADD_CPU_MODE_USER : QUADD_CPU_MODE_KERNEL;
+               s->user_mode = user_mode(regs) ? 1 : 0;
+
+       s->cpu = quadd_get_processor_id(regs, &flags);
+
+       s->lp_mode = flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP ? 1 : 0;
+       s->thumb_mode = flags & QUADD_CPUMODE_THUMB ? 1 : 0;
+
+       s->reserved = 0;
 
-       s->cpu = quadd_get_processor_id(regs);
        s->pid = 0;
        s->time = quadd_get_time();
-       s->timer_period = 0;
 
-       s->extra_value1 = 0;
-       s->extra_value2 = 0;
-       s->extra_value3 = 0;
+       s->extra_value[0] = 0;
+       s->extra_value[1] = 0;
+
+       s->extra_length = 0;
 }
 
 void qm_debug_handler_sample(struct pt_regs *regs)
@@ -72,7 +79,6 @@ void qm_debug_timer_forward(struct pt_regs *regs, u64 period)
        init_sample(&record, regs);
 
        s->type = QM_DEBUG_SAMPLE_TYPE_TIMER_FORWARD;
-       s->timer_period = period;
 
        quadd_put_sample(&record, NULL, 0);
 }
@@ -85,7 +91,6 @@ void qm_debug_timer_start(struct pt_regs *regs, u64 period)
        init_sample(&record, regs);
 
        s->type = QM_DEBUG_SAMPLE_TYPE_TIMER_START;
-       s->timer_period = period;
 
        quadd_put_sample(&record, NULL, 0);
 }
@@ -105,6 +110,7 @@ void qm_debug_timer_cancel(void)
 void
 qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, int prev_nr_active)
 {
+       struct quadd_iovec vec;
        struct quadd_record_data record;
        struct quadd_debug_data *s = &record.debug;
 
@@ -112,15 +118,18 @@ qm_debug_task_sched_in(pid_t prev_pid, pid_t current_pid, int prev_nr_active)
 
        s->type = QM_DEBUG_SAMPLE_TYPE_SCHED_IN;
 
-       s->extra_value1 = prev_pid;
-       s->extra_value2 = current_pid;
-       s->extra_value3 = prev_nr_active;
+       s->extra_value[0] = prev_pid;
+       s->extra_value[1] = current_pid;
 
-       quadd_put_sample(&record, NULL, 0);
+       vec.base = &prev_nr_active;
+       vec.len = s->extra_length = sizeof(prev_nr_active);
+
+       quadd_put_sample(&record, &vec, 1);
 }
 
 void qm_debug_read_counter(int event_id, u32 prev_val, u32 val)
 {
+       struct quadd_iovec vec;
        struct quadd_record_data record;
        struct quadd_debug_data *s = &record.debug;
 
@@ -128,11 +137,13 @@ void qm_debug_read_counter(int event_id, u32 prev_val, u32 val)
 
        s->type = QM_DEBUG_SAMPLE_TYPE_READ_COUNTER;
 
-       s->extra_value1 = event_id;
-       s->extra_value2 = prev_val;
-       s->extra_value3 = val;
+       s->extra_value[0] = event_id;
+       s->extra_value[1] = prev_val;
 
-       quadd_put_sample(&record, NULL, 0);
+       vec.base = &val;
+       vec.len = s->extra_length = sizeof(val);
+
+       quadd_put_sample(&record, &vec, 1);
 }
 
 void qm_debug_start_source(int source_type)
@@ -143,7 +154,7 @@ void qm_debug_start_source(int source_type)
        init_sample(&record, NULL);
 
        s->type = QM_DEBUG_SAMPLE_TYPE_SOURCE_START;
-       s->extra_value1 = source_type;
+       s->extra_value[0] = source_type;
 
        quadd_put_sample(&record, NULL, 0);
 }
@@ -156,7 +167,7 @@ void qm_debug_stop_source(int source_type)
        init_sample(&record, NULL);
 
        s->type = QM_DEBUG_SAMPLE_TYPE_SOURCE_STOP;
-       s->extra_value1 = source_type;
+       s->extra_value[0] = source_type;
 
        quadd_put_sample(&record, NULL, 0);
 }
index fef37de..04fcf13 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/hrt.c
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -46,6 +46,11 @@ static void sample_time_prepare(void);
 static void sample_time_finish(void);
 static void sample_time_reset(struct quadd_cpu_context *cpu_ctx);
 
+struct hrt_event_value {
+       int event_id;
+       u32 value;
+};
+
 static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer)
 {
        struct pt_regs *regs;
@@ -142,16 +147,21 @@ static void sample_time_reset(struct quadd_cpu_context *cpu_ctx)
 
 static void put_header(void)
 {
-       int power_rate_period;
+       int nr_events = 0, max_events = QUADD_MAX_COUNTERS;
+       unsigned int events[QUADD_MAX_COUNTERS];
        struct quadd_record_data record;
        struct quadd_header_data *hdr = &record.hdr;
        struct quadd_parameters *param = &hrt.quadd_ctx->param;
-       struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
+       unsigned int extra = param->reserved[QUADD_PARAM_IDX_EXTRA];
+       struct quadd_iovec vec;
+       struct quadd_ctx *ctx = hrt.quadd_ctx;
+       struct quadd_event_source_interface *pmu = ctx->pmu;
+       struct quadd_event_source_interface *pl310 = ctx->pl310;
 
        record.magic = QUADD_RECORD_MAGIC;
        record.record_type = QUADD_RECORD_TYPE_HEADER;
-       record.cpu_mode = QUADD_CPU_MODE_NONE;
 
+       hdr->magic = QUADD_HEADER_MAGIC;
        hdr->version = QUADD_SAMPLES_VERSION;
 
        hdr->backtrace = param->backtrace;
@@ -165,152 +175,190 @@ static void put_header(void)
        hdr->debug_samples = 0;
 #endif
 
-       hdr->period = hrt.sample_period;
-       hdr->ma_period = hrt.ma_period;
+       hdr->freq = param->freq;
+       hdr->ma_freq = param->ma_freq;
+       hdr->power_rate_freq = param->power_rate_freq;
+
+       hdr->power_rate = hdr->power_rate_freq > 0 ? 1 : 0;
+       hdr->get_mmap = (extra & QUADD_PARAM_IDX_EXTRA_GET_MMAP) ? 1 : 0;
+
+       hdr->reserved = 0;
+       hdr->extra_length = 0;
 
-       hdr->power_rate = quadd_power_clk_is_enabled(&power_rate_period);
-       hdr->power_rate_period = power_rate_period;
+       if (pmu)
+               nr_events += pmu->get_current_events(events, max_events);
 
-       comm->put_sample(&record, NULL, 0);
+       if (pl310)
+               nr_events += pl310->get_current_events(events + nr_events,
+                                                      max_events - nr_events);
+
+       hdr->nr_events = nr_events;
+
+       vec.base = events;
+       vec.len = nr_events * sizeof(events[0]);
+
+       quadd_put_sample(&record, &vec, 1);
 }
 
 void quadd_put_sample(struct quadd_record_data *data,
-                     char *extra_data, unsigned int extra_length)
+                     struct quadd_iovec *vec, int vec_count)
 {
        struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
 
-       if (data->record_type == QUADD_RECORD_TYPE_SAMPLE &&
-               data->sample.period > 0x7FFFFFFF) {
-               struct quadd_sample_data *sample = &data->sample;
-               pr_err_once("very big period, sample id: %d\n",
-                           sample->event_id);
-               return;
-       }
-
-       comm->put_sample(data, extra_data, extra_length);
+       comm->put_sample(data, vec, vec_count);
        atomic64_inc(&hrt.counter_samples);
 }
 
-static int get_sample_data(struct event_data *event,
-                          struct pt_regs *regs,
-                          struct quadd_sample_data *sample)
+static int get_sample_data(struct quadd_sample_data *sample,
+                          struct pt_regs *regs, pid_t pid)
 {
-       u32 period;
-       u32 prev_val, val;
+       unsigned int cpu, flags;
+       struct quadd_thread_data *t_data;
+       struct quadd_ctx *quadd_ctx = hrt.quadd_ctx;
+       struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
 
-       prev_val = event->prev_val;
-       val = event->val;
+       cpu = quadd_get_processor_id(regs, &flags);
+       sample->cpu = cpu;
 
-       sample->event_id = event->event_id;
+       sample->lp_mode =
+               (flags & QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP) ? 1 : 0;
+       sample->thumb_mode = (flags & QUADD_CPUMODE_THUMB) ? 1 : 0;
+       sample->user_mode = user_mode(regs) ? 1 : 0;
 
        sample->ip = instruction_pointer(regs);
-       sample->cpu = quadd_get_processor_id(regs);
-       sample->time = get_sample_time();
 
-       if (prev_val <= val)
-               period = val - prev_val;
+       /* For security reasons, hide IPs from the kernel space. */
+       if (!sample->user_mode && !quadd_ctx->collect_kernel_ips)
+               sample->ip = 0;
        else
-               period = QUADD_U32_MAX - prev_val + val;
+               sample->ip = instruction_pointer(regs);
+
+       sample->time = get_sample_time();
+       sample->reserved = 0;
 
-       if (event->event_source == QUADD_EVENT_SOURCE_PL310) {
-               int nr_current_active = atomic_read(&hrt.nr_active_all_core);
-               if (nr_current_active > 1)
-                       period = period / nr_current_active;
+       if (pid > 0) {
+               sample->pid = pid;
+       } else {
+               t_data = &cpu_ctx->active_thread;
+               sample->pid = t_data->pid;
        }
 
-       sample->period = period;
        return 0;
 }
 
-static void read_source(struct quadd_event_source_interface *source,
-                       struct pt_regs *regs, pid_t pid)
+static int read_source(struct quadd_event_source_interface *source,
+                      struct pt_regs *regs, pid_t pid,
+                      struct hrt_event_value *events_vals, int max_events)
 {
        int nr_events, i;
+       u32 prev_val, val, res_val;
        struct event_data events[QUADD_MAX_COUNTERS];
-       struct quadd_record_data record_data;
-       struct quadd_thread_data *t_data;
-       char *extra_data = NULL;
-       unsigned int extra_length = 0, callchain_nr = 0;
-       struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
-       struct quadd_callchain *callchain_data = &cpu_ctx->callchain_data;
-       struct quadd_ctx *quadd_ctx = hrt.quadd_ctx;
-       struct pt_regs *user_regs;
 
        if (!source)
-               return;
+               return 0;
 
-       nr_events = source->read(events);
+       max_events = min_t(int, max_events, QUADD_MAX_COUNTERS);
+       nr_events = source->read(events, max_events);
 
-       if (nr_events == 0 || nr_events > QUADD_MAX_COUNTERS) {
-               pr_err_once("Error number of counters: %d, source: %p\n",
-                               nr_events, source);
-               return;
-       }
-
-       if (atomic_read(&cpu_ctx->nr_active) == 0)
-               return;
+       for (i = 0; i < nr_events; i++) {
+               struct event_data *s = &events[i];
 
-       if (user_mode(regs))
-               user_regs = regs;
-       else
-               user_regs = task_pt_regs(current);
+               prev_val = s->prev_val;
+               val = s->val;
 
-       if (hrt.quadd_ctx->param.backtrace) {
-               callchain_nr =
-                       quadd_get_user_callchain(user_regs, callchain_data);
-               if (callchain_nr > 0) {
-                       extra_data = (char *)cpu_ctx->callchain_data.callchain;
-                       extra_length = callchain_nr * sizeof(u32);
-               }
-       }
+               if (prev_val <= val)
+                       res_val = val - prev_val;
+               else
+                       res_val = QUADD_U32_MAX - prev_val + val;
 
-       for (i = 0; i < nr_events; i++) {
-               if (get_sample_data(&events[i], regs, &record_data.sample))
-                       return;
-
-               record_data.magic = QUADD_RECORD_MAGIC;
-               record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
-               record_data.cpu_mode = user_mode(regs) ?
-                       QUADD_CPU_MODE_USER : QUADD_CPU_MODE_KERNEL;
-
-               /* For security reasons, hide IPs from the kernel space. */
-               if (record_data.cpu_mode == QUADD_CPU_MODE_KERNEL &&
-                   !quadd_ctx->collect_kernel_ips)
-                       record_data.sample.ip = 0;
-
-               if (pid > 0) {
-                       record_data.sample.pid = pid;
-               } else {
-                       t_data = &cpu_ctx->active_thread;
-                       record_data.sample.pid = t_data->pid;
+               if (s->event_source == QUADD_EVENT_SOURCE_PL310) {
+                       int nr_active = atomic_read(&hrt.nr_active_all_core);
+                       if (nr_active > 1)
+                               res_val = res_val / nr_active;
                }
 
-               if (i == 0) {
-                       record_data.sample.callchain_nr = callchain_nr;
-                       quadd_put_sample(&record_data, extra_data,
-                                        extra_length);
-               } else {
-                       record_data.sample.callchain_nr = 0;
-                       quadd_put_sample(&record_data, NULL, 0);
-               }
+               events_vals[i].event_id = s->event_id;
+               events_vals[i].value = res_val;
        }
+
+       return nr_events;
 }
 
 static void read_all_sources(struct pt_regs *regs, pid_t pid)
 {
+       int i, vec_idx = 0, bt_size = 0;
+       int nr_events = 0, nr_positive_events = 0;
+       struct pt_regs *user_regs;
+       struct quadd_iovec vec[2];
+       struct hrt_event_value events[QUADD_MAX_COUNTERS];
+       u32 events_extra[QUADD_MAX_COUNTERS];
+
+       struct quadd_record_data record_data;
+       struct quadd_sample_data *s = &record_data.sample;
+
        struct quadd_ctx *ctx = hrt.quadd_ctx;
        struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+       struct quadd_callchain *cc_data = &cpu_ctx->callchain_data;
 
        if (!regs)
                return;
 
+       if (atomic_read(&cpu_ctx->nr_active) == 0)
+               return;
+
        quadd_get_mmap_object(cpu_ctx, regs, pid);
 
        if (ctx->pmu && ctx->pmu_info.active)
-               read_source(ctx->pmu, regs, pid);
+               nr_events += read_source(ctx->pmu, regs, pid,
+                                        events, QUADD_MAX_COUNTERS);
 
        if (ctx->pl310 && ctx->pl310_info.active)
-               read_source(ctx->pl310, regs, pid);
+               nr_events += read_source(ctx->pl310, regs, pid,
+                                        events + nr_events,
+                                        QUADD_MAX_COUNTERS - nr_events);
+
+       if (!nr_events)
+               return;
+
+       if (user_mode(regs))
+               user_regs = regs;
+       else
+               user_regs = task_pt_regs(current);
+
+       if (get_sample_data(s, regs, pid))
+               return;
+
+       if (ctx->param.backtrace) {
+               bt_size = quadd_get_user_callchain(user_regs, cc_data);
+               if (bt_size > 0) {
+                       vec[vec_idx].base = cc_data->callchain;
+                       vec[vec_idx].len =
+                               bt_size * sizeof(cc_data->callchain[0]);
+                       vec_idx++;
+               }
+       }
+       s->callchain_nr = bt_size;
+
+       record_data.magic = QUADD_RECORD_MAGIC;
+       record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
+
+       s->events_flags = 0;
+       for (i = 0; i < nr_events; i++) {
+               u32 value = events[i].value;
+               if (value > 0) {
+                       s->events_flags |= 1 << i;
+                       events_extra[nr_positive_events++] = value;
+               }
+       }
+
+       if (nr_positive_events == 0)
+               return;
+
+       vec[vec_idx].base = events_extra;
+       vec[vec_idx].len = nr_positive_events * sizeof(events_extra[0]);
+       vec_idx++;
+
+       quadd_put_sample(&record_data, vec, vec_idx);
 }
 
 static inline int is_profile_process(pid_t pid)
@@ -389,7 +437,7 @@ void __quadd_task_sched_in(struct task_struct *prev,
                                ctx->pmu->start();
 
                        if (ctx->pl310)
-                               ctx->pl310->read(events);
+                               ctx->pl310->read(events, 1);
 
                        start_hrtimer(cpu_ctx);
                        atomic_inc(&hrt.nr_active_all_core);
index 7eff30a..1e3eb72 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/hrt.h
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -72,6 +72,7 @@ struct quadd_hrt_ctx {
 struct quadd_hrt_ctx;
 struct quadd_record_data;
 struct quadd_module_state;
+struct quadd_iovec;
 
 struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx);
 void quadd_hrt_deinit(void);
@@ -80,7 +81,7 @@ int quadd_hrt_start(void);
 void quadd_hrt_stop(void);
 
 void quadd_put_sample(struct quadd_record_data *data,
-                     char *extra_data, unsigned int extra_length);
+                     struct quadd_iovec *vec, int vec_count);
 
 void quadd_hrt_get_state(struct quadd_module_state *state);
 u64 quadd_get_time(void);
index d561a24..a1422cc 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/ma.c
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -39,18 +39,13 @@ static void make_sample(struct quadd_hrt_ctx *hrt_ctx,
 
        record.magic = QUADD_RECORD_MAGIC;
        record.record_type = QUADD_RECORD_TYPE_MA;
-       record.cpu_mode = QUADD_CPU_MODE_NONE;
 
        ma->pid = pid;
        ma->time = quadd_get_time();
 
-       ma->vm_size = vm_size << PAGE_SHIFT;
-       ma->rss_size = rss_size << PAGE_SHIFT;
-/*
-       pr_debug("vm: %llu bytes (%llu mb), rss: %llu bytes (%llu mb)\n",
-               ma->vm_size, ma->vm_size / 0x100000,
-               ma->rss_size, ma->rss_size / 0x100000);
-*/
+       ma->vm_size = vm_size << (PAGE_SHIFT-10);
+       ma->rss_size = rss_size << (PAGE_SHIFT-10);
+
        comm->put_sample(&record, NULL, 0);
 }
 
index 2b9b8ad..7186e35 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/main.c
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -303,8 +303,9 @@ static void get_capabilities(struct quadd_comm_cap *cap)
                                break;
 
                        default:
-                               BUG();
-                               break;
+                               pr_err_once("%s: error: invalid event\n",
+                                           __func__);
+                               return;
                        }
                }
        }
@@ -352,8 +353,9 @@ static void get_capabilities(struct quadd_comm_cap *cap)
                                break;
 
                        default:
-                               BUG();
-                               break;
+                               pr_err_once("%s: error: invalid event\n",
+                                           __func__);
+                               return;
                        }
                }
        }
@@ -364,6 +366,7 @@ static void get_capabilities(struct quadd_comm_cap *cap)
 
        extra |= QUADD_COMM_CAP_EXTRA_BT_KERNEL_CTX;
        extra |= QUADD_COMM_CAP_EXTRA_GET_MMAP;
+       extra |= QUADD_COMM_CAP_EXTRA_GROUP_SAMPLES;
 
        cap->reserved[QUADD_COMM_CAP_IDX_EXTRA] = extra;
 }
@@ -417,7 +420,8 @@ static int __init quadd_module_init(void)
                return -ENODEV;
        } else {
                events = ctx.pmu_info.supported_events;
-               nr_events = ctx.pmu->get_supported_events(events);
+               nr_events = ctx.pmu->get_supported_events(events,
+                                                         QUADD_MAX_COUNTERS);
                ctx.pmu_info.nr_supported_events = nr_events;
 
                pr_info("PMU: amount of events: %d\n", nr_events);
@@ -430,7 +434,8 @@ static int __init quadd_module_init(void)
        ctx.pl310 = quadd_l2x0_events_init();
        if (ctx.pl310) {
                events = ctx.pl310_info.supported_events;
-               nr_events = ctx.pl310->get_supported_events(events);
+               nr_events = ctx.pl310->get_supported_events(events,
+                                                           QUADD_MAX_COUNTERS);
                ctx.pl310_info.nr_supported_events = nr_events;
 
                pr_info("pl310 success, amount of events: %d\n",
index cf6f408..bcec4b3 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/mmap.c
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -27,6 +27,7 @@
 #include <linux/tegra_profiler.h>
 
 #include "mmap.h"
+#include "comm.h"
 #include "hrt.h"
 #include "debug.h"
 
@@ -122,22 +123,25 @@ static int find_file(const char *file_name, unsigned long addr,
 }
 
 static void
-put_mmap_sample(struct quadd_mmap_data *s, char *extra_data,
-               size_t extra_length)
+put_mmap_sample(struct quadd_mmap_data *s, char *filename,
+               size_t length)
 {
        struct quadd_record_data r;
+       struct quadd_iovec vec;
 
        r.magic = QUADD_RECORD_MAGIC;
        r.record_type = QUADD_RECORD_TYPE_MMAP;
-       r.cpu_mode = QUADD_CPU_MODE_USER;
 
        memcpy(&r.mmap, s, sizeof(*s));
-       r.mmap.filename_length = extra_length;
+       r.mmap.filename_length = length;
 
-       pr_debug("MMAP: pid: %d, file_name: '%s', addr: %#x, length: %u",
-                s->pid, extra_data, s->addr, extra_length);
+       vec.base = filename;
+       vec.len = length;
 
-       quadd_put_sample(&r, extra_data, extra_length);
+       pr_debug("MMAP: pid: %u, file_name: '%s', addr: %#llx, length: %llu",
+                s->pid, filename, s->addr, s->len);
+
+       quadd_put_sample(&r, &vec, 1);
 }
 
 void quadd_get_mmap_object(struct quadd_cpu_context *cpu_ctx,
@@ -176,8 +180,7 @@ void quadd_get_mmap_object(struct quadd_cpu_context *cpu_ctx,
                        } else {
                                sample.addr = vma->vm_start;
                                sample.len = vma->vm_end - vma->vm_start;
-                               sample.pgoff =
-                                       (u64)vma->vm_pgoff << PAGE_SHIFT;
+                               sample.pgoff = vma->vm_pgoff;
                        }
                        break;
                }
@@ -262,12 +265,11 @@ int quadd_get_current_mmap(struct quadd_cpu_context *cpu_ctx, pid_t pid)
                sample.pid = pid;
                sample.addr = vma->vm_start;
                sample.len = vma->vm_end - vma->vm_start;
-               sample.pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT;
+               sample.pgoff = vma->vm_pgoff;
 
                if (!find_file(file_name, sample.addr, sample.len)) {
                        strcpy(cpu_ctx->mmap_filename, file_name);
                        length_aligned = ALIGN(length, 8);
-
                        put_mmap_sample(&sample, file_name, length_aligned);
                }
        }
index 0108308..f2ad842 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/pl310.c
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -176,14 +176,16 @@ static void l2x0_events_stop(void)
        qm_debug_stop_source(QUADD_EVENT_SOURCE_PL310);
 }
 
-static int __maybe_unused l2x0_events_read(struct event_data *events)
+static int __maybe_unused
+l2x0_events_read(struct event_data *events, int max_events)
 {
        unsigned long flags;
 
-       if (l2x0_ctx.l2x0_event_type < 0) {
-               pr_err_once("pl310 value: %u\n", events[0].val);
+       if (l2x0_ctx.l2x0_event_type < 0)
+               return 0;
+
+       if (max_events == 0)
                return 0;
-       }
 
        events[0].event_source = QUADD_EVENT_SOURCE_PL310;
        events[0].event_id = l2x0_ctx.event_id;
@@ -202,10 +204,14 @@ static int __maybe_unused l2x0_events_read(struct event_data *events)
        return 1;
 }
 
-static int __maybe_unused l2x0_events_read_emulate(struct event_data *events)
+static int __maybe_unused
+l2x0_events_read_emulate(struct event_data *events, int max_events)
 {
        static u32 val;
 
+       if (max_events == 0)
+               return 0;
+
        if (val > 100)
                val = 0;
 
@@ -256,14 +262,28 @@ static int l2x0_set_events(int *events, int size)
        return 0;
 }
 
-static int get_supported_events(int *events)
+static int get_supported_events(int *events, int max_events)
 {
+       if (max_events < 3)
+               return 0;
+
        events[0] = QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES;
        events[1] = QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES;
        events[2] = QUADD_EVENT_TYPE_L2_ICACHE_MISSES;
+
        return 3;
 }
 
+static int get_current_events(int *events, int max_events)
+{
+       if (max_events == 0)
+               return 0;
+
+       *events = l2x0_ctx.event_id;
+
+       return 1;
+}
+
 static struct quadd_event_source_interface l2x0_int = {
        .enable                 = l2x0_events_enable,
        .disable                = l2x0_events_disable,
@@ -278,6 +298,7 @@ static struct quadd_event_source_interface l2x0_int = {
 #endif
        .set_events             = l2x0_set_events,
        .get_supported_events   = get_supported_events,
+       .get_current_events     = get_current_events,
 };
 
 struct quadd_event_source_interface *quadd_l2x0_events_init(void)
index b5b73f4..261cf87 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/power_clk.c
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -110,7 +110,8 @@ static void read_source(struct power_clk_source *s)
                break;
 
        default:
-               BUG();
+               pr_err_once("%s: error: invalid power_clk type\n", __func__);
+               return;
        }
 
        mutex_unlock(&s->lock);
@@ -155,6 +156,7 @@ static void make_sample(void)
        int i;
        u32 extra_cpus[NR_CPUS];
        struct power_clk_source *s;
+       struct quadd_iovec vec;
 
        struct quadd_record_data record;
        struct quadd_power_rate_data *power_rate = &record.power_rate;
@@ -162,7 +164,6 @@ static void make_sample(void)
 
        record.magic = QUADD_RECORD_MAGIC;
        record.record_type = QUADD_RECORD_TYPE_POWER_RATE;
-       record.cpu_mode = QUADD_CPU_MODE_NONE;
 
        power_rate->time = quadd_get_time();
 
@@ -199,8 +200,10 @@ static void make_sample(void)
                 extra_cpus[0], extra_cpus[1], extra_cpus[2], extra_cpus[3],
                 power_rate->gpu, power_rate->emc);
 */
-       comm->put_sample(&record, (char *)extra_cpus,
-                        power_rate->nr_cpus * sizeof(extra_cpus[0]));
+       vec.base = extra_cpus;
+       vec.len = power_rate->nr_cpus * sizeof(extra_cpus[0]);
+
+       comm->put_sample(&record, &vec, 1);
 }
 
 static inline int is_data_changed(struct power_clk_source *s)
@@ -308,18 +311,6 @@ static void power_clk_timer(unsigned long data)
        add_timer(timer);
 }
 
-int quadd_power_clk_is_enabled(int *period)
-{
-       struct quadd_parameters *param = &power_ctx.quadd_ctx->param;
-
-       *period = power_ctx.period;
-
-       if (param->power_rate_freq == 0)
-               return 0;
-
-       return 1;
-}
-
 int quadd_power_clk_start(void)
 {
        struct power_clk_source *s;
index 6854687..eae519b 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/power_clk.h
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -25,6 +25,4 @@ int quadd_power_clk_init(struct quadd_ctx *quadd_ctx);
 int quadd_power_clk_start(void);
 void quadd_power_clk_stop(void);
 
-int quadd_power_clk_is_enabled(int *period);
-
 #endif /* __QUADD_POWER_CLK_H */
index 41367c3..3815f61 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/quadd.h
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -32,9 +32,10 @@ struct quadd_event_source_interface {
        void (*disable)(void);
        void (*start)(void);
        void (*stop)(void);
-       int (*read)(struct event_data *events);
+       int (*read)(struct event_data *events, int max_events);
        int (*set_events)(int *events, int size);
-       int (*get_supported_events)(int *events);
+       int (*get_supported_events)(int *events, int max_events);
+       int (*get_current_events)(int *events, int max_events);
 };
 
 struct source_info {
index 980a810..36201a7 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/quadd_proc.c
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -58,48 +58,52 @@ static int show_capabilities(struct seq_file *f, void *offset)
        struct quadd_events_cap *event = &cap->events_cap;
        unsigned int extra = cap->reserved[QUADD_COMM_CAP_IDX_EXTRA];
 
-       seq_printf(f, "pmu:                           %s\n",
+       seq_printf(f, "pmu:                            %s\n",
                   YES_NO(cap->pmu));
-       seq_printf(f, "tegra 3 LP cluster:            %s\n",
+       seq_printf(f, "tegra 3 LP cluster:             %s\n",
                   YES_NO(cap->tegra_lp_cluster));
-       seq_printf(f, "power rate samples:            %s\n",
+       seq_printf(f, "power rate samples:             %s\n",
                   YES_NO(cap->power_rate));
 
-       seq_printf(f, "l2 cache:                      %s\n",
+       seq_printf(f, "l2 cache:                       %s\n",
                   YES_NO(cap->l2_cache));
        if (cap->l2_cache) {
-               seq_printf(f, "multiple l2 events:            %s\n",
+               seq_printf(f, "multiple l2 events:             %s\n",
                           YES_NO(cap->l2_multiple_events));
        }
 
-       seq_printf(f, "support polling mode:          %s\n",
+       seq_printf(f, "support polling mode:           %s\n",
                   YES_NO(cap->blocked_read));
-       seq_printf(f, "backtrace from the kernel ctx: %s\n",
+       seq_printf(f, "backtrace from the kernel ctx:  %s\n",
                   YES_NO(extra & QUADD_COMM_CAP_EXTRA_BT_KERNEL_CTX));
+       seq_printf(f, "send mmap regions at the start: %s\n",
+                  YES_NO(extra & QUADD_COMM_CAP_EXTRA_GET_MMAP));
+       seq_printf(f, "group samples:                  %s\n",
+                  YES_NO(extra & QUADD_COMM_CAP_EXTRA_GROUP_SAMPLES));
 
        seq_puts(f, "\n");
        seq_puts(f, "Supported events:\n");
-       seq_printf(f, "cpu_cycles:                    %s\n",
+       seq_printf(f, "cpu_cycles:                     %s\n",
                   YES_NO(event->cpu_cycles));
-       seq_printf(f, "instructions:                  %s\n",
+       seq_printf(f, "instructions:                   %s\n",
                   YES_NO(event->instructions));
-       seq_printf(f, "branch_instructions:           %s\n",
+       seq_printf(f, "branch_instructions:            %s\n",
                   YES_NO(event->branch_instructions));
-       seq_printf(f, "branch_misses:                 %s\n",
+       seq_printf(f, "branch_misses:                  %s\n",
                   YES_NO(event->branch_misses));
-       seq_printf(f, "bus_cycles:                    %s\n",
+       seq_printf(f, "bus_cycles:                     %s\n",
                   YES_NO(event->bus_cycles));
-       seq_printf(f, "l1_dcache_read_misses:         %s\n",
+       seq_printf(f, "l1_dcache_read_misses:          %s\n",
                   YES_NO(event->l1_dcache_read_misses));
-       seq_printf(f, "l1_dcache_write_misses:        %s\n",
+       seq_printf(f, "l1_dcache_write_misses:         %s\n",
                   YES_NO(event->l1_dcache_write_misses));
-       seq_printf(f, "l1_icache_misses:              %s\n",
+       seq_printf(f, "l1_icache_misses:               %s\n",
                   YES_NO(event->l1_icache_misses));
-       seq_printf(f, "l2_dcache_read_misses:         %s\n",
+       seq_printf(f, "l2_dcache_read_misses:          %s\n",
                   YES_NO(event->l2_dcache_read_misses));
-       seq_printf(f, "l2_dcache_write_misses:        %s\n",
+       seq_printf(f, "l2_dcache_write_misses:         %s\n",
                   YES_NO(event->l2_dcache_write_misses));
-       seq_printf(f, "l2_icache_misses:              %s\n",
+       seq_printf(f, "l2_icache_misses:               %s\n",
                   YES_NO(event->l2_icache_misses));
 
        return 0;
index 65cd6cd..fd7fc31 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/tegra.h
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
 #include <../../mach-tegra/pm.h>
 #endif
 
-static inline int quadd_get_processor_id(struct pt_regs *regs)
+static inline unsigned int
+quadd_get_processor_id(struct pt_regs *regs, unsigned int *flags)
 {
-       int cpu_id = smp_processor_id();
+       if (flags) {
+               *flags = 0;
 
 #ifdef CONFIG_TEGRA_CLUSTER_CONTROL
-       if (is_lp_cluster())
-               cpu_id |= QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP;
+               if (is_lp_cluster())
+                       *flags |= QUADD_CPUMODE_TEGRA_POWER_CLUSTER_LP;
 #endif
 
-       if (thumb_mode(regs))
-               cpu_id |= QUADD_CPUMODE_THUMB;
+               if (regs && thumb_mode(regs))
+                       *flags |= QUADD_CPUMODE_THUMB;
+       }
 
-       return cpu_id;
+       return smp_processor_id();
 }
 
 static inline int quadd_is_cpu_with_lp_cluster(void)
index 3dddadb..b6b9a9c 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/misc/tegra-profiler/hrt.h
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,7 +18,7 @@
 #ifndef __QUADD_VERSION_H
 #define __QUADD_VERSION_H
 
-#define QUADD_MODULE_VERSION           "1.40"
+#define QUADD_MODULE_VERSION           "1.41"
 #define QUADD_MODULE_BRANCH            "Dev"
 
 #endif /* __QUADD_VERSION_H */
index b122b97..e3f90a2 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * include/linux/tegra_profiler.h
  *
- * Copyright (c) 2013, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -19,7 +19,7 @@
 
 #include <linux/ioctl.h>
 
-#define QUADD_SAMPLES_VERSION  17
+#define QUADD_SAMPLES_VERSION  18
 #define QUADD_IO_VERSION       9
 
 #define QUADD_IO_VERSION_DYNAMIC_RB            5
@@ -29,6 +29,7 @@
 #define QUADD_IO_VERSION_GET_MMAP              9
 
 #define QUADD_SAMPLE_VERSION_THUMB_MODE_FLAG   17
+#define QUADD_SAMPLE_VERSION_GROUP_SAMPLES     18
 
 #define QUADD_MAX_COUNTERS     32
 #define QUADD_MAX_PROCESS      64
@@ -93,8 +94,6 @@ enum quadd_events_id {
        QUADD_EVENT_TYPE_MAX,
 };
 
-#pragma pack(push, 4)
-
 struct event_data {
        int event_source;
        int event_id;
@@ -125,49 +124,56 @@ enum quadd_cpu_mode {
        QUADD_CPU_MODE_NONE,
 };
 
-struct quadd_sample_data {
-       u32 event_id;
+typedef u32 quadd_bt_addr_t;
+
+#pragma pack(push, 1)
 
-       u32 ip;
+struct quadd_sample_data {
+       u64 ip;
        u32 pid;
        u64 time;
-       u32 cpu;
-       u64 period;
 
-       u32 callchain_nr;
+       u16     cpu:6,
+               user_mode:1,
+               lp_mode:1,
+               thumb_mode:1,
+               reserved:7;
+
+       u8 callchain_nr;
+       u32 events_flags;
 };
 
 struct quadd_mmap_data {
        u32 pid;
-       u32 addr;
+       u64 addr;
        u64 len;
-       u64 pgoff;
+       u32 pgoff;
 
-       u32 filename_length;
+       u16 filename_length;
 };
 
 struct quadd_ma_data {
        u32 pid;
        u64 time;
 
-       u64 vm_size;
-       u64 rss_size;
+       u32 vm_size;
+       u32 rss_size;
 };
 
 struct quadd_power_rate_data {
        u64 time;
 
-       u32 nr_cpus;
+       u8 nr_cpus;
 
        u32 gpu;
        u32 emc;
 };
 
 struct quadd_additional_sample {
-       u32 type;
+       u8 type;
 
-       u32 values[8];
-       u32 extra_length;
+       u32 values[6];
+       u16 extra_length;
 };
 
 enum {
@@ -186,42 +192,48 @@ enum {
 };
 
 struct quadd_debug_data {
-       u32 type;
+       u8 type;
 
-       u32 cpu;
        u32 pid;
        u64 time;
 
-       u64 timer_period;
+       u16     cpu:6,
+               user_mode:1,
+               lp_mode:1,
+               thumb_mode:1,
+               reserved:7;
 
-       u32 extra_value1;
-       u32 extra_value2;
-       u32 extra_value3;
+       u32 extra_value[2];
+       u16 extra_length;
 };
 
+#define QUADD_HEADER_MAGIC     0x1122
 
 struct quadd_header_data {
-       u32 version;
+       u16 magic;
+       u16 version;
 
        u32     backtrace:1,
                use_freq:1,
                system_wide:1,
                power_rate:1,
-               debug_samples:1;
+               debug_samples:1,
+               get_mmap:1,
+               reserved:26;    /* reserved fields for future extensions */
 
-       u64 period;
-       u32 ma_period;
-       u32 power_rate_period;
+       u32 freq;
+       u16 ma_freq;
+       u16 power_rate_freq;
 
-       u32 reserved[4];        /* reserved fields for future extensions */
+       u8 nr_events;
+       u16 extra_length;
 };
 
-#define QUADD_RECORD_MAGIC     0x33557799
+#define QUADD_RECORD_MAGIC     0x335577aa
 
 struct quadd_record_data {
-       u32 magic;      /* for debug */
-       u32 record_type;
-       u32 cpu_mode;
+       u32 magic;      /* temporary, it will be removed later */
+       u8 record_type;
 
        union {
                struct quadd_sample_data        sample;
@@ -232,7 +244,9 @@ struct quadd_record_data {
                struct quadd_power_rate_data    power_rate;
                struct quadd_additional_sample  additional_sample;
        };
-};
+} __aligned(4);
+
+#pragma pack(4)
 
 #define QUADD_MAX_PACKAGE_NAME 320
 
@@ -286,6 +300,7 @@ enum {
 
 #define QUADD_COMM_CAP_EXTRA_BT_KERNEL_CTX     (1 << 0)
 #define QUADD_COMM_CAP_EXTRA_GET_MMAP          (1 << 1)
+#define QUADD_COMM_CAP_EXTRA_GROUP_SAMPLES     (1 << 2)
 
 struct quadd_comm_cap {
        u32     pmu:1,