perf: Rework/fix the whole read vs group stuff
Peter Zijlstra [Thu, 13 Aug 2009 09:47:53 +0000 (11:47 +0200)]
Replace PERF_SAMPLE_GROUP with PERF_SAMPLE_READ and introduce
PERF_FORMAT_GROUP to deal with group reads in a more generic
way.

This allows you to get group reads out of read() as well.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey J Ashford <cjashfor@us.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
LKML-Reference: <20090813103655.117411814@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

include/linux/perf_counter.h
kernel/perf_counter.c

index 2b36afe..b53f700 100644 (file)
@@ -115,7 +115,7 @@ enum perf_counter_sample_format {
        PERF_SAMPLE_TID                         = 1U << 1,
        PERF_SAMPLE_TIME                        = 1U << 2,
        PERF_SAMPLE_ADDR                        = 1U << 3,
-       PERF_SAMPLE_GROUP                       = 1U << 4,
+       PERF_SAMPLE_READ                        = 1U << 4,
        PERF_SAMPLE_CALLCHAIN                   = 1U << 5,
        PERF_SAMPLE_ID                          = 1U << 6,
        PERF_SAMPLE_CPU                         = 1U << 7,
@@ -127,16 +127,32 @@ enum perf_counter_sample_format {
 };
 
 /*
- * Bits that can be set in attr.read_format to request that
- * reads on the counter should return the indicated quantities,
- * in increasing order of bit value, after the counter value.
+ * The format of the data returned by read() on a perf counter fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ *     { u64           value;
+ *       { u64         time_enabled; } && PERF_FORMAT_ENABLED
+ *       { u64         time_running; } && PERF_FORMAT_RUNNING
+ *       { u64         id;           } && PERF_FORMAT_ID
+ *     } && !PERF_FORMAT_GROUP
+ *
+ *     { u64           nr;
+ *       { u64         time_enabled; } && PERF_FORMAT_ENABLED
+ *       { u64         time_running; } && PERF_FORMAT_RUNNING
+ *       { u64         value;
+ *         { u64       id;           } && PERF_FORMAT_ID
+ *       }             cntr[nr];
+ *     } && PERF_FORMAT_GROUP
+ * };
  */
 enum perf_counter_read_format {
        PERF_FORMAT_TOTAL_TIME_ENABLED          = 1U << 0,
        PERF_FORMAT_TOTAL_TIME_RUNNING          = 1U << 1,
        PERF_FORMAT_ID                          = 1U << 2,
+       PERF_FORMAT_GROUP                       = 1U << 3,
 
-       PERF_FORMAT_MAX = 1U << 3,              /* non-ABI */
+       PERF_FORMAT_MAX = 1U << 4,              /* non-ABI */
 };
 
 #define PERF_ATTR_SIZE_VER0    64      /* sizeof first published struct */
@@ -343,10 +359,8 @@ enum perf_event_type {
         * struct {
         *      struct perf_event_header        header;
         *      u32                             pid, tid;
-        *      u64                             value;
-        *      { u64           time_enabled;   } && PERF_FORMAT_ENABLED
-        *      { u64           time_running;   } && PERF_FORMAT_RUNNING
-        *      { u64           parent_id;      } && PERF_FORMAT_ID
+        *
+        *      struct read_format              values;
         * };
         */
        PERF_EVENT_READ                 = 8,
@@ -364,11 +378,22 @@ enum perf_event_type {
         *      { u32                   cpu, res; } && PERF_SAMPLE_CPU
         *      { u64                   period;   } && PERF_SAMPLE_PERIOD
         *
-        *      { u64                   nr;
-        *        { u64 id, val; }      cnt[nr];  } && PERF_SAMPLE_GROUP
+        *      { struct read_format    values;   } && PERF_SAMPLE_READ
         *
         *      { u64                   nr,
         *        u64                   ips[nr];  } && PERF_SAMPLE_CALLCHAIN
+        *
+        *      #
+        *      # The RAW record below is opaque data wrt the ABI
+        *      #
+        *      # That is, the ABI doesn't make any promises wrt to
+        *      # the stability of its content, it may vary depending
+        *      # on event, hardware, kernel version and phase of
+        *      # the moon.
+        *      #
+        *      # In other words, PERF_SAMPLE_RAW contents are not an ABI.
+        *      #
+        *
         *      { u32          &nb