perf stat: Refactor aggregation code
[linux-3.10.git] / tools / perf / builtin-stat.c
1 /*
2  * builtin-stat.c
3  *
4  * Builtin stat command: Give a precise performance counters summary
5  * overview about any workload, CPU or specific PID.
6  *
7  * Sample output:
8
9    $ perf stat ./hackbench 10
10
11   Time: 0.118
12
13   Performance counter stats for './hackbench 10':
14
15        1708.761321 task-clock                #   11.037 CPUs utilized
16             41,190 context-switches          #    0.024 M/sec
17              6,735 CPU-migrations            #    0.004 M/sec
18             17,318 page-faults               #    0.010 M/sec
19      5,205,202,243 cycles                    #    3.046 GHz
20      3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
21      1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
22      2,603,501,247 instructions              #    0.50  insns per cycle
23                                              #    1.48  stalled cycles per insn
24        484,357,498 branches                  #  283.455 M/sec
25          6,388,934 branch-misses             #    1.32% of all branches
26
27         0.154822978  seconds time elapsed
28
29  *
30  * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
31  *
32  * Improvements and fixes by:
33  *
34  *   Arjan van de Ven <arjan@linux.intel.com>
35  *   Yanmin Zhang <yanmin.zhang@intel.com>
36  *   Wu Fengguang <fengguang.wu@intel.com>
37  *   Mike Galbraith <efault@gmx.de>
38  *   Paul Mackerras <paulus@samba.org>
39  *   Jaswinder Singh Rajput <jaswinder@kernel.org>
40  *
41  * Released under the GPL v2. (and only v2, not any later version)
42  */
43
44 #include "perf.h"
45 #include "builtin.h"
46 #include "util/util.h"
47 #include "util/parse-options.h"
48 #include "util/parse-events.h"
49 #include "util/event.h"
50 #include "util/evlist.h"
51 #include "util/evsel.h"
52 #include "util/debug.h"
53 #include "util/color.h"
54 #include "util/stat.h"
55 #include "util/header.h"
56 #include "util/cpumap.h"
57 #include "util/thread.h"
58 #include "util/thread_map.h"
59
60 #include <stdlib.h>
61 #include <sys/prctl.h>
62 #include <locale.h>
63
/* Fixed strings used when formatting the report. */
64 #define DEFAULT_SEPARATOR       " "
65 #define CNTR_NOT_SUPPORTED      "<not supported>"
66 #define CNTR_NOT_COUNTED        "<not counted>"
67
/* Printers defined further down in this file; print_interval() calls the last three. */
68 static void print_stat(int argc, const char **argv);
69 static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
70 static void print_counter(struct perf_evsel *counter, char *prefix);
71 static void print_aggr(char *prefix);
72
/* The list of events being counted; shared by every function in this file. */
73 static struct perf_evlist       *evsel_list;
74
/* What is being measured (CPU list, tasks, uid); uid starts out as UINT_MAX. */
75 static struct perf_target       target = {
76         .uid    = UINT_MAX,
77 };
78
/* Selects how counts are read and how result rows are labelled. */
79 enum aggr_mode {
80         AGGR_NONE,      /* one row per CPU, labelled "CPU<n>" */
81         AGGR_GLOBAL,    /* one row per event, summed via read_counter_aggr() */
82         AGGR_SOCKET,    /* rows labelled "S<id>" plus a cpu count, via print_aggr() */
83 };
84
/*
 * Tool-wide options and state.  What the code visible in this file does
 * with them:
 *
 *   run_count   - print_noise() prints nothing when this is 1
 *   no_inherit  - inverted into perf_event_attr.inherit
 *   scale       - requests the time-enabled/time-running read format and is
 *                 passed on to the counter read helpers
 *   child_pid   - pid handed to waitpid() and to kill() on setup failure
 *   big_num     - selects the "%'" (grouped digits) number format
 *   csv_sep     - separator string placed between output fields
 *   csv_output  - selects the unpadded output formats
 *   group       - makes the evlist set a group leader before opening
 *   output      - stream all results are written to
 *   pre_cmd / post_cmd - shell commands run through system() around a run
 *   sync_run    - calls sync() before a run
 *   interval    - print period in milliseconds; 0 disables interval printing
 *   ref_time    - CLOCK_MONOTONIC timestamp taken when counting starts
 *   done        - polled by the wait loop when no workload was forked
 *
 * NOTE(review): null_run, detailed_run, big_num_opt, forever, aggr_map and
 * aggr_get_id are used outside the part of the file reviewed here.
 */
85 static int                      run_count                       =  1;
86 static bool                     no_inherit                      = false;
87 static bool                     scale                           =  true;
88 static enum aggr_mode           aggr_mode                       = AGGR_GLOBAL;
89 static pid_t                    child_pid                       = -1;
90 static bool                     null_run                        =  false;
91 static int                      detailed_run                    =  0;
92 static bool                     big_num                         =  true;
93 static int                      big_num_opt                     =  -1;
94 static const char               *csv_sep                        = NULL;
95 static bool                     csv_output                      = false;
96 static bool                     group                           = false;
97 static FILE                     *output                         = NULL;
98 static const char               *pre_cmd                        = NULL;
99 static const char               *post_cmd                       = NULL;
100 static bool                     sync_run                        = false;
101 static unsigned int             interval                        = 0;
102 static bool                     forever                         = false;
103 static struct timespec          ref_time;
104 static struct cpu_map           *aggr_map;
105 static int                      (*aggr_get_id)(struct cpu_map *m, int cpu);
106
107 static volatile int done = 0;
108
/*
 * Per-event private data hung off perf_evsel->priv.
 *
 * res_stats[i] accumulates the i-th aggregated counter value across reads
 * (see read_counter_aggr()).  Presumably the three slots are the count,
 * time enabled and time running - TODO confirm against the read helper.
 */
109 struct perf_stat {
110         struct stats      res_stats[3];
111 };
112
/*
 * Compute r = a - b for two timespec values.
 *
 * When a's nanosecond field is smaller than b's, one second is borrowed
 * so the nanosecond result does not go negative.
 */
static inline void diff_timespec(struct timespec *r, struct timespec *a,
                                 struct timespec *b)
{
        int borrow = a->tv_nsec < b->tv_nsec;
        long nsec;

        if (borrow)
                nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
        else
                nsec = a->tv_nsec - b->tv_nsec;

        r->tv_sec = a->tv_sec - b->tv_sec - borrow;
        r->tv_nsec = nsec;
}
124
125 static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
126 {
127         return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
128 }
129
130 static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
131 {
132         return perf_evsel__cpus(evsel)->nr;
133 }
134
135 static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
136 {
137         memset(evsel->priv, 0, sizeof(struct perf_stat));
138 }
139
140 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
141 {
142         evsel->priv = zalloc(sizeof(struct perf_stat));
143         return evsel->priv == NULL ? -ENOMEM : 0;
144 }
145
146 static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
147 {
148         free(evsel->priv);
149         evsel->priv = NULL;
150 }
151
152 static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
153 {
154         void *addr;
155         size_t sz;
156
157         sz = sizeof(*evsel->counts) +
158              (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
159
160         addr = zalloc(sz);
161         if (!addr)
162                 return -ENOMEM;
163
164         evsel->prev_raw_counts =  addr;
165
166         return 0;
167 }
168
169 static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
170 {
171         free(evsel->prev_raw_counts);
172         evsel->prev_raw_counts = NULL;
173 }
174
175 static void perf_evlist__free_stats(struct perf_evlist *evlist)
176 {
177         struct perf_evsel *evsel;
178
179         list_for_each_entry(evsel, &evlist->entries, node) {
180                 perf_evsel__free_stat_priv(evsel);
181                 perf_evsel__free_counts(evsel);
182                 perf_evsel__free_prev_raw_counts(evsel);
183         }
184 }
185
186 static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
187 {
188         struct perf_evsel *evsel;
189
190         list_for_each_entry(evsel, &evlist->entries, node) {
191                 if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
192                     perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
193                     (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
194                         goto out_free;
195         }
196
197         return 0;
198
199 out_free:
200         perf_evlist__free_stats(evlist);
201         return -1;
202 }
203
/*
 * "Shadow" statistics: running stats of selected events, kept so that the
 * printers can derive ratios (cache miss percentages, stalled cycle
 * percentages, ...) from them.  Filled in by update_shadow_stats() and read
 * back by the print_*() helpers, which index the arrays by cpu.
 * walltime_nsecs_stats holds the elapsed wall-clock time of each run.
 */
204 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
205 static struct stats runtime_cycles_stats[MAX_NR_CPUS];
206 static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
207 static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
208 static struct stats runtime_branches_stats[MAX_NR_CPUS];
209 static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
210 static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
211 static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
212 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
213 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
214 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
215 static struct stats walltime_nsecs_stats;
216
217 static void perf_stat__reset_stats(struct perf_evlist *evlist)
218 {
219         struct perf_evsel *evsel;
220
221         list_for_each_entry(evsel, &evlist->entries, node) {
222                 perf_evsel__reset_stat_priv(evsel);
223                 perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
224         }
225
226         memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
227         memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
228         memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
229         memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
230         memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
231         memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
232         memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
233         memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
234         memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
235         memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
236         memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
237         memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
238 }
239
240 static int create_perf_stat_counter(struct perf_evsel *evsel)
241 {
242         struct perf_event_attr *attr = &evsel->attr;
243
244         if (scale)
245                 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
246                                     PERF_FORMAT_TOTAL_TIME_RUNNING;
247
248         attr->inherit = !no_inherit;
249
250         if (perf_target__has_cpu(&target))
251                 return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
252
253         if (!perf_target__has_task(&target) &&
254             perf_evsel__is_group_leader(evsel)) {
255                 attr->disabled = 1;
256                 attr->enable_on_exec = 1;
257         }
258
259         return perf_evsel__open_per_thread(evsel, evsel_list->threads);
260 }
261
262 /*
263  * Does the counter have nsecs as a unit?
264  */
265 static inline int nsec_counter(struct perf_evsel *evsel)
266 {
267         if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
268             perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
269                 return 1;
270
271         return 0;
272 }
273
274 /*
275  * Update various tracking values we maintain to print
276  * more semantic information such as miss/hit ratios,
277  * instruction rates, etc:
278  */
279 static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
280 {
281         if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
282                 update_stats(&runtime_nsecs_stats[0], count[0]);
283         else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
284                 update_stats(&runtime_cycles_stats[0], count[0]);
285         else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
286                 update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
287         else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
288                 update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
289         else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
290                 update_stats(&runtime_branches_stats[0], count[0]);
291         else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
292                 update_stats(&runtime_cacherefs_stats[0], count[0]);
293         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
294                 update_stats(&runtime_l1_dcache_stats[0], count[0]);
295         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
296                 update_stats(&runtime_l1_icache_stats[0], count[0]);
297         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
298                 update_stats(&runtime_ll_cache_stats[0], count[0]);
299         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
300                 update_stats(&runtime_dtlb_cache_stats[0], count[0]);
301         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
302                 update_stats(&runtime_itlb_cache_stats[0], count[0]);
303 }
304
305 /*
306  * Read out the results of a single counter:
307  * aggregate counts across CPUs in system-wide mode
308  */
309 static int read_counter_aggr(struct perf_evsel *counter)
310 {
311         struct perf_stat *ps = counter->priv;
312         u64 *count = counter->counts->aggr.values;
313         int i;
314
315         if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
316                                thread_map__nr(evsel_list->threads), scale) < 0)
317                 return -1;
318
319         for (i = 0; i < 3; i++)
320                 update_stats(&ps->res_stats[i], count[i]);
321
322         if (verbose) {
323                 fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
324                         perf_evsel__name(counter), count[0], count[1], count[2]);
325         }
326
327         /*
328          * Save the full runtime - to allow normalization during printout:
329          */
330         update_shadow_stats(counter, count);
331
332         return 0;
333 }
334
335 /*
336  * Read out the results of a single counter:
337  * do not aggregate counts across CPUs in system-wide mode
338  */
339 static int read_counter(struct perf_evsel *counter)
340 {
341         u64 *count;
342         int cpu;
343
344         for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
345                 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
346                         return -1;
347
348                 count = counter->counts->cpu[cpu].values;
349
350                 update_shadow_stats(counter, count);
351         }
352
353         return 0;
354 }
355
356 static void print_interval(void)
357 {
358         static int num_print_interval;
359         struct perf_evsel *counter;
360         struct perf_stat *ps;
361         struct timespec ts, rs;
362         char prefix[64];
363
364         if (aggr_mode == AGGR_GLOBAL) {
365                 list_for_each_entry(counter, &evsel_list->entries, node) {
366                         ps = counter->priv;
367                         memset(ps->res_stats, 0, sizeof(ps->res_stats));
368                         read_counter_aggr(counter);
369                 }
370         } else  {
371                 list_for_each_entry(counter, &evsel_list->entries, node) {
372                         ps = counter->priv;
373                         memset(ps->res_stats, 0, sizeof(ps->res_stats));
374                         read_counter(counter);
375                 }
376         }
377
378         clock_gettime(CLOCK_MONOTONIC, &ts);
379         diff_timespec(&rs, &ts, &ref_time);
380         sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
381
382         if (num_print_interval == 0 && !csv_output) {
383                 switch (aggr_mode) {
384                 case AGGR_SOCKET:
385                         fprintf(output, "#           time socket cpus             counts events\n");
386                         break;
387                 case AGGR_NONE:
388                         fprintf(output, "#           time CPU                 counts events\n");
389                         break;
390                 case AGGR_GLOBAL:
391                 default:
392                         fprintf(output, "#           time             counts events\n");
393                 }
394         }
395
396         if (++num_print_interval == 25)
397                 num_print_interval = 0;
398
399         switch (aggr_mode) {
400         case AGGR_SOCKET:
401                 print_aggr(prefix);
402                 break;
403         case AGGR_NONE:
404                 list_for_each_entry(counter, &evsel_list->entries, node)
405                         print_counter(counter, prefix);
406                 break;
407         case AGGR_GLOBAL:
408         default:
409                 list_for_each_entry(counter, &evsel_list->entries, node)
410                         print_counter_aggr(counter, prefix);
411         }
412 }
413
414 static int __run_perf_stat(int argc, const char **argv)
415 {
416         char msg[512];
417         unsigned long long t0, t1;
418         struct perf_evsel *counter;
419         struct timespec ts;
420         int status = 0;
421         const bool forks = (argc > 0);
422
423         if (interval) {
424                 ts.tv_sec  = interval / 1000;
425                 ts.tv_nsec = (interval % 1000) * 1000000;
426         } else {
427                 ts.tv_sec  = 1;
428                 ts.tv_nsec = 0;
429         }
430
431         if (forks) {
432                 if (perf_evlist__prepare_workload(evsel_list, &target, argv,
433                                                   false, false) < 0) {
434                         perror("failed to prepare workload");
435                         return -1;
436                 }
437         }
438
439         if (group)
440                 perf_evlist__set_leader(evsel_list);
441
442         list_for_each_entry(counter, &evsel_list->entries, node) {
443                 if (create_perf_stat_counter(counter) < 0) {
444                         /*
445                          * PPC returns ENXIO for HW counters until 2.6.37
446                          * (behavior changed with commit b0a873e).
447                          */
448                         if (errno == EINVAL || errno == ENOSYS ||
449                             errno == ENOENT || errno == EOPNOTSUPP ||
450                             errno == ENXIO) {
451                                 if (verbose)
452                                         ui__warning("%s event is not supported by the kernel.\n",
453                                                     perf_evsel__name(counter));
454                                 counter->supported = false;
455                                 continue;
456                         }
457
458                         perf_evsel__open_strerror(counter, &target,
459                                                   errno, msg, sizeof(msg));
460                         ui__error("%s\n", msg);
461
462                         if (child_pid != -1)
463                                 kill(child_pid, SIGTERM);
464
465                         return -1;
466                 }
467                 counter->supported = true;
468         }
469
470         if (perf_evlist__apply_filters(evsel_list)) {
471                 error("failed to set filter with %d (%s)\n", errno,
472                         strerror(errno));
473                 return -1;
474         }
475
476         /*
477          * Enable counters and exec the command:
478          */
479         t0 = rdclock();
480         clock_gettime(CLOCK_MONOTONIC, &ref_time);
481
482         if (forks) {
483                 perf_evlist__start_workload(evsel_list);
484
485                 if (interval) {
486                         while (!waitpid(child_pid, &status, WNOHANG)) {
487                                 nanosleep(&ts, NULL);
488                                 print_interval();
489                         }
490                 }
491                 wait(&status);
492                 if (WIFSIGNALED(status))
493                         psignal(WTERMSIG(status), argv[0]);
494         } else {
495                 while (!done) {
496                         nanosleep(&ts, NULL);
497                         if (interval)
498                                 print_interval();
499                 }
500         }
501
502         t1 = rdclock();
503
504         update_stats(&walltime_nsecs_stats, t1 - t0);
505
506         if (aggr_mode == AGGR_GLOBAL) {
507                 list_for_each_entry(counter, &evsel_list->entries, node) {
508                         read_counter_aggr(counter);
509                         perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
510                                              thread_map__nr(evsel_list->threads));
511                 }
512         } else {
513                 list_for_each_entry(counter, &evsel_list->entries, node) {
514                         read_counter(counter);
515                         perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
516                 }
517         }
518
519         return WEXITSTATUS(status);
520 }
521
522 static int run_perf_stat(int argc __maybe_unused, const char **argv)
523 {
524         int ret;
525
526         if (pre_cmd) {
527                 ret = system(pre_cmd);
528                 if (ret)
529                         return ret;
530         }
531
532         if (sync_run)
533                 sync();
534
535         ret = __run_perf_stat(argc, argv);
536         if (ret)
537                 return ret;
538
539         if (post_cmd) {
540                 ret = system(post_cmd);
541                 if (ret)
542                         return ret;
543         }
544
545         return ret;
546 }
547
548 static void print_noise_pct(double total, double avg)
549 {
550         double pct = rel_stddev_stats(total, avg);
551
552         if (csv_output)
553                 fprintf(output, "%s%.2f%%", csv_sep, pct);
554         else if (pct)
555                 fprintf(output, "  ( +-%6.2f%% )", pct);
556 }
557
558 static void print_noise(struct perf_evsel *evsel, double avg)
559 {
560         struct perf_stat *ps;
561
562         if (run_count == 1)
563                 return;
564
565         ps = evsel->priv;
566         print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
567 }
568
569 static void aggr_printout(struct perf_evsel *evsel, int cpu, int nr)
570 {
571         switch (aggr_mode) {
572         case AGGR_SOCKET:
573                 fprintf(output, "S%*d%s%*d%s",
574                         csv_output ? 0 : -5,
575                         cpu,
576                         csv_sep,
577                         csv_output ? 0 : 4,
578                         nr,
579                         csv_sep);
580                         break;
581         case AGGR_NONE:
582                 fprintf(output, "CPU%*d%s",
583                         csv_output ? 0 : -4,
584                         perf_evsel__cpus(evsel)->map[cpu], csv_sep);
585                 break;
586         case AGGR_GLOBAL:
587         default:
588                 break;
589         }
590 }
591
592 static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
593 {
594         double msecs = avg / 1e6;
595         const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";
596
597         aggr_printout(evsel, cpu, nr);
598
599         fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));
600
601         if (evsel->cgrp)
602                 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
603
604         if (csv_output || interval)
605                 return;
606
607         if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
608                 fprintf(output, " # %8.3f CPUs utilized          ",
609                         avg / avg_stats(&walltime_nsecs_stats));
610         else
611                 fprintf(output, "                                   ");
612 }
613
614 /* Row selector for the threshold table inside get_ratio_color() */
615 enum grc_type {
616         GRC_STALLED_CYCLES_FE,  /* frontend stalled-cycle percentages */
617         GRC_STALLED_CYCLES_BE,  /* backend stalled-cycle percentages */
618         GRC_CACHE_MISSES,       /* cache/TLB miss and branch-miss percentages */
619         GRC_MAX_NR              /* number of rows, not a real type */
620 };
621
622 static const char *get_ratio_color(enum grc_type type, double ratio)
623 {
624         static const double grc_table[GRC_MAX_NR][3] = {
625                 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
626                 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
627                 [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
628         };
629         const char *color = PERF_COLOR_NORMAL;
630
631         if (ratio > grc_table[type][0])
632                 color = PERF_COLOR_RED;
633         else if (ratio > grc_table[type][1])
634                 color = PERF_COLOR_MAGENTA;
635         else if (ratio > grc_table[type][2])
636                 color = PERF_COLOR_YELLOW;
637
638         return color;
639 }
640
641 static void print_stalled_cycles_frontend(int cpu,
642                                           struct perf_evsel *evsel
643                                           __maybe_unused, double avg)
644 {
645         double total, ratio = 0.0;
646         const char *color;
647
648         total = avg_stats(&runtime_cycles_stats[cpu]);
649
650         if (total)
651                 ratio = avg / total * 100.0;
652
653         color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
654
655         fprintf(output, " #  ");
656         color_fprintf(output, color, "%6.2f%%", ratio);
657         fprintf(output, " frontend cycles idle   ");
658 }
659
660 static void print_stalled_cycles_backend(int cpu,
661                                          struct perf_evsel *evsel
662                                          __maybe_unused, double avg)
663 {
664         double total, ratio = 0.0;
665         const char *color;
666
667         total = avg_stats(&runtime_cycles_stats[cpu]);
668
669         if (total)
670                 ratio = avg / total * 100.0;
671
672         color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
673
674         fprintf(output, " #  ");
675         color_fprintf(output, color, "%6.2f%%", ratio);
676         fprintf(output, " backend  cycles idle   ");
677 }
678
679 static void print_branch_misses(int cpu,
680                                 struct perf_evsel *evsel __maybe_unused,
681                                 double avg)
682 {
683         double total, ratio = 0.0;
684         const char *color;
685
686         total = avg_stats(&runtime_branches_stats[cpu]);
687
688         if (total)
689                 ratio = avg / total * 100.0;
690
691         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
692
693         fprintf(output, " #  ");
694         color_fprintf(output, color, "%6.2f%%", ratio);
695         fprintf(output, " of all branches        ");
696 }
697
698 static void print_l1_dcache_misses(int cpu,
699                                    struct perf_evsel *evsel __maybe_unused,
700                                    double avg)
701 {
702         double total, ratio = 0.0;
703         const char *color;
704
705         total = avg_stats(&runtime_l1_dcache_stats[cpu]);
706
707         if (total)
708                 ratio = avg / total * 100.0;
709
710         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
711
712         fprintf(output, " #  ");
713         color_fprintf(output, color, "%6.2f%%", ratio);
714         fprintf(output, " of all L1-dcache hits  ");
715 }
716
717 static void print_l1_icache_misses(int cpu,
718                                    struct perf_evsel *evsel __maybe_unused,
719                                    double avg)
720 {
721         double total, ratio = 0.0;
722         const char *color;
723
724         total = avg_stats(&runtime_l1_icache_stats[cpu]);
725
726         if (total)
727                 ratio = avg / total * 100.0;
728
729         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
730
731         fprintf(output, " #  ");
732         color_fprintf(output, color, "%6.2f%%", ratio);
733         fprintf(output, " of all L1-icache hits  ");
734 }
735
736 static void print_dtlb_cache_misses(int cpu,
737                                     struct perf_evsel *evsel __maybe_unused,
738                                     double avg)
739 {
740         double total, ratio = 0.0;
741         const char *color;
742
743         total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
744
745         if (total)
746                 ratio = avg / total * 100.0;
747
748         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
749
750         fprintf(output, " #  ");
751         color_fprintf(output, color, "%6.2f%%", ratio);
752         fprintf(output, " of all dTLB cache hits ");
753 }
754
755 static void print_itlb_cache_misses(int cpu,
756                                     struct perf_evsel *evsel __maybe_unused,
757                                     double avg)
758 {
759         double total, ratio = 0.0;
760         const char *color;
761
762         total = avg_stats(&runtime_itlb_cache_stats[cpu]);
763
764         if (total)
765                 ratio = avg / total * 100.0;
766
767         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
768
769         fprintf(output, " #  ");
770         color_fprintf(output, color, "%6.2f%%", ratio);
771         fprintf(output, " of all iTLB cache hits ");
772 }
773
774 static void print_ll_cache_misses(int cpu,
775                                   struct perf_evsel *evsel __maybe_unused,
776                                   double avg)
777 {
778         double total, ratio = 0.0;
779         const char *color;
780
781         total = avg_stats(&runtime_ll_cache_stats[cpu]);
782
783         if (total)
784                 ratio = avg / total * 100.0;
785
786         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
787
788         fprintf(output, " #  ");
789         color_fprintf(output, color, "%6.2f%%", ratio);
790         fprintf(output, " of all LL-cache hits   ");
791 }
792
793 static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
794 {
795         double total, ratio = 0.0;
796         const char *fmt;
797
798         if (csv_output)
799                 fmt = "%.0f%s%s";
800         else if (big_num)
801                 fmt = "%'18.0f%s%-25s";
802         else
803                 fmt = "%18.0f%s%-25s";
804
805         aggr_printout(evsel, cpu, nr);
806
807         if (aggr_mode == AGGR_GLOBAL)
808                 cpu = 0;
809
810         fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel));
811
812         if (evsel->cgrp)
813                 fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
814
815         if (csv_output || interval)
816                 return;
817
818         if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
819                 total = avg_stats(&runtime_cycles_stats[cpu]);
820                 if (total)
821                         ratio = avg / total;
822
823                 fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
824
825                 total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
826                 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
827
828                 if (total && avg) {
829                         ratio = total / avg;
830                         fprintf(output, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
831                 }
832
833         } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
834                         runtime_branches_stats[cpu].n != 0) {
835                 print_branch_misses(cpu, evsel, avg);
836         } else if (
837                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
838                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
839                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
840                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
841                         runtime_l1_dcache_stats[cpu].n != 0) {
842                 print_l1_dcache_misses(cpu, evsel, avg);
843         } else if (
844                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
845                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
846                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
847                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
848                         runtime_l1_icache_stats[cpu].n != 0) {
849                 print_l1_icache_misses(cpu, evsel, avg);
850         } else if (
851                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
852                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
853                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
854                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
855                         runtime_dtlb_cache_stats[cpu].n != 0) {
856                 print_dtlb_cache_misses(cpu, evsel, avg);
857         } else if (
858                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
859                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
860                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
861                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
862                         runtime_itlb_cache_stats[cpu].n != 0) {
863                 print_itlb_cache_misses(cpu, evsel, avg);
864         } else if (
865                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
866                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
867                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
868                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
869                         runtime_ll_cache_stats[cpu].n != 0) {
870                 print_ll_cache_misses(cpu, evsel, avg);
871         } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
872                         runtime_cacherefs_stats[cpu].n != 0) {
873                 total = avg_stats(&runtime_cacherefs_stats[cpu]);
874
875                 if (total)
876                         ratio = avg * 100 / total;
877
878                 fprintf(output, " # %8.3f %% of all cache refs    ", ratio);
879
880         } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
881                 print_stalled_cycles_frontend(cpu, evsel, avg);
882         } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
883                 print_stalled_cycles_backend(cpu, evsel, avg);
884         } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
885                 total = avg_stats(&runtime_nsecs_stats[cpu]);
886
887                 if (total)
888                         ratio = 1.0 * avg / total;
889
890                 fprintf(output, " # %8.3f GHz                    ", ratio);
891         } else if (runtime_nsecs_stats[cpu].n != 0) {
892                 char unit = 'M';
893
894                 total = avg_stats(&runtime_nsecs_stats[cpu]);
895
896                 if (total)
897                         ratio = 1000.0 * avg / total;
898                 if (ratio < 0.001) {
899                         ratio *= 1000;
900                         unit = 'K';
901                 }
902
903                 fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
904         } else {
905                 fprintf(output, "                                   ");
906         }
907 }
908
909 static void print_aggr(char *prefix)
910 {
911         struct perf_evsel *counter;
912         int cpu, s, s2, id, nr;
913         u64 ena, run, val;
914
915         if (!(aggr_map || aggr_get_id))
916                 return;
917
918         for (s = 0; s < aggr_map->nr; s++) {
919                 id = aggr_map->map[s];
920                 list_for_each_entry(counter, &evsel_list->entries, node) {
921                         val = ena = run = 0;
922                         nr = 0;
923                         for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
924                                 s2 = aggr_get_id(evsel_list->cpus, cpu);
925                                 if (s2 != id)
926                                         continue;
927                                 val += counter->counts->cpu[cpu].val;
928                                 ena += counter->counts->cpu[cpu].ena;
929                                 run += counter->counts->cpu[cpu].run;
930                                 nr++;
931                         }
932                         if (prefix)
933                                 fprintf(output, "%s", prefix);
934
935                         if (run == 0 || ena == 0) {
936                                 aggr_printout(counter, cpu, nr);
937
938                                 fprintf(output, "%*s%s%*s",
939                                         csv_output ? 0 : 18,
940                                         counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
941                                         csv_sep,
942                                         csv_output ? 0 : -24,
943                                         perf_evsel__name(counter));
944
945                                 if (counter->cgrp)
946                                         fprintf(output, "%s%s",
947                                                 csv_sep, counter->cgrp->name);
948
949                                 fputc('\n', output);
950                                 continue;
951                         }
952
953                         if (nsec_counter(counter))
954                                 nsec_printout(id, nr, counter, val);
955                         else
956                                 abs_printout(id, nr, counter, val);
957
958                         if (!csv_output) {
959                                 print_noise(counter, 1.0);
960
961                                 if (run != ena)
962                                         fprintf(output, "  (%.2f%%)",
963                                                 100.0 * run / ena);
964                         }
965                         fputc('\n', output);
966                 }
967         }
968 }
969
970 /*
971  * Print out the results of a single counter:
972  * aggregated counts in system-wide mode
973  */
974 static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
975 {
976         struct perf_stat *ps = counter->priv;
977         double avg = avg_stats(&ps->res_stats[0]);
978         int scaled = counter->counts->scaled;
979
980         if (prefix)
981                 fprintf(output, "%s", prefix);
982
983         if (scaled == -1) {
984                 fprintf(output, "%*s%s%*s",
985                         csv_output ? 0 : 18,
986                         counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
987                         csv_sep,
988                         csv_output ? 0 : -24,
989                         perf_evsel__name(counter));
990
991                 if (counter->cgrp)
992                         fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
993
994                 fputc('\n', output);
995                 return;
996         }
997
998         if (nsec_counter(counter))
999                 nsec_printout(-1, 0, counter, avg);
1000         else
1001                 abs_printout(-1, 0, counter, avg);
1002
1003         print_noise(counter, avg);
1004
1005         if (csv_output) {
1006                 fputc('\n', output);
1007                 return;
1008         }
1009
1010         if (scaled) {
1011                 double avg_enabled, avg_running;
1012
1013                 avg_enabled = avg_stats(&ps->res_stats[1]);
1014                 avg_running = avg_stats(&ps->res_stats[2]);
1015
1016                 fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
1017         }
1018         fprintf(output, "\n");
1019 }
1020
1021 /*
1022  * Print out the results of a single counter:
1023  * does not use aggregated count in system-wide
1024  */
1025 static void print_counter(struct perf_evsel *counter, char *prefix)
1026 {
1027         u64 ena, run, val;
1028         int cpu;
1029
1030         for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1031                 val = counter->counts->cpu[cpu].val;
1032                 ena = counter->counts->cpu[cpu].ena;
1033                 run = counter->counts->cpu[cpu].run;
1034
1035                 if (prefix)
1036                         fprintf(output, "%s", prefix);
1037
1038                 if (run == 0 || ena == 0) {
1039                         fprintf(output, "CPU%*d%s%*s%s%*s",
1040                                 csv_output ? 0 : -4,
1041                                 perf_evsel__cpus(counter)->map[cpu], csv_sep,
1042                                 csv_output ? 0 : 18,
1043                                 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1044                                 csv_sep,
1045                                 csv_output ? 0 : -24,
1046                                 perf_evsel__name(counter));
1047
1048                         if (counter->cgrp)
1049                                 fprintf(output, "%s%s",
1050                                         csv_sep, counter->cgrp->name);
1051
1052                         fputc('\n', output);
1053                         continue;
1054                 }
1055
1056                 if (nsec_counter(counter))
1057                         nsec_printout(cpu, 0, counter, val);
1058                 else
1059                         abs_printout(cpu, 0, counter, val);
1060
1061                 if (!csv_output) {
1062                         print_noise(counter, 1.0);
1063
1064                         if (run != ena)
1065                                 fprintf(output, "  (%.2f%%)",
1066                                         100.0 * run / ena);
1067                 }
1068                 fputc('\n', output);
1069         }
1070 }
1071
1072 static void print_stat(int argc, const char **argv)
1073 {
1074         struct perf_evsel *counter;
1075         int i;
1076
1077         fflush(stdout);
1078
1079         if (!csv_output) {
1080                 fprintf(output, "\n");
1081                 fprintf(output, " Performance counter stats for ");
1082                 if (!perf_target__has_task(&target)) {
1083                         fprintf(output, "\'%s", argv[0]);
1084                         for (i = 1; i < argc; i++)
1085                                 fprintf(output, " %s", argv[i]);
1086                 } else if (target.pid)
1087                         fprintf(output, "process id \'%s", target.pid);
1088                 else
1089                         fprintf(output, "thread id \'%s", target.tid);
1090
1091                 fprintf(output, "\'");
1092                 if (run_count > 1)
1093                         fprintf(output, " (%d runs)", run_count);
1094                 fprintf(output, ":\n\n");
1095         }
1096
1097         switch (aggr_mode) {
1098         case AGGR_SOCKET:
1099                 print_aggr(NULL);
1100                 break;
1101         case AGGR_GLOBAL:
1102                 list_for_each_entry(counter, &evsel_list->entries, node)
1103                         print_counter_aggr(counter, NULL);
1104                 break;
1105         case AGGR_NONE:
1106                 list_for_each_entry(counter, &evsel_list->entries, node)
1107                         print_counter(counter, NULL);
1108                 break;
1109         default:
1110                 break;
1111         }
1112
1113         if (!csv_output) {
1114                 if (!null_run)
1115                         fprintf(output, "\n");
1116                 fprintf(output, " %17.9f seconds time elapsed",
1117                                 avg_stats(&walltime_nsecs_stats)/1e9);
1118                 if (run_count > 1) {
1119                         fprintf(output, "                                        ");
1120                         print_noise_pct(stddev_stats(&walltime_nsecs_stats),
1121                                         avg_stats(&walltime_nsecs_stats));
1122                 }
1123                 fprintf(output, "\n\n");
1124         }
1125 }
1126
1127 static volatile int signr = -1;
1128
1129 static void skip_signal(int signo)
1130 {
1131         if ((child_pid == -1) || interval)
1132                 done = 1;
1133
1134         signr = signo;
1135 }
1136
1137 static void sig_atexit(void)
1138 {
1139         if (child_pid != -1)
1140                 kill(child_pid, SIGTERM);
1141
1142         if (signr == -1)
1143                 return;
1144
1145         signal(signr, SIG_DFL);
1146         kill(getpid(), signr);
1147 }
1148
1149 static int stat__set_big_num(const struct option *opt __maybe_unused,
1150                              const char *s __maybe_unused, int unset)
1151 {
1152         big_num_opt = unset ? 0 : 1;
1153         return 0;
1154 }
1155
1156 static int perf_stat_init_aggr_mode(void)
1157 {
1158         switch (aggr_mode) {
1159         case AGGR_SOCKET:
1160                 if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1161                         perror("cannot build socket map");
1162                         return -1;
1163                 }
1164                 aggr_get_id = cpu_map__get_socket;
1165                 break;
1166         case AGGR_NONE:
1167         case AGGR_GLOBAL:
1168         default:
1169                 break;
1170         }
1171         return 0;
1172 }
1173
1174
1175 /*
1176  * Add default attributes, if there were no attributes specified or
1177  * if -d/--detailed, -d -d or -d -d -d is used:
1178  */
1179 static int add_default_attributes(void)
1180 {
1181         struct perf_event_attr default_attrs[] = {
1182
1183   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
1184   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
1185   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
1186   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },
1187
1188   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
1189   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
1190   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND  },
1191   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
1192   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
1193   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },
1194
1195 };
1196
1197 /*
1198  * Detailed stats (-d), covering the L1 and last level data caches:
1199  */
1200         struct perf_event_attr detailed_attrs[] = {
1201
1202   { .type = PERF_TYPE_HW_CACHE,
1203     .config =
1204          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1205         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1206         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1207
1208   { .type = PERF_TYPE_HW_CACHE,
1209     .config =
1210          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1211         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1212         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1213
1214   { .type = PERF_TYPE_HW_CACHE,
1215     .config =
1216          PERF_COUNT_HW_CACHE_LL                 <<  0  |
1217         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1218         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1219
1220   { .type = PERF_TYPE_HW_CACHE,
1221     .config =
1222          PERF_COUNT_HW_CACHE_LL                 <<  0  |
1223         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1224         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1225 };
1226
1227 /*
1228  * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1229  */
1230         struct perf_event_attr very_detailed_attrs[] = {
1231
1232   { .type = PERF_TYPE_HW_CACHE,
1233     .config =
1234          PERF_COUNT_HW_CACHE_L1I                <<  0  |
1235         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1236         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1237
1238   { .type = PERF_TYPE_HW_CACHE,
1239     .config =
1240          PERF_COUNT_HW_CACHE_L1I                <<  0  |
1241         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1242         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1243
1244   { .type = PERF_TYPE_HW_CACHE,
1245     .config =
1246          PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1247         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1248         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1249
1250   { .type = PERF_TYPE_HW_CACHE,
1251     .config =
1252          PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1253         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1254         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1255
1256   { .type = PERF_TYPE_HW_CACHE,
1257     .config =
1258          PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1259         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1260         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1261
1262   { .type = PERF_TYPE_HW_CACHE,
1263     .config =
1264          PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1265         (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1266         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1267
1268 };
1269
1270 /*
1271  * Very, very detailed stats (-d -d -d), adding prefetch events:
1272  */
1273         struct perf_event_attr very_very_detailed_attrs[] = {
1274
1275   { .type = PERF_TYPE_HW_CACHE,
1276     .config =
1277          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1278         (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1279         (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1280
1281   { .type = PERF_TYPE_HW_CACHE,
1282     .config =
1283          PERF_COUNT_HW_CACHE_L1D                <<  0  |
1284         (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1285         (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1286 };
1287
1288         /* Set attrs if no event is selected and !null_run: */
1289         if (null_run)
1290                 return 0;
1291
1292         if (!evsel_list->nr_entries) {
1293                 if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1294                         return -1;
1295         }
1296
1297         /* Detailed events get appended to the event list: */
1298
1299         if (detailed_run <  1)
1300                 return 0;
1301
1302         /* Append detailed run extra attributes: */
1303         if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1304                 return -1;
1305
1306         if (detailed_run < 2)
1307                 return 0;
1308
1309         /* Append very detailed run extra attributes: */
1310         if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1311                 return -1;
1312
1313         if (detailed_run < 3)
1314                 return 0;
1315
1316         /* Append very, very detailed run extra attributes: */
1317         return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1318 }
1319
1320 int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1321 {
1322         bool append_file = false;
1323         int output_fd = 0;
1324         const char *output_name = NULL;
1325         const struct option options[] = {
1326         OPT_CALLBACK('e', "event", &evsel_list, "event",
1327                      "event selector. use 'perf list' to list available events",
1328                      parse_events_option),
1329         OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1330                      "event filter", parse_filter),
1331         OPT_BOOLEAN('i', "no-inherit", &no_inherit,
1332                     "child tasks do not inherit counters"),
1333         OPT_STRING('p', "pid", &target.pid, "pid",
1334                    "stat events on existing process id"),
1335         OPT_STRING('t', "tid", &target.tid, "tid",
1336                    "stat events on existing thread id"),
1337         OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1338                     "system-wide collection from all CPUs"),
1339         OPT_BOOLEAN('g', "group", &group,
1340                     "put the counters into a counter group"),
1341         OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
1342         OPT_INCR('v', "verbose", &verbose,
1343                     "be more verbose (show counter open errors, etc)"),
1344         OPT_INTEGER('r', "repeat", &run_count,
1345                     "repeat command and print average + stddev (max: 100, forever: 0)"),
1346         OPT_BOOLEAN('n', "null", &null_run,
1347                     "null run - dont start any counters"),
1348         OPT_INCR('d', "detailed", &detailed_run,
1349                     "detailed run - start a lot of events"),
1350         OPT_BOOLEAN('S', "sync", &sync_run,
1351                     "call sync() before starting a run"),
1352         OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
1353                            "print large numbers with thousands\' separators",
1354                            stat__set_big_num),
1355         OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1356                     "list of cpus to monitor in system-wide"),
1357         OPT_SET_UINT('A', "no-aggr", &aggr_mode,
1358                     "disable CPU count aggregation", AGGR_NONE),
1359         OPT_STRING('x', "field-separator", &csv_sep, "separator",
1360                    "print counts with custom separator"),
1361         OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1362                      "monitor event in cgroup name only", parse_cgroups),
1363         OPT_STRING('o', "output", &output_name, "file", "output file name"),
1364         OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1365         OPT_INTEGER(0, "log-fd", &output_fd,
1366                     "log output to fd, instead of stderr"),
1367         OPT_STRING(0, "pre", &pre_cmd, "command",
1368                         "command to run prior to the measured command"),
1369         OPT_STRING(0, "post", &post_cmd, "command",
1370                         "command to run after to the measured command"),
1371         OPT_UINTEGER('I', "interval-print", &interval,
1372                     "print counts at regular interval in ms (>= 100)"),
1373         OPT_SET_UINT(0, "aggr-socket", &aggr_mode,
1374                      "aggregate counts per processor socket", AGGR_SOCKET),
1375         OPT_END()
1376         };
1377         const char * const stat_usage[] = {
1378                 "perf stat [<options>] [<command>]",
1379                 NULL
1380         };
1381         int status = -ENOMEM, run_idx;
1382         const char *mode;
1383
1384         setlocale(LC_ALL, "");
1385
1386         evsel_list = perf_evlist__new();
1387         if (evsel_list == NULL)
1388                 return -ENOMEM;
1389
1390         argc = parse_options(argc, argv, options, stat_usage,
1391                 PARSE_OPT_STOP_AT_NON_OPTION);
1392
1393         output = stderr;
1394         if (output_name && strcmp(output_name, "-"))
1395                 output = NULL;
1396
1397         if (output_name && output_fd) {
1398                 fprintf(stderr, "cannot use both --output and --log-fd\n");
1399                 usage_with_options(stat_usage, options);
1400         }
1401
1402         if (output_fd < 0) {
1403                 fprintf(stderr, "argument to --log-fd must be a > 0\n");
1404                 usage_with_options(stat_usage, options);
1405         }
1406
1407         if (!output) {
1408                 struct timespec tm;
1409                 mode = append_file ? "a" : "w";
1410
1411                 output = fopen(output_name, mode);
1412                 if (!output) {
1413                         perror("failed to create output file");
1414                         return -1;
1415                 }
1416                 clock_gettime(CLOCK_REALTIME, &tm);
1417                 fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
1418         } else if (output_fd > 0) {
1419                 mode = append_file ? "a" : "w";
1420                 output = fdopen(output_fd, mode);
1421                 if (!output) {
1422                         perror("Failed opening logfd");
1423                         return -errno;
1424                 }
1425         }
1426
1427         if (csv_sep) {
1428                 csv_output = true;
1429                 if (!strcmp(csv_sep, "\\t"))
1430                         csv_sep = "\t";
1431         } else
1432                 csv_sep = DEFAULT_SEPARATOR;
1433
1434         /*
1435          * let the spreadsheet do the pretty-printing
1436          */
1437         if (csv_output) {
1438                 /* User explicitly passed -B? */
1439                 if (big_num_opt == 1) {
1440                         fprintf(stderr, "-B option not supported with -x\n");
1441                         usage_with_options(stat_usage, options);
1442                 } else /* Nope, so disable big number formatting */
1443                         big_num = false;
1444         } else if (big_num_opt == 0) /* User passed --no-big-num */
1445                 big_num = false;
1446
1447         if (!argc && !perf_target__has_task(&target))
1448                 usage_with_options(stat_usage, options);
1449         if (run_count < 0) {
1450                 usage_with_options(stat_usage, options);
1451         } else if (run_count == 0) {
1452                 forever = true;
1453                 run_count = 1;
1454         }
1455
1456         /* no_aggr, cgroup are for system-wide only */
1457         if ((aggr_mode != AGGR_GLOBAL || nr_cgroups)
1458              && !perf_target__has_cpu(&target)) {
1459                 fprintf(stderr, "both cgroup and no-aggregation "
1460                         "modes only available in system-wide mode\n");
1461
1462                 usage_with_options(stat_usage, options);
1463                 return -1;
1464         }
1465
1466         if (add_default_attributes())
1467                 goto out;
1468
1469         perf_target__validate(&target);
1470
1471         if (perf_evlist__create_maps(evsel_list, &target) < 0) {
1472                 if (perf_target__has_task(&target))
1473                         pr_err("Problems finding threads of monitor\n");
1474                 if (perf_target__has_cpu(&target))
1475                         perror("failed to parse CPUs map");
1476
1477                 usage_with_options(stat_usage, options);
1478                 return -1;
1479         }
1480         if (interval && interval < 100) {
1481                 pr_err("print interval must be >= 100ms\n");
1482                 usage_with_options(stat_usage, options);
1483                 return -1;
1484         }
1485
1486         if (perf_evlist__alloc_stats(evsel_list, interval))
1487                 goto out_free_maps;
1488
1489         if (perf_stat_init_aggr_mode())
1490                 goto out;
1491
1492         /*
1493          * We dont want to block the signals - that would cause
1494          * child tasks to inherit that and Ctrl-C would not work.
1495          * What we want is for Ctrl-C to work in the exec()-ed
1496          * task, but being ignored by perf stat itself:
1497          */
1498         atexit(sig_atexit);
1499         if (!forever)
1500                 signal(SIGINT,  skip_signal);
1501         signal(SIGCHLD, skip_signal);
1502         signal(SIGALRM, skip_signal);
1503         signal(SIGABRT, skip_signal);
1504
1505         status = 0;
1506         for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
1507                 if (run_count != 1 && verbose)
1508                         fprintf(output, "[ perf stat: executing run #%d ... ]\n",
1509                                 run_idx + 1);
1510
1511                 status = run_perf_stat(argc, argv);
1512                 if (forever && status != -1) {
1513                         print_stat(argc, argv);
1514                         perf_stat__reset_stats(evsel_list);
1515                 }
1516         }
1517
1518         if (!forever && status != -1 && !interval)
1519                 print_stat(argc, argv);
1520
1521         perf_evlist__free_stats(evsel_list);
1522 out_free_maps:
1523         perf_evlist__delete_maps(evsel_list);
1524 out:
1525         perf_evlist__delete(evsel_list);
1526         return status;
1527 }