Merge branch 'perf/fast' into perf/core
Ingo Molnar [Fri, 27 Jan 2012 11:07:57 +0000 (12:07 +0100)]
Merge reason: Let's get it ready for v3.4

Signed-off-by: Ingo Molnar <mingo@elte.hu>

1  2 
arch/arm/include/asm/perf_event.h
kernel/events/core.c
tools/perf/builtin-test.c

  #ifndef __ARM_PERF_EVENT_H__
  #define __ARM_PERF_EVENT_H__
  
- /* ARM performance counters start from 1 (in the cp15 accesses) so use the
-  * same indexes here for consistency. */
- #define PERF_EVENT_INDEX_OFFSET 1
  /* ARM perf PMU IDs for use by internal perf clients. */
  enum arm_perf_pmu_ids {
        ARM_PERF_PMU_ID_XSCALE1 = 0,
@@@ -32,4 -28,7 +28,4 @@@
  extern enum arm_perf_pmu_ids
  armpmu_get_pmu_id(void);
  
 -extern int
 -armpmu_get_max_events(void);
 -
  #endif /* __ARM_PERF_EVENT_H__ */
diff --combined kernel/events/core.c
@@@ -4,7 -4,7 +4,7 @@@
   *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
   *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
   *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 - *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 + *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
   *
   * For licensing details see kernel-base/COPYING
   */
@@@ -815,7 -815,7 +815,7 @@@ static void update_event_times(struct p
         * here.
         */
        if (is_cgroup_event(event))
 -              run_end = perf_event_time(event);
 +              run_end = perf_cgroup_event_time(event);
        else if (ctx->is_active)
                run_end = ctx->time;
        else
@@@ -3208,10 -3208,6 +3208,6 @@@ int perf_event_task_disable(void
        return 0;
  }
  
- #ifndef PERF_EVENT_INDEX_OFFSET
- # define PERF_EVENT_INDEX_OFFSET 0
- #endif
  static int perf_event_index(struct perf_event *event)
  {
        if (event->hw.state & PERF_HES_STOPPED)
        if (event->state != PERF_EVENT_STATE_ACTIVE)
                return 0;
  
-       return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
+       return event->pmu->event_idx(event);
  }
  
  static void calc_timer_values(struct perf_event *event,
+                               u64 *now,
                                u64 *enabled,
                                u64 *running)
  {
-       u64 now, ctx_time;
+       u64 ctx_time;
  
-       now = perf_clock();
-       ctx_time = event->shadow_ctx_time + now;
+       *now = perf_clock();
+       ctx_time = event->shadow_ctx_time + *now;
        *enabled = ctx_time - event->tstamp_enabled;
        *running = ctx_time - event->tstamp_running;
  }
  
+ void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+ {
+ }
  /*
   * Callers need to ensure there can be no nesting of this function, otherwise
   * the seqlock logic goes bad. We can not serialize this because the arch
@@@ -3244,7 -3245,7 +3245,7 @@@ void perf_event_update_userpage(struct 
  {
        struct perf_event_mmap_page *userpg;
        struct ring_buffer *rb;
-       u64 enabled, running;
+       u64 enabled, running, now;
  
        rcu_read_lock();
        /*
         * because of locking issue as we can be called in
         * NMI context
         */
-       calc_timer_values(event, &enabled, &running);
+       calc_timer_values(event, &now, &enabled, &running);
        rb = rcu_dereference(event->rb);
        if (!rb)
                goto unlock;
        barrier();
        userpg->index = perf_event_index(event);
        userpg->offset = perf_event_count(event);
-       if (event->state == PERF_EVENT_STATE_ACTIVE)
+       if (userpg->index)
                userpg->offset -= local64_read(&event->hw.prev_count);
  
        userpg->time_enabled = enabled +
        userpg->time_running = running +
                        atomic64_read(&event->child_total_time_running);
  
+       perf_update_user_clock(userpg, now);
        barrier();
        ++userpg->lock;
        preempt_enable();
@@@ -3360,13 -3363,9 +3363,13 @@@ static void ring_buffer_wakeup(struct p
  
        rcu_read_lock();
        rb = rcu_dereference(event->rb);
 -      list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
 +      if (!rb)
 +              goto unlock;
 +
 +      list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
                wake_up_all(&event->waitq);
 -      }
 +
 +unlock:
        rcu_read_unlock();
  }
  
@@@ -3538,6 -3537,8 +3541,8 @@@ static int perf_mmap(struct file *file
        event->mmap_user = get_current_user();
        vma->vm_mm->pinned_vm += event->mmap_locked;
  
+       perf_event_update_userpage(event);
  unlock:
        if (!ret)
                atomic_inc(&event->mmap_count);
@@@ -3769,7 -3770,7 +3774,7 @@@ static void perf_output_read_group(stru
  static void perf_output_read(struct perf_output_handle *handle,
                             struct perf_event *event)
  {
-       u64 enabled = 0, running = 0;
+       u64 enabled = 0, running = 0, now;
        u64 read_format = event->attr.read_format;
  
        /*
         * NMI context
         */
        if (read_format & PERF_FORMAT_TOTAL_TIMES)
-               calc_timer_values(event, &enabled, &running);
+               calc_timer_values(event, &now, &enabled, &running);
  
        if (event->attr.read_format & PERF_FORMAT_GROUP)
                perf_output_read_group(handle, event, enabled, running);
@@@ -4994,6 -4995,11 +4999,11 @@@ static int perf_swevent_init(struct per
        return 0;
  }
  
+ static int perf_swevent_event_idx(struct perf_event *event)
+ {
+       return 0;
+ }
  static struct pmu perf_swevent = {
        .task_ctx_nr    = perf_sw_context,
  
        .start          = perf_swevent_start,
        .stop           = perf_swevent_stop,
        .read           = perf_swevent_read,
+       .event_idx      = perf_swevent_event_idx,
  };
  
  #ifdef CONFIG_EVENT_TRACING
@@@ -5089,6 -5097,8 +5101,8 @@@ static struct pmu perf_tracepoint = 
        .start          = perf_swevent_start,
        .stop           = perf_swevent_stop,
        .read           = perf_swevent_read,
+       .event_idx      = perf_swevent_event_idx,
  };
  
  static inline void perf_tp_register(void)
@@@ -5173,7 -5183,7 +5187,7 @@@ static enum hrtimer_restart perf_sweven
        regs = get_irq_regs();
  
        if (regs && !perf_exclude_event(event, regs)) {
 -              if (!(event->attr.exclude_idle && current->pid == 0))
 +              if (!(event->attr.exclude_idle && is_idle_task(current)))
                        if (perf_event_overflow(event, &data, regs))
                                ret = HRTIMER_NORESTART;
        }
@@@ -5308,6 -5318,8 +5322,8 @@@ static struct pmu perf_cpu_clock = 
        .start          = cpu_clock_event_start,
        .stop           = cpu_clock_event_stop,
        .read           = cpu_clock_event_read,
+       .event_idx      = perf_swevent_event_idx,
  };
  
  /*
@@@ -5380,6 -5392,8 +5396,8 @@@ static struct pmu perf_task_clock = 
        .start          = task_clock_event_start,
        .stop           = task_clock_event_stop,
        .read           = task_clock_event_read,
+       .event_idx      = perf_swevent_event_idx,
  };
  
  static void perf_pmu_nop_void(struct pmu *pmu)
@@@ -5407,6 -5421,11 +5425,11 @@@ static void perf_pmu_cancel_txn(struct 
        perf_pmu_enable(pmu);
  }
  
+ static int perf_event_idx_default(struct perf_event *event)
+ {
+       return event->hw.idx + 1;
+ }
  /*
   * Ensures all contexts with the same task_ctx_nr have the same
   * pmu_cpu_context too.
@@@ -5493,6 -5512,7 +5516,7 @@@ static int pmu_dev_alloc(struct pmu *pm
        if (!pmu->dev)
                goto out;
  
+       pmu->dev->groups = pmu->attr_groups;
        device_initialize(pmu->dev);
        ret = dev_set_name(pmu->dev, "%s", pmu->name);
        if (ret)
@@@ -5596,6 -5616,9 +5620,9 @@@ got_cpu_context
                pmu->pmu_disable = perf_pmu_nop_void;
        }
  
+       if (!pmu->event_idx)
+               pmu->event_idx = perf_event_idx_default;
        list_add_rcu(&pmu->entry, &pmus);
        ret = 0;
  unlock:
@@@ -6941,13 -6964,10 +6968,13 @@@ static int __perf_cgroup_move(void *inf
        return 0;
  }
  
 -static void
 -perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task)
 +static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 +                             struct cgroup_taskset *tset)
  {
 -      task_function_call(task, __perf_cgroup_move, task);
 +      struct task_struct *task;
 +
 +      cgroup_taskset_for_each(task, cgrp, tset)
 +              task_function_call(task, __perf_cgroup_move, task);
  }
  
  static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
        if (!(task->flags & PF_EXITING))
                return;
  
 -      perf_cgroup_attach_task(cgrp, task);
 +      task_function_call(task, __perf_cgroup_move, task);
  }
  
  struct cgroup_subsys perf_subsys = {
        .create         = perf_cgroup_create,
        .destroy        = perf_cgroup_destroy,
        .exit           = perf_cgroup_exit,
 -      .attach_task    = perf_cgroup_attach_task,
 +      .attach         = perf_cgroup_attach,
  };
  #endif /* CONFIG_CGROUP_PERF */
@@@ -15,6 -15,8 +15,8 @@@
  #include "util/thread_map.h"
  #include "../../include/linux/hw_breakpoint.h"
  
+ #include <sys/mman.h>
  static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
  {
        bool *visited = symbol__priv(sym);
@@@ -276,7 -278,7 +278,7 @@@ static int test__open_syscall_event(voi
                return -1;
        }
  
 -      threads = thread_map__new(-1, getpid());
 +      threads = thread_map__new(-1, getpid(), UINT_MAX);
        if (threads == NULL) {
                pr_debug("thread_map__new\n");
                return -1;
@@@ -342,7 -344,7 +344,7 @@@ static int test__open_syscall_event_on_
                return -1;
        }
  
 -      threads = thread_map__new(-1, getpid());
 +      threads = thread_map__new(-1, getpid(), UINT_MAX);
        if (threads == NULL) {
                pr_debug("thread_map__new\n");
                return -1;
@@@ -490,7 -492,7 +492,7 @@@ static int test__basic_mmap(void
                expected_nr_events[i] = random() % 257;
        }
  
 -      threads = thread_map__new(-1, getpid());
 +      threads = thread_map__new(-1, getpid(), UINT_MAX);
        if (threads == NULL) {
                pr_debug("thread_map__new\n");
                return -1;
@@@ -1054,7 -1056,7 +1056,7 @@@ static int test__PERF_RECORD(void
         * we're monitoring, the one forked there.
         */
        err = perf_evlist__create_maps(evlist, opts.target_pid,
 -                                     opts.target_tid, opts.cpu_list);
 +                                     opts.target_tid, UINT_MAX, opts.cpu_list);
        if (err < 0) {
                pr_debug("Not enough memory to create thread/cpu maps\n");
                goto out_delete_evlist;
        return (err < 0 || errs > 0) ? -1 : 0;
  }
  
+ #if defined(__x86_64__) || defined(__i386__)
+ #define barrier() asm volatile("" ::: "memory")
+ static u64 rdpmc(unsigned int counter)
+ {
+       unsigned int low, high;
+       asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+       return low | ((u64)high) << 32;
+ }
+ static u64 rdtsc(void)
+ {
+       unsigned int low, high;
+       asm volatile("rdtsc" : "=a" (low), "=d" (high));
+       return low | ((u64)high) << 32;
+ }
+ static u64 mmap_read_self(void *addr)
+ {
+       struct perf_event_mmap_page *pc = addr;
+       u32 seq, idx, time_mult = 0, time_shift = 0;
+       u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+       do {
+               seq = pc->lock;
+               barrier();
+               enabled = pc->time_enabled;
+               running = pc->time_running;
+               if (enabled != running) {
+                       cyc = rdtsc();
+                       time_mult = pc->time_mult;
+                       time_shift = pc->time_shift;
+                       time_offset = pc->time_offset;
+               }
+               idx = pc->index;
+               count = pc->offset;
+               if (idx)
+                       count += rdpmc(idx - 1);
+               barrier();
+       } while (pc->lock != seq);
+       if (enabled != running) {
+               u64 quot, rem;
+               quot = (cyc >> time_shift);
+               rem = cyc & ((1 << time_shift) - 1);
+               delta = time_offset + quot * time_mult +
+                       ((rem * time_mult) >> time_shift);
+               enabled += delta;
+               if (idx)
+                       running += delta;
+               quot = count / running;
+               rem = count % running;
+               count = quot * enabled + (rem * enabled) / running;
+       }
+       return count;
+ }
+ /*
+  * If the RDPMC instruction faults then signal this back to the test parent task:
+  */
+ static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used)
+ {
+       exit(-1);
+ }
+ static int __test__rdpmc(void)
+ {
+       long page_size = sysconf(_SC_PAGE_SIZE);
+       volatile int tmp = 0;
+       u64 i, loops = 1000;
+       int n;
+       int fd;
+       void *addr;
+       struct perf_event_attr attr = {
+               .type = PERF_TYPE_HARDWARE,
+               .config = PERF_COUNT_HW_INSTRUCTIONS,
+               .exclude_kernel = 1,
+       };
+       u64 delta_sum = 0;
+         struct sigaction sa;
+       sigfillset(&sa.sa_mask);
+       sa.sa_sigaction = segfault_handler;
+       sigaction(SIGSEGV, &sa, NULL);
+       fprintf(stderr, "\n\n");
+       fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+       if (fd < 0) {
+               die("Error: sys_perf_event_open() syscall returned "
+                   "with %d (%s)\n", fd, strerror(errno));
+       }
+       addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
+       if (addr == (void *)(-1)) {
+               die("Error: mmap() syscall returned "
+                   "with (%s)\n", strerror(errno));
+       }
+       for (n = 0; n < 6; n++) {
+               u64 stamp, now, delta;
+               stamp = mmap_read_self(addr);
+               for (i = 0; i < loops; i++)
+                       tmp++;
+               now = mmap_read_self(addr);
+               loops *= 10;
+               delta = now - stamp;
+               fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta);
+               delta_sum += delta;
+       }
+       munmap(addr, page_size);
+       close(fd);
+       fprintf(stderr, "   ");
+       if (!delta_sum)
+               return -1;
+       return 0;
+ }
+ static int test__rdpmc(void)
+ {
+       int status = 0;
+       int wret = 0;
+       int ret;
+       int pid;
+       pid = fork();
+       if (pid < 0)
+               return -1;
+       if (!pid) {
+               ret = __test__rdpmc();
+               exit(ret);
+       }
+       wret = waitpid(pid, &status, 0);
+       if (wret < 0 || status)
+               return -1;
+       return 0;
+ }
+ #endif
  static struct test {
        const char *desc;
        int (*func)(void);
                .desc = "parse events tests",
                .func = test__parse_events,
        },
+ #if defined(__x86_64__) || defined(__i386__)
+       {
+               .desc = "x86 rdpmc test",
+               .func = test__rdpmc,
+       },
+ #endif
        {
                .desc = "Validate PERF_RECORD_* events & perf_sample fields",
                .func = test__PERF_RECORD,
@@@ -1396,7 -1571,7 +1571,7 @@@ int cmd_test(int argc, const char **arg
        NULL,
        };
        const struct option test_options[] = {
 -      OPT_INTEGER('v', "verbose", &verbose,
 +      OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show symbol address, etc)"),
        OPT_END()
        };
        if (symbol__init() < 0)
                return -1;
  
-       setup_pager();
        return __cmd_test(argc, argv);
  }