Merge commit 'v2.6.35' into perf/core
Ingo Molnar [Mon, 2 Aug 2010 06:29:56 +0000 (08:29 +0200)]
Conflicts:
tools/perf/Makefile
tools/perf/util/hist.c

Merge reason: Resolve the conflicts and update to latest upstream.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

168 files changed:
Documentation/ABI/testing/debugfs-kmemtrace [deleted file]
Documentation/trace/ftrace-design.txt
Documentation/trace/kmemtrace.txt [deleted file]
Documentation/trace/kprobetrace.txt
MAINTAINERS
Makefile
arch/alpha/include/asm/local64.h [new file with mode: 0644]
arch/arm/include/asm/local64.h [new file with mode: 0644]
arch/arm/kernel/perf_event.c
arch/avr32/include/asm/local64.h [new file with mode: 0644]
arch/blackfin/include/asm/local64.h [new file with mode: 0644]
arch/cris/include/asm/local64.h [new file with mode: 0644]
arch/frv/include/asm/local64.h [new file with mode: 0644]
arch/frv/kernel/local64.h [new file with mode: 0644]
arch/h8300/include/asm/local64.h [new file with mode: 0644]
arch/ia64/include/asm/local64.h [new file with mode: 0644]
arch/m32r/include/asm/local64.h [new file with mode: 0644]
arch/m68k/include/asm/local64.h [new file with mode: 0644]
arch/microblaze/include/asm/local64.h [new file with mode: 0644]
arch/mips/include/asm/local64.h [new file with mode: 0644]
arch/mn10300/include/asm/local64.h [new file with mode: 0644]
arch/parisc/include/asm/local64.h [new file with mode: 0644]
arch/powerpc/include/asm/local64.h [new file with mode: 0644]
arch/powerpc/include/asm/perf_event.h
arch/powerpc/kernel/misc.S
arch/powerpc/kernel/perf_event.c
arch/s390/include/asm/local64.h [new file with mode: 0644]
arch/score/include/asm/local64.h [new file with mode: 0644]
arch/sh/include/asm/local64.h [new file with mode: 0644]
arch/sh/kernel/perf_event.c
arch/sparc/include/asm/local64.h [new file with mode: 0644]
arch/sparc/include/asm/perf_event.h
arch/sparc/kernel/helpers.S
arch/sparc/kernel/perf_event.c
arch/x86/include/asm/hw_breakpoint.h
arch/x86/include/asm/local64.h [new file with mode: 0644]
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/perf_event_p4.h
arch/x86/include/asm/stacktrace.h
arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_p4.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack.h [deleted file]
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/hw_breakpoint.c
arch/x86/kernel/kprobes.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/stacktrace.c
arch/x86/mm/pf_in.c
arch/xtensa/include/asm/local64.h [new file with mode: 0644]
drivers/cpufreq/cpufreq.c
drivers/cpuidle/cpuidle.c
drivers/idle/intel_idle.c
fs/exec.c
include/asm-generic/local64.h [new file with mode: 0644]
include/asm-generic/vmlinux.lds.h
include/linux/ftrace.h
include/linux/ftrace_event.h
include/linux/kernel.h
include/linux/kmemtrace.h [deleted file]
include/linux/perf_event.h
include/linux/sched.h
include/linux/slab_def.h
include/linux/slub_def.h
include/linux/syscalls.h
include/trace/boot.h [deleted file]
include/trace/events/power.h
include/trace/events/sched.h
include/trace/events/timer.h
include/trace/ftrace.h
include/trace/syscall.h
init/main.c
kernel/hw_breakpoint.c
kernel/perf_event.c
kernel/sched.c
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/ftrace.c
kernel/trace/kmemtrace.c [deleted file]
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_boot.c [deleted file]
kernel/trace/trace_clock.c
kernel/trace/trace_entries.h
kernel/trace/trace_event_perf.c
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_export.c
kernel/trace/trace_functions.c
kernel/trace/trace_functions_graph.c
kernel/trace/trace_irqsoff.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_ksym.c [deleted file]
kernel/trace/trace_output.c
kernel/trace/trace_sched_wakeup.c
kernel/trace/trace_selftest.c
kernel/trace/trace_stack.c
kernel/trace/trace_syscalls.c
kernel/trace/trace_sysprof.c [deleted file]
mm/mmap.c
mm/slab.c
mm/slob.c
mm/slub.c
scripts/package/Makefile
scripts/recordmcount.pl
tools/perf/.gitignore
tools/perf/Documentation/perf-buildid-cache.txt
tools/perf/Documentation/perf-probe.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-top.txt
tools/perf/MANIFEST [new file with mode: 0644]
tools/perf/Makefile
tools/perf/arch/sh/Makefile [new file with mode: 0644]
tools/perf/arch/sh/util/dwarf-regs.c [new file with mode: 0644]
tools/perf/builtin-annotate.c
tools/perf/builtin-buildid-cache.c
tools/perf/builtin-buildid-list.c
tools/perf/builtin-diff.c
tools/perf/builtin-probe.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-stat.c
tools/perf/builtin-timechart.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/feature-tests.mak [new file with mode: 0644]
tools/perf/perf-archive.sh
tools/perf/perf.c
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py [new file with mode: 0644]
tools/perf/scripts/python/bin/sched-migration-record [new file with mode: 0644]
tools/perf/scripts/python/bin/sched-migration-report [new file with mode: 0644]
tools/perf/scripts/python/sched-migration.py [new file with mode: 0644]
tools/perf/util/build-id.c
tools/perf/util/cache.h
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/config.c
tools/perf/util/cpumap.c
tools/perf/util/cpumap.h
tools/perf/util/debug.c
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/header.c
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/map.c
tools/perf/util/map.h
tools/perf/util/newt.c
tools/perf/util/parse-events.c
tools/perf/util/probe-event.c
tools/perf/util/probe-event.h
tools/perf/util/probe-finder.c
tools/perf/util/probe-finder.h
tools/perf/util/session.c
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/util.h

diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
deleted file mode 100644 (file)
index 5e6a92a..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-What:          /sys/kernel/debug/kmemtrace/
-Date:          July 2008
-Contact:       Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
-Description:
-
-In kmemtrace-enabled kernels, the following files are created:
-
-/sys/kernel/debug/kmemtrace/
-       cpu<n>          (0400)  Per-CPU tracing data, see below. (binary)
-       total_overruns  (0400)  Total number of bytes which were dropped from
-                               cpu<n> files because of full buffer condition,
-                               non-binary. (text)
-       abi_version     (0400)  Kernel's kmemtrace ABI version. (text)
-
-Each per-CPU file should be read according to the relay interface. That is,
-the reader should set affinity to that specific CPU and, as currently done by
-the userspace application (though there are other methods), use poll() with
-an infinite timeout before every read(). Otherwise, erroneous data may be
-read. The binary data has the following _core_ format:
-
-       Event ID        (1 byte)        Unsigned integer, one of:
-               0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
-               1 - represents a freeing of previously allocated memory
-                   (KMEMTRACE_EVENT_FREE)
-       Type ID         (1 byte)        Unsigned integer, one of:
-               0 - this is a kmalloc() / kfree()
-               1 - this is a kmem_cache_alloc() / kmem_cache_free()
-               2 - this is a __get_free_pages() et al.
-       Event size      (2 bytes)       Unsigned integer representing the
-                                       size of this event. Used to extend
-                                       kmemtrace. Discard the bytes you
-                                       don't know about.
-       Sequence number (4 bytes)       Signed integer used to reorder data
-                                       logged on SMP machines. Wraparound
-                                       must be taken into account, although
-                                       it is unlikely.
-       Caller address  (8 bytes)       Return address to the caller.
-       Pointer to mem  (8 bytes)       Pointer to target memory area. Can be
-                                       NULL, but not all such calls might be
-                                       recorded.
-
-In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
-
-       Requested bytes (8 bytes)       Total number of requested bytes,
-                                       unsigned, must not be zero.
-       Allocated bytes (8 bytes)       Total number of actually allocated
-                                       bytes, unsigned, must not be lower
-                                       than requested bytes.
-       Requested flags (4 bytes)       GFP flags supplied by the caller.
-       Target CPU      (4 bytes)       Signed integer, valid for event id 1.
-                                       If equal to -1, target CPU is the same
-                                       as origin CPU, but the reverse might
-                                       not be true.
-
-The data is made available in the same endianness the machine has.
-
-Other event ids and type ids may be defined and added. Other fields may be
-added by increasing event size, but see below for details.
-Every modification to the ABI, including new id definitions, are followed
-by bumping the ABI version by one.
-
-Adding new data to the packet (features) is done at the end of the mandatory
-data:
-       Feature size    (2 byte)
-       Feature ID      (1 byte)
-       Feature data    (Feature size - 3 bytes)
-
-
-Users:
-       kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
-
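The removed ABI document above describes a fixed binary record layout; below is a minimal C sketch of that layout for readers who still need to parse old kmemtrace relay dumps. The struct names and the use of packed stdint fields are illustrative only (the kernel never shipped such a header), and all fields are in native machine endianness as the deleted text states.

#include <stdint.h>

/* Core record present in every event (field sizes per the deleted ABI text). */
struct kmemtrace_core_record {
        uint8_t  event_id;      /* 0 = KMEMTRACE_EVENT_ALLOC, 1 = KMEMTRACE_EVENT_FREE */
        uint8_t  type_id;       /* 0 = kmalloc/kfree, 1 = kmem_cache_*, 2 = __get_free_pages et al. */
        uint16_t event_size;    /* total event size; skip trailing bytes you don't know about */
        int32_t  seq;           /* sequence number for reordering SMP data, may wrap */
        uint64_t caller;        /* return address of the caller */
        uint64_t ptr;           /* pointer to the target memory area, may be 0 */
} __attribute__((packed));

/* Additional fields that follow the core record for ALLOC events. */
struct kmemtrace_alloc_fields {
        uint64_t bytes_req;     /* requested bytes, never zero */
        uint64_t bytes_alloc;   /* allocated bytes, never lower than bytes_req */
        uint32_t gfp_flags;     /* GFP flags supplied by the caller */
        int32_t  target_cpu;    /* -1 means same as the origin CPU (see validity note above) */
} __attribute__((packed));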
index f1f81af..dc52bd4 100644 (file)
@@ -13,6 +13,9 @@ Note that this focuses on architecture implementation details only.  If you
 want more explanation of a feature in terms of common code, review the common
 ftrace.txt file.
 
+Ideally, everyone who wishes to retain performance while supporting tracing in
+their kernel should make it all the way to dynamic ftrace support.
+
 
 Prerequisites
 -------------
@@ -215,7 +218,7 @@ An arch may pass in a unique value (frame pointer) to both the entering and
 exiting of a function.  On exit, the value is compared and if it does not
 match, then it will panic the kernel.  This is largely a sanity check for bad
 code generation with gcc.  If gcc for your port sanely updates the frame
-pointer under different opitmization levels, then ignore this option.
+pointer under different optimization levels, then ignore this option.
 
 However, adding support for it isn't terribly difficult.  In your assembly code
 that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument.
@@ -234,7 +237,7 @@ If you can't trace NMI functions, then skip this option.
 
 
 HAVE_SYSCALL_TRACEPOINTS
----------------------
+------------------------
 
 You need very few things to get the syscalls tracing in an arch.
 
@@ -250,12 +253,152 @@ You need very few things to get the syscalls tracing in an arch.
 HAVE_FTRACE_MCOUNT_RECORD
 -------------------------
 
-See scripts/recordmcount.pl for more info.
+See scripts/recordmcount.pl for more info.  Just fill in the arch-specific
+details for how to locate the addresses of mcount call sites via objdump.
+This option doesn't make much sense without also implementing dynamic ftrace.
 
+
+HAVE_DYNAMIC_FTRACE
+-------------------
+
+You will first need HAVE_FTRACE_MCOUNT_RECORD and HAVE_FUNCTION_TRACER, so
+scroll your reader back up if you got over eager.
+
+Once those are out of the way, you will need to implement:
+       - asm/ftrace.h:
+               - MCOUNT_ADDR
+               - ftrace_call_adjust()
+               - struct dyn_arch_ftrace{}
+       - asm code:
+               - mcount() (new stub)
+               - ftrace_caller()
+               - ftrace_call()
+               - ftrace_stub()
+       - C code:
+               - ftrace_dyn_arch_init()
+               - ftrace_make_nop()
+               - ftrace_make_call()
+               - ftrace_update_ftrace_func()
+
+First you will need to fill out some arch details in your asm/ftrace.h.
+
+Define MCOUNT_ADDR as the address of your mcount symbol similar to:
+       #define MCOUNT_ADDR ((unsigned long)mcount)
+Since no one else will have a decl for that function, you will need to:
+       extern void mcount(void);
+
+You will also need the helper function ftrace_call_adjust().  Most people
+will be able to stub it out like so:
+       static inline unsigned long ftrace_call_adjust(unsigned long addr)
+       {
+               return addr;
+       }
 <details to be filled>
 
+Lastly you will need the custom dyn_arch_ftrace structure.  If you need
+some extra state when runtime patching arbitrary call sites, this is the
+place.  For now though, create an empty struct:
+       struct dyn_arch_ftrace {
+               /* No extra data needed */
+       };
+
+With the header out of the way, we can fill out the assembly code.  While we
+did already create a mcount() function earlier, dynamic ftrace only wants a
+stub function.  This is because the mcount() will only be used during boot
+and then all references to it will be patched out never to return.  Instead,
+the guts of the old mcount() will be used to create a new ftrace_caller()
+function.  Because the two are hard to merge, it will most likely be a lot
+easier to have two separate definitions split up by #ifdefs.  Same goes for
+the ftrace_stub() as that will now be inlined in ftrace_caller().
+
+Before we get confused anymore, let's check out some pseudo code so you can
+implement your own stuff in assembly:
 
-HAVE_DYNAMIC_FTRACE
----------------------
+void mcount(void)
+{
+       return;
+}
+
+void ftrace_caller(void)
+{
+       /* implement HAVE_FUNCTION_TRACE_MCOUNT_TEST if you desire */
+
+       /* save all state needed by the ABI (see paragraph above) */
+
+       unsigned long frompc = ...;
+       unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
+
+ftrace_call:
+       ftrace_stub(frompc, selfpc);
+
+       /* restore all state needed by the ABI */
+
+ftrace_stub:
+       return;
+}
+
+This might look a little odd at first, but keep in mind that we will be runtime
+patching multiple things.  First, only functions that we actually want to trace
+will be patched to call ftrace_caller().  Second, since we only have one tracer
+active at a time, we will patch the ftrace_caller() function itself to call the
+specific tracer in question.  That is the point of the ftrace_call label.
+
+With that in mind, let's move on to the C code that will actually be doing the
+runtime patching.  You'll need a little knowledge of your arch's opcodes in
+order to make it through the next section.
+
+Every arch has an init callback function.  If you need to do something early on
+to initialize some state, this is the time to do that.  Otherwise, this simple
+function below should be sufficient for most people:
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+       /* return value is done indirectly via data */
+       *(unsigned long *)data = 0;
+
+       return 0;
+}
+
+There are two functions that are used to do runtime patching of arbitrary
+functions.  The first is used to turn the mcount call site into a nop (which
+is what helps us retain runtime performance when not tracing).  The second is
+used to turn the mcount call site into a call to an arbitrary location (but
+typically that is ftracer_caller()).  See the general function definition in
+linux/ftrace.h for the functions:
+       ftrace_make_nop()
+       ftrace_make_call()
+The rec->ip value is the address of the mcount call site that was collected
+by the scripts/recordmcount.pl during build time.
+
+The last function is used to do runtime patching of the active tracer.  This
+will be modifying the assembly code at the location of the ftrace_call symbol
+inside of the ftrace_caller() function.  So you should have sufficient padding
+at that location to support the new function calls you'll be inserting.  Some
+people will be using a "call" type instruction while others will be using a
+"branch" type instruction.  Specifically, the function is:
+       ftrace_update_ftrace_func()
+
+
+HAVE_DYNAMIC_FTRACE + HAVE_FUNCTION_GRAPH_TRACER
+------------------------------------------------
+
+The function grapher needs a few tweaks in order to work with dynamic ftrace.
+Basically, you will need to:
+       - update:
+               - ftrace_caller()
+               - ftrace_graph_call()
+               - ftrace_graph_caller()
+       - implement:
+               - ftrace_enable_ftrace_graph_caller()
+               - ftrace_disable_ftrace_graph_caller()
 
 <details to be filled>
+Quick notes:
+       - add a nop stub after the ftrace_call location named ftrace_graph_call;
+         stub needs to be large enough to support a call to ftrace_graph_caller()
+       - update ftrace_graph_caller() to work with being called by the new
+         ftrace_caller() since some semantics may have changed
+       - ftrace_enable_ftrace_graph_caller() will runtime patch the
+         ftrace_graph_call location with a call to ftrace_graph_caller()
+       - ftrace_disable_ftrace_graph_caller() will runtime patch the
+         ftrace_graph_call location with nops
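To tie the HAVE_DYNAMIC_FTRACE description above together, here is a compressed C sketch of the three patching hooks an arch port provides. The prototypes match the declarations in linux/ftrace.h; arch_nop_insn(), arch_call_insn() and arch_patch_insn() are hypothetical helpers standing in for the arch-specific opcode handling (a real port such as arch/x86/kernel/ftrace.c open-codes this part).

#include <linux/ftrace.h>

/* Hypothetical arch helpers -- real ports open-code the instruction handling. */
extern unsigned long arch_nop_insn(void);
extern unsigned long arch_call_insn(unsigned long site, unsigned long target);
extern int arch_patch_insn(unsigned long site, unsigned long insn);

/* Turn a recorded mcount call site back into a nop.  rec->ip is the address
 * collected by scripts/recordmcount.pl at build time. */
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
        return arch_patch_insn(rec->ip, arch_nop_insn());
}

/* Turn the call site into a call to addr (normally ftrace_caller()). */
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
        return arch_patch_insn(rec->ip, arch_call_insn(rec->ip, addr));
}

/* Patch the ftrace_call slot inside ftrace_caller() to call the active tracer. */
int ftrace_update_ftrace_func(ftrace_func_t func)
{
        extern void ftrace_call(void);  /* the label defined in the asm above */

        return arch_patch_insn((unsigned long)&ftrace_call,
                               arch_call_insn((unsigned long)&ftrace_call,
                                              (unsigned long)func));
}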
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt
deleted file mode 100644 (file)
index 6308735..0000000
+++ /dev/null
@@ -1,126 +0,0 @@
-                       kmemtrace - Kernel Memory Tracer
-
-                         by Eduard - Gabriel Munteanu
-                            <eduard.munteanu@linux360.ro>
-
-I. Introduction
-===============
-
-kmemtrace helps kernel developers figure out two things:
-1) how different allocators (SLAB, SLUB etc.) perform
-2) how kernel code allocates memory and how much
-
-To do this, we trace every allocation and export information to the userspace
-through the relay interface. We export things such as the number of requested
-bytes, the number of bytes actually allocated (i.e. including internal
-fragmentation), whether this is a slab allocation or a plain kmalloc() and so
-on.
-
-The actual analysis is performed by a userspace tool (see section III for
-details on where to get it from). It logs the data exported by the kernel,
-processes it and (as of writing this) can provide the following information:
-- the total amount of memory allocated and fragmentation per call-site
-- the amount of memory allocated and fragmentation per allocation
-- total memory allocated and fragmentation in the collected dataset
-- number of cross-CPU allocation and frees (makes sense in NUMA environments)
-
-Moreover, it can potentially find inconsistent and erroneous behavior in
-kernel code, such as using slab free functions on kmalloc'ed memory or
-allocating less memory than requested (but not truly failed allocations).
-
-kmemtrace also makes provisions for tracing on some arch and analysing the
-data on another.
-
-II. Design and goals
-====================
-
-kmemtrace was designed to handle rather large amounts of data. Thus, it uses
-the relay interface to export whatever is logged to userspace, which then
-stores it. Analysis and reporting is done asynchronously, that is, after the
-data is collected and stored. By design, it allows one to log and analyse
-on different machines and different arches.
-
-As of writing this, the ABI is not considered stable, though it might not
-change much. However, no guarantees are made about compatibility yet. When
-deemed stable, the ABI should still allow easy extension while maintaining
-backward compatibility. This is described further in Documentation/ABI.
-
-Summary of design goals:
-       - allow logging and analysis to be done across different machines
-       - be fast and anticipate usage in high-load environments (*)
-       - be reasonably extensible
-       - make it possible for GNU/Linux distributions to have kmemtrace
-       included in their repositories
-
-(*) - one of the reasons Pekka Enberg's original userspace data analysis
-    tool's code was rewritten from Perl to C (although this is more than a
-    simple conversion)
-
-
-III. Quick usage guide
-======================
-
-1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
-CONFIG_KMEMTRACE).
-
-2) Get the userspace tool and build it:
-$ git clone git://repo.or.cz/kmemtrace-user.git                # current repository
-$ cd kmemtrace-user/
-$ ./autogen.sh
-$ ./configure
-$ make
-
-3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
-'single' runlevel (so that relay buffers don't fill up easily), and run
-kmemtrace:
-# '$' does not mean user, but root here.
-$ mount -t debugfs none /sys/kernel/debug
-$ mount -t proc none /proc
-$ cd path/to/kmemtrace-user/
-$ ./kmemtraced
-Wait a bit, then stop it with CTRL+C.
-$ cat /sys/kernel/debug/kmemtrace/total_overruns       # Check if we didn't
-                                                       # overrun, should
-                                                       # be zero.
-$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
-               check its correctness]
-$ ./kmemtrace-report
-
-Now you should have a nice and short summary of how the allocator performs.
-
-IV. FAQ and known issues
-========================
-
-Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
-this? Should I worry?
-A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
-large the number is. You can fix it by supplying a higher
-'kmemtrace.subbufs=N' kernel parameter.
----
-
-Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
-A: This is a bug and should be reported. It can occur for a variety of
-reasons:
-       - possible bugs in relay code
-       - possible misuse of relay by kmemtrace
-       - timestamps being collected unorderly
-Or you may fix it yourself and send us a patch.
----
-
-Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
-A: This is a known issue and I'm working on it. These might be true errors
-in kernel code, which may have inconsistent behavior (e.g. allocating memory
-with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
-out this behavior may work with SLAB, but may fail with other allocators.
-
-It may also be due to lack of tracing in some unusual allocator functions.
-
-We don't want bug reports regarding this issue yet.
----
-
-V. See also
-===========
-
-Documentation/kernel-parameters.txt
-Documentation/ABI/testing/debugfs-kmemtrace
-
index ec94748..5f77d94 100644 (file)
@@ -42,7 +42,7 @@ Synopsis of kprobe_events
   +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
-                 (u8/u16/u32/u64/s8/s16/s32/s64) are supported.
+                 (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported.
 
   (*) only for return probe.
   (**) this is useful for fetching a field of data structures.
index 02f75fc..5d5f9e2 100644 (file)
@@ -3380,13 +3380,6 @@ F:       include/linux/kmemleak.h
 F:     mm/kmemleak.c
 F:     mm/kmemleak-test.c
 
-KMEMTRACE
-M:     Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
-S:     Maintained
-F:     Documentation/trace/kmemtrace.txt
-F:     include/linux/kmemtrace.h
-F:     kernel/trace/kmemtrace.c
-
 KPROBES
 M:     Ananth N Mavinakayanahalli <ananth@in.ibm.com>
 M:     Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
@@ -5654,7 +5647,7 @@ TRACING
 M:     Steven Rostedt <rostedt@goodmis.org>
 M:     Frederic Weisbecker <fweisbec@gmail.com>
 M:     Ingo Molnar <mingo@redhat.com>
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing/core
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf/core
 S:     Maintained
 F:     Documentation/trace/ftrace.txt
 F:     arch/*/*/*/ftrace.h
index 141da26..85a14f5 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -414,7 +414,7 @@ endif
 no-dot-config-targets := clean mrproper distclean \
                         cscope TAGS tags help %docs check% \
                         include/linux/version.h headers_% \
-                        kernelrelease kernelversion
+                        kernelrelease kernelversion %src-pkg
 
 config-targets := 0
 mixed-targets  := 0
@@ -1158,6 +1158,8 @@ distclean: mrproper
 # rpm target kept for backward compatibility
 package-dir    := $(srctree)/scripts/package
 
+%src-pkg: FORCE
+       $(Q)$(MAKE) $(build)=$(package-dir) $@
 %pkg: include/config/kernel.release FORCE
        $(Q)$(MAKE) $(build)=$(package-dir) $@
 rpm: include/config/kernel.release FORCE
diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/arm/include/asm/local64.h b/arch/arm/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index de12536..417c392 100644 (file)
@@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event,
                        struct hw_perf_event *hwc,
                        int idx)
 {
-       s64 left = atomic64_read(&hwc->period_left);
+       s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;
 
        if (unlikely(left <= -period)) {
                left = period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
 
        if (unlikely(left <= 0)) {
                left += period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
@@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event,
        if (left > (s64)armpmu->max_period)
                left = armpmu->max_period;
 
-       atomic64_set(&hwc->prev_count, (u64)-left);
+       local64_set(&hwc->prev_count, (u64)-left);
 
        armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
 
@@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event,
        u64 delta;
 
 again:
-       prev_raw_count = atomic64_read(&hwc->prev_count);
+       prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = armpmu->read_counter(idx);
 
-       if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;
 
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;
 
-       atomic64_add(delta, &event->count);
-       atomic64_sub(delta, &hwc->period_left);
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
 
        return new_raw_count;
 }
@@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event)
        if (!hwc->sample_period) {
                hwc->sample_period  = armpmu->max_period;
                hwc->last_period    = hwc->sample_period;
-               atomic64_set(&hwc->period_left, hwc->sample_period);
+               local64_set(&hwc->period_left, hwc->sample_period);
        }
 
        err = 0;
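The hunks above (and the matching conversions for powerpc, sh, sparc and x86 later in this merge) switch the per-event counters from atomic64_t to the new local64_t type supplied by the asm-generic/local64.h headers being added, which is cheaper because these values are only ever updated from the CPU that owns the event. A minimal sketch of the update pattern the PMU drivers share is shown below, with read_hw_counter() as a hypothetical stand-in for each PMU's raw counter read.

#include <linux/types.h>
#include <asm/local64.h>

/* Hypothetical raw counter read; each PMU driver supplies its own. */
extern u64 read_hw_counter(int idx);

/* shift = 64 - hardware counter width, used to sign-extend the delta. */
static u64 event_update(local64_t *prev_count, local64_t *count,
                        local64_t *period_left, int idx, int shift)
{
        u64 prev_raw, new_raw;
        s64 delta;

again:
        prev_raw = local64_read(prev_count);
        new_raw  = read_hw_counter(idx);

        /* Retry if an interrupt/NMI updated prev_count under us. */
        if (local64_cmpxchg(prev_count, prev_raw, new_raw) != prev_raw)
                goto again;

        /* Sign-extend the counter width, then accumulate the delta. */
        delta = (new_raw << shift) - (prev_raw << shift);
        delta >>= shift;

        local64_add(delta, count);
        local64_sub(delta, period_left);

        return new_raw;
}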
diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/blackfin/include/asm/local64.h b/arch/blackfin/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/frv/include/asm/local64.h b/arch/frv/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/frv/kernel/local64.h b/arch/frv/kernel/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/h8300/include/asm/local64.h b/arch/h8300/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/m32r/include/asm/local64.h b/arch/m32r/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/m68k/include/asm/local64.h b/arch/m68k/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/mn10300/include/asm/local64.h b/arch/mn10300/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/parisc/include/asm/local64.h b/arch/parisc/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index e6d4ce6..5c16b89 100644 (file)
 #ifdef CONFIG_FSL_EMB_PERF_EVENT
 #include <asm/perf_event_fsl_emb.h>
 #endif
+
+#ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+
+#define perf_arch_fetch_caller_regs(regs, __ip)                        \
+       do {                                                    \
+               (regs)->nip = __ip;                             \
+               (regs)->gpr[1] = *(unsigned long *)__get_SP();  \
+               asm volatile("mfmsr %0" : "=r" ((regs)->msr));  \
+       } while (0)
+#endif
index 22e507c..2d29752 100644 (file)
@@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
 _GLOBAL(__restore_cpu_power7)
        /* place holder */
        blr
-
-/*
- * Get a minimal set of registers for our caller's nth caller.
- * r3 = regs pointer, r5 = n.
- *
- * We only get R1 (stack pointer), NIP (next instruction pointer)
- * and LR (link register).  These are all we can get in the
- * general case without doing complicated stack unwinding, but
- * fortunately they are enough to do a stack backtrace, which
- * is all we need them for.
- */
-_GLOBAL(perf_arch_fetch_caller_regs)
-       mr      r6,r1
-       cmpwi   r5,0
-       mflr    r4
-       ble     2f
-       mtctr   r5
-1:     PPC_LL  r6,0(r6)
-       bdnz    1b
-       PPC_LL  r4,PPC_LR_STKOFF(r6)
-2:     PPC_LL  r7,0(r6)
-       PPC_LL  r7,PPC_LR_STKOFF(r7)
-       PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3)
-       PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3)
-       PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3)
-       blr
index 5c14ffe..d301a30 100644 (file)
@@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)
         * Therefore we treat them like NMIs.
         */
        do {
-               prev = atomic64_read(&event->hw.prev_count);
+               prev = local64_read(&event->hw.prev_count);
                barrier();
                val = read_pmc(event->hw.idx);
-       } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+       } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
        /* The counters are only 32 bits wide */
        delta = (val - prev) & 0xfffffffful;
-       atomic64_add(delta, &event->count);
-       atomic64_sub(delta, &event->hw.period_left);
+       local64_add(delta, &event->count);
+       local64_sub(delta, &event->hw.period_left);
 }
 
 /*
@@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
                if (!event->hw.idx)
                        continue;
                val = (event->hw.idx == 5) ? pmc5 : pmc6;
-               prev = atomic64_read(&event->hw.prev_count);
+               prev = local64_read(&event->hw.prev_count);
                event->hw.idx = 0;
                delta = (val - prev) & 0xfffffffful;
-               atomic64_add(delta, &event->count);
+               local64_add(delta, &event->count);
        }
 }
 
@@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
                event = cpuhw->limited_counter[i];
                event->hw.idx = cpuhw->limited_hwidx[i];
                val = (event->hw.idx == 5) ? pmc5 : pmc6;
-               atomic64_set(&event->hw.prev_count, val);
+               local64_set(&event->hw.prev_count, val);
                perf_event_update_userpage(event);
        }
 }
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
                }
                val = 0;
                if (event->hw.sample_period) {
-                       left = atomic64_read(&event->hw.period_left);
+                       left = local64_read(&event->hw.period_left);
                        if (left < 0x80000000L)
                                val = 0x80000000L - left;
                }
-               atomic64_set(&event->hw.prev_count, val);
+               local64_set(&event->hw.prev_count, val);
                event->hw.idx = idx;
                write_pmc(idx, val);
                perf_event_update_userpage(event);
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
         * skip the schedulability test here, it will be peformed
         * at commit time(->commit_txn) as a whole
         */
-       if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED)
+       if (cpuhw->group_flag & PERF_EVENT_TXN)
                goto nocheck;
 
        if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -845,8 +845,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
        if (left < 0x80000000L)
                val = 0x80000000L - left;
        write_pmc(event->hw.idx, val);
-       atomic64_set(&event->hw.prev_count, val);
-       atomic64_set(&event->hw.period_left, left);
+       local64_set(&event->hw.prev_count, val);
+       local64_set(&event->hw.period_left, left);
        perf_event_update_userpage(event);
        perf_enable();
        local_irq_restore(flags);
@@ -861,7 +861,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-       cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+       cpuhw->group_flag |= PERF_EVENT_TXN;
        cpuhw->n_txn_start = cpuhw->n_events;
 }
 
@@ -874,7 +874,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-       cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+       cpuhw->group_flag &= ~PERF_EVENT_TXN;
 }
 
 /*
@@ -900,6 +900,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
        for (i = cpuhw->n_txn_start; i < n; ++i)
                cpuhw->event[i]->hw.config = cpuhw->events[i];
 
+       cpuhw->group_flag &= ~PERF_EVENT_TXN;
        return 0;
 }
 
@@ -1111,7 +1112,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
        event->hw.config = events[n];
        event->hw.event_base = cflags[n];
        event->hw.last_period = event->hw.sample_period;
-       atomic64_set(&event->hw.period_left, event->hw.last_period);
+       local64_set(&event->hw.period_left, event->hw.last_period);
 
        /*
         * See if we need to reserve the PMU.
@@ -1149,16 +1150,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
        int record = 0;
 
        /* we don't have to worry about interrupts here */
-       prev = atomic64_read(&event->hw.prev_count);
+       prev = local64_read(&event->hw.prev_count);
        delta = (val - prev) & 0xfffffffful;
-       atomic64_add(delta, &event->count);
+       local64_add(delta, &event->count);
 
        /*
         * See if the total period for this event has expired,
         * and update for the next period.
         */
        val = 0;
-       left = atomic64_read(&event->hw.period_left) - delta;
+       left = local64_read(&event->hw.period_left) - delta;
        if (period) {
                if (left <= 0) {
                        left += period;
@@ -1196,8 +1197,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
        }
 
        write_pmc(event->hw.idx, val);
-       atomic64_set(&event->hw.prev_count, val);
-       atomic64_set(&event->hw.period_left, left);
+       local64_set(&event->hw.prev_count, val);
+       local64_set(&event->hw.period_left, left);
        perf_event_update_userpage(event);
 }
 
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/score/include/asm/local64.h b/arch/score/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index 81b6de4..7a3dc35 100644 (file)
@@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event,
         * this is the simplest approach for maintaining consistency.
         */
 again:
-       prev_raw_count = atomic64_read(&hwc->prev_count);
+       prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = sh_pmu->read(idx);
 
-       if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;
 
@@ -203,7 +203,7 @@ again:
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;
 
-       atomic64_add(delta, &event->count);
+       local64_add(delta, &event->count);
 }
 
 static void sh_pmu_disable(struct perf_event *event)
diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index 7e26698..74c4e0c 100644 (file)
@@ -6,7 +6,15 @@ extern void set_perf_event_pending(void);
 #define        PERF_EVENT_INDEX_OFFSET 0
 
 #ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+
 extern void init_hw_perf_events(void);
+
+extern void
+__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+
+#define perf_arch_fetch_caller_regs(pt_regs, ip)       \
+       __perf_arch_fetch_caller_regs(pt_regs, ip, 1);
 #else
 static inline void init_hw_perf_events(void)   { }
 #endif
index 92090cc..682fee0 100644 (file)
@@ -47,9 +47,9 @@ stack_trace_flush:
        .size           stack_trace_flush,.-stack_trace_flush
 
 #ifdef CONFIG_PERF_EVENTS
-       .globl          perf_arch_fetch_caller_regs
-       .type           perf_arch_fetch_caller_regs,#function
-perf_arch_fetch_caller_regs:
+       .globl          __perf_arch_fetch_caller_regs
+       .type           __perf_arch_fetch_caller_regs,#function
+__perf_arch_fetch_caller_regs:
        /* We always read the %pstate into %o5 since we will use
         * that to construct a fake %tstate to store into the regs.
         */
index 44faabc..357ced3 100644 (file)
@@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event,
        s64 delta;
 
 again:
-       prev_raw_count = atomic64_read(&hwc->prev_count);
+       prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = read_pmc(idx);
 
-       if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;
 
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;
 
-       atomic64_add(delta, &event->count);
-       atomic64_sub(delta, &hwc->period_left);
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
 
        return new_raw_count;
 }
@@ -591,27 +591,27 @@ again:
 static int sparc_perf_event_set_period(struct perf_event *event,
                                       struct hw_perf_event *hwc, int idx)
 {
-       s64 left = atomic64_read(&hwc->period_left);
+       s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;
 
        if (unlikely(left <= -period)) {
                left = period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
 
        if (unlikely(left <= 0)) {
                left += period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
        if (left > MAX_PERIOD)
                left = MAX_PERIOD;
 
-       atomic64_set(&hwc->prev_count, (u64)-left);
+       local64_set(&hwc->prev_count, (u64)-left);
 
        write_pmc(idx, (u64)(-left) & 0xffffffff);
 
@@ -1006,7 +1006,7 @@ static int sparc_pmu_enable(struct perf_event *event)
         * skip the schedulability test here, it will be peformed
         * at commit time(->commit_txn) as a whole
         */
-       if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+       if (cpuc->group_flag & PERF_EVENT_TXN)
                goto nocheck;
 
        if (check_excludes(cpuc->event, n0, 1))
@@ -1088,7 +1088,7 @@ static int __hw_perf_event_init(struct perf_event *event)
        if (!hwc->sample_period) {
                hwc->sample_period = MAX_PERIOD;
                hwc->last_period = hwc->sample_period;
-               atomic64_set(&hwc->period_left, hwc->sample_period);
+               local64_set(&hwc->period_left, hwc->sample_period);
        }
 
        return 0;
@@ -1103,7 +1103,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-       cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+       cpuhw->group_flag |= PERF_EVENT_TXN;
 }
 
 /*
@@ -1115,7 +1115,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-       cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+       cpuhw->group_flag &= ~PERF_EVENT_TXN;
 }
 
 /*
@@ -1138,6 +1138,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
        if (sparc_check_constraints(cpuc->event, cpuc->events, n))
                return -EAGAIN;
 
+       cpuc->group_flag &= ~PERF_EVENT_TXN;
        return 0;
 }
 
index 9422553..528a11e 100644 (file)
@@ -20,10 +20,10 @@ struct arch_hw_breakpoint {
 #include <linux/list.h>
 
 /* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_X           0x00
 #define X86_BREAKPOINT_LEN_1           0x40
 #define X86_BREAKPOINT_LEN_2           0x44
 #define X86_BREAKPOINT_LEN_4           0x4c
-#define X86_BREAKPOINT_LEN_EXECUTE     0x40
 
 #ifdef CONFIG_X86_64
 #define X86_BREAKPOINT_LEN_8           0x48
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index 254883d..6e742cc 100644 (file)
@@ -68,8 +68,9 @@ union cpuid10_eax {
 
 union cpuid10_edx {
        struct {
-               unsigned int num_counters_fixed:4;
-               unsigned int reserved:28;
+               unsigned int num_counters_fixed:5;
+               unsigned int bit_width_fixed:8;
+               unsigned int reserved:19;
        } split;
        unsigned int full;
 };
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)  perf_misc_flags(regs)
 
+#include <asm/stacktrace.h>
+
+/*
+ * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
+ * and the comment with PERF_EFLAGS_EXACT.
+ */
+#define perf_arch_fetch_caller_regs(regs, __ip)                {       \
+       (regs)->ip = (__ip);                                    \
+       (regs)->bp = caller_frame_pointer();                    \
+       (regs)->cs = __KERNEL_CS;                               \
+       regs->flags = 0;                                        \
+}
+
 #else
 static inline void init_hw_perf_events(void)           { }
 static inline void perf_events_lapic_init(void)        { }
index 64a8ebf..def5007 100644 (file)
@@ -19,7 +19,6 @@
 #define ARCH_P4_RESERVED_ESCR  (2) /* IQ_ESCR(0,1) not always present */
 #define ARCH_P4_MAX_ESCR       (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
 #define ARCH_P4_MAX_CCCR       (18)
-#define ARCH_P4_MAX_COUNTER    (ARCH_P4_MAX_CCCR / 2)
 
 #define P4_ESCR_EVENT_MASK     0x7e000000U
 #define P4_ESCR_EVENT_SHIFT    25
 #define P4_CCCR_THRESHOLD(v)           ((v) << P4_CCCR_THRESHOLD_SHIFT)
 #define P4_CCCR_ESEL(v)                        ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
 
-/* Custom bits in reerved CCCR area */
-#define P4_CCCR_CACHE_OPS_MASK         0x0000003fU
-
-
 /* Non HT mask */
 #define P4_CCCR_MASK                           \
        (P4_CCCR_OVF                    |       \
  * ESCR and CCCR but rather an only packed value should
  * be unpacked and written to a proper addresses
  *
- * the base idea is to pack as much info as
- * possible
+ * the base idea is to pack as much info as possible
  */
 #define p4_config_pack_escr(v)         (((u64)(v)) << 32)
 #define p4_config_pack_cccr(v)         (((u64)(v)) & 0xffffffffULL)
                t;                                      \
        })
 
-#define p4_config_unpack_cache_event(v)        (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
-
 #define P4_CONFIG_HT_SHIFT             63
 #define P4_CONFIG_HT                   (1ULL << P4_CONFIG_HT_SHIFT)
 
@@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
        return escr;
 }
 
+/*
+ * This are the events which should be used in "Event Select"
+ * field of ESCR register, they are like unique keys which allow
+ * the kernel to determinate which CCCR and COUNTER should be
+ * used to track an event
+ */
 enum P4_EVENTS {
        P4_EVENT_TC_DELIVER_MODE,
        P4_EVENT_BPU_FETCH_REQUEST,
@@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES {
  * a caller should use P4_ESCR_EMASK_NAME helper to
  * pick the EventMask needed, for example
  *
- *     P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD)
+ *     P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
  */
 enum P4_ESCR_EMASKS {
        P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
@@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS {
        P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
 };
 
-/* P4 PEBS: stale for a while */
-#define P4_PEBS_METRIC_MASK    0x00001fffU
-#define P4_PEBS_UOB_TAG                0x01000000U
-#define P4_PEBS_ENABLE         0x02000000U
-
-/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */
-#define P4_PEBS__1stl_cache_load_miss_retired  0x3000001
-#define P4_PEBS__2ndl_cache_load_miss_retired  0x3000002
-#define P4_PEBS__dtlb_load_miss_retired                0x3000004
-#define P4_PEBS__dtlb_store_miss_retired       0x3000004
-#define P4_PEBS__dtlb_all_miss_retired         0x3000004
-#define P4_PEBS__tagged_mispred_branch         0x3018000
-#define P4_PEBS__mob_load_replay_retired       0x3000200
-#define P4_PEBS__split_load_retired            0x3000400
-#define P4_PEBS__split_store_retired           0x3000400
-
-#define P4_VERT__1stl_cache_load_miss_retired  0x0000001
-#define P4_VERT__2ndl_cache_load_miss_retired  0x0000001
-#define P4_VERT__dtlb_load_miss_retired                0x0000001
-#define P4_VERT__dtlb_store_miss_retired       0x0000002
-#define P4_VERT__dtlb_all_miss_retired         0x0000003
-#define P4_VERT__tagged_mispred_branch         0x0000010
-#define P4_VERT__mob_load_replay_retired       0x0000001
-#define P4_VERT__split_load_retired            0x0000001
-#define P4_VERT__split_store_retired           0x0000002
-
-enum P4_CACHE_EVENTS {
-       P4_CACHE__NONE,
-
-       P4_CACHE__1stl_cache_load_miss_retired,
-       P4_CACHE__2ndl_cache_load_miss_retired,
-       P4_CACHE__dtlb_load_miss_retired,
-       P4_CACHE__dtlb_store_miss_retired,
-       P4_CACHE__itlb_reference_hit,
-       P4_CACHE__itlb_reference_miss,
-
-       P4_CACHE__MAX
+/*
+ * P4 PEBS specifics (Replay Event only)
+ *
+ * Format (bits):
+ *   0-6: metric from P4_PEBS_METRIC enum
+ *    7 : reserved
+ *    8 : reserved
+ * 9-11 : reserved
+ *
+ * Note we have UOP and PEBS bits reserved for now
+ * just in case if we will need them once
+ */
+#define P4_PEBS_CONFIG_ENABLE          (1 << 7)
+#define P4_PEBS_CONFIG_UOP_TAG         (1 << 8)
+#define P4_PEBS_CONFIG_METRIC_MASK     0x3f
+#define P4_PEBS_CONFIG_MASK            0xff
+
+/*
+ * mem: Only counters MSR_IQ_COUNTER4 (16) and
+ * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
+ */
+#define P4_PEBS_ENABLE                 0x02000000U
+#define P4_PEBS_ENABLE_UOP_TAG         0x01000000U
+
+#define p4_config_unpack_metric(v)     (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
+#define p4_config_unpack_pebs(v)       (((u64)(v)) & P4_PEBS_CONFIG_MASK)
+
+#define p4_config_pebs_has(v, mask)    (p4_config_unpack_pebs(v) & (mask))
+
+enum P4_PEBS_METRIC {
+       P4_PEBS_METRIC__none,
+
+       P4_PEBS_METRIC__1stl_cache_load_miss_retired,
+       P4_PEBS_METRIC__2ndl_cache_load_miss_retired,
+       P4_PEBS_METRIC__dtlb_load_miss_retired,
+       P4_PEBS_METRIC__dtlb_store_miss_retired,
+       P4_PEBS_METRIC__dtlb_all_miss_retired,
+       P4_PEBS_METRIC__tagged_mispred_branch,
+       P4_PEBS_METRIC__mob_load_replay_retired,
+       P4_PEBS_METRIC__split_load_retired,
+       P4_PEBS_METRIC__split_store_retired,
+
+       P4_PEBS_METRIC__max
 };
 
 #endif /* PERF_EVENT_P4_H */
+
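A short sketch of how the new PEBS config helpers above can be consumed follows; the function itself is hypothetical, but the macros and the P4_PEBS_METRIC enum are the ones defined in this header.

#include <linux/types.h>
#include <asm/perf_event_p4.h>

/* Hypothetical consumer: return the replay metric packed into an event
 * config, or P4_PEBS_METRIC__none when PEBS is not enabled for it. */
static enum P4_PEBS_METRIC p4_pebs_metric_of(u64 config)
{
        if (!p4_config_pebs_has(config, P4_PEBS_CONFIG_ENABLE))
                return P4_PEBS_METRIC__none;

        return (enum P4_PEBS_METRIC)p4_config_unpack_metric(config);
}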
index 4dab78e..2b16a2a 100644 (file)
@@ -1,6 +1,13 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+
 #ifndef _ASM_X86_STACKTRACE_H
 #define _ASM_X86_STACKTRACE_H
 
+#include <linux/uaccess.h>
+
 extern int kstack_depth_to_print;
 
 struct thread_info;
@@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
                unsigned long *stack, unsigned long bp,
                const struct stacktrace_ops *ops, void *data);
 
+#ifdef CONFIG_X86_32
+#define STACKSLOTS_PER_LINE 8
+#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
+#else
+#define STACKSLOTS_PER_LINE 4
+#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
+#endif
+
+extern void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+               unsigned long *stack, unsigned long bp, char *log_lvl);
+
+extern void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+               unsigned long *sp, unsigned long bp, char *log_lvl);
+
+extern unsigned int code_bytes;
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+       struct stack_frame *next_frame;
+       unsigned long return_address;
+};
+
+struct stack_frame_ia32 {
+    u32 next_frame;
+    u32 return_address;
+};
+
+static inline unsigned long caller_frame_pointer(void)
+{
+       struct stack_frame *frame;
+
+       get_bp(frame);
+
+#ifdef CONFIG_FRAME_POINTER
+       frame = frame->next_frame;
+#endif
+
+       return (unsigned long)frame;
+}
+
 #endif /* _ASM_X86_STACKTRACE_H */
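The struct stack_frame and caller_frame_pointer() additions above expose the saved frame-pointer chain directly; below is a minimal sketch of walking that chain, assuming CONFIG_FRAME_POINTER and a hypothetical valid_kernel_frame() bounds check in place of the kernel's real stack-limit and access checks.

#include <linux/types.h>
#include <asm/stacktrace.h>

/* Hypothetical check that a frame pointer still lies within the current stack. */
extern bool valid_kernel_frame(struct stack_frame *frame);

/* Collect up to max return addresses by following saved frame pointers. */
static int walk_frames(unsigned long *trace, int max)
{
        struct stack_frame *frame;
        int n = 0;

        frame = (struct stack_frame *)caller_frame_pointer();

        while (n < max && valid_kernel_frame(frame)) {
                trace[n++] = frame->return_address;
                frame = frame->next_frame;
        }

        return n;
}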
index 1d3cdda..cee5263 100644 (file)
@@ -34,7 +34,6 @@
 #include <linux/compiler.h>
 #include <linux/dmi.h>
 #include <linux/slab.h>
-#include <trace/events/power.h>
 
 #include <linux/acpi.h>
 #include <linux/io.h>
@@ -324,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
                }
        }
 
-       trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);
-
        switch (data->cpu_feature) {
        case SYSTEM_INTEL_MSR_CAPABLE:
                cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
index 5db5b7d..f2da20f 100644 (file)
@@ -220,6 +220,7 @@ struct x86_pmu {
                                                 struct perf_event *event);
        struct event_constraint *event_constraints;
        void            (*quirks)(void);
+       int             perfctr_second_write;
 
        int             (*cpu_prepare)(int cpu);
        void            (*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
         * count to the generic event atomically:
         */
 again:
-       prev_raw_count = atomic64_read(&hwc->prev_count);
+       prev_raw_count = local64_read(&hwc->prev_count);
        rdmsrl(hwc->event_base + idx, new_raw_count);
 
-       if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                        new_raw_count) != prev_raw_count)
                goto again;
 
@@ -313,8 +314,8 @@ again:
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;
 
-       atomic64_add(delta, &event->count);
-       atomic64_sub(delta, &hwc->period_left);
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
 
        return new_raw_count;
 }
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
        if (!hwc->sample_period) {
                hwc->sample_period = x86_pmu.max_period;
                hwc->last_period = hwc->sample_period;
-               atomic64_set(&hwc->period_left, hwc->sample_period);
+               local64_set(&hwc->period_left, hwc->sample_period);
        } else {
                /*
                 * If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
 x86_perf_event_set_period(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
-       s64 left = atomic64_read(&hwc->period_left);
+       s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0, idx = hwc->idx;
 
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
         */
        if (unlikely(left <= -period)) {
                left = period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
 
        if (unlikely(left <= 0)) {
                left += period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
         * The hw event starts counting from this event offset,
         * mark it to be able to extra future deltas:
         */
-       atomic64_set(&hwc->prev_count, (u64)-left);
+       local64_set(&hwc->prev_count, (u64)-left);
 
-       wrmsrl(hwc->event_base + idx,
+       wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+
+       /*
+        * Due to erratum on certan cpu we need
+        * a second write to be sure the register
+        * is updated properly
+        */
+       if (x86_pmu.perfctr_second_write) {
+               wrmsrl(hwc->event_base + idx,
                        (u64)(-left) & x86_pmu.cntval_mask);
+       }
 
        perf_event_update_userpage(event);
 
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
         * skip the schedulability test here, it will be peformed
         * at commit time(->commit_txn) as a whole
         */
-       if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+       if (cpuc->group_flag & PERF_EVENT_TXN)
                goto out;
 
        ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
         * The events never got scheduled and ->cancel_txn will truncate
         * the event_list.
         */
-       if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+       if (cpuc->group_flag & PERF_EVENT_TXN)
                return;
 
        x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-       cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+       cpuc->group_flag |= PERF_EVENT_TXN;
        cpuc->n_txn = 0;
 }
 
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-       cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+       cpuc->group_flag &= ~PERF_EVENT_TXN;
        /*
         * Truncate the collected events.
         */
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
         */
        memcpy(cpuc->assign, assign, n*sizeof(int));
 
-       /*
-        * Clear out the txn count so that ->cancel_txn() which gets
-        * run after ->commit_txn() doesn't undo things.
-        */
-       cpuc->n_txn = 0;
+       cpuc->group_flag &= ~PERF_EVENT_TXN;
 
        return 0;
 }
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
        .walk_stack             = print_context_stack_bp,
 };
 
-#include "../dumpstack.h"
-
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
        return entry;
 }
 
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
-       regs->ip = ip;
-       /*
-        * perf_arch_fetch_caller_regs adds another call, we need to increment
-        * the skip level
-        */
-       regs->bp = rewind_frame_pointer(skip + 1);
-       regs->cs = __KERNEL_CS;
-       /*
-        * We abuse bit 3 to pass exact information, see perf_misc_flags
-        * and the comment with PERF_EFLAGS_EXACT.
-        */
-       regs->flags = 0;
-}
-
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
        unsigned long ip;
index ae85d69..107711b 100644 (file)
@@ -21,22 +21,36 @@ struct p4_event_bind {
        char cntr[2][P4_CNTR_LIMIT];            /* counter index (offset), -1 on abscence */
 };
 
-struct p4_cache_event_bind {
+struct p4_pebs_bind {
        unsigned int metric_pebs;
        unsigned int metric_vert;
 };
 
-#define P4_GEN_CACHE_EVENT_BIND(name)          \
-       [P4_CACHE__##name] = {                  \
-               .metric_pebs = P4_PEBS__##name, \
-               .metric_vert = P4_VERT__##name, \
+/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
+#define P4_GEN_PEBS_BIND(name, pebs, vert)                     \
+       [P4_PEBS_METRIC__##name] = {                            \
+               .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,   \
+               .metric_vert = vert,                            \
        }
 
-static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
-       P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
-       P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
-       P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
-       P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
+/*
+ * note we have P4_PEBS_ENABLE_UOP_TAG always set here
+ *
+ * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
+ * event configuration to find out which values are to be
+ * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
+ * registers
+ */
+static struct p4_pebs_bind p4_pebs_bind_map[] = {
+       P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,  0x0000001, 0x0000001),
+       P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,  0x0000002, 0x0000001),
+       P4_GEN_PEBS_BIND(dtlb_load_miss_retired,        0x0000004, 0x0000001),
+       P4_GEN_PEBS_BIND(dtlb_store_miss_retired,       0x0000004, 0x0000002),
+       P4_GEN_PEBS_BIND(dtlb_all_miss_retired,         0x0000004, 0x0000003),
+       P4_GEN_PEBS_BIND(tagged_mispred_branch,         0x0018000, 0x0000010),
+       P4_GEN_PEBS_BIND(mob_load_replay_retired,       0x0000200, 0x0000001),
+       P4_GEN_PEBS_BIND(split_load_retired,            0x0000400, 0x0000001),
+       P4_GEN_PEBS_BIND(split_store_retired,           0x0000400, 0x0000002),
 };
 
 /*
@@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = {
        },
 };
 
-#define P4_GEN_CACHE_EVENT(event, bit, cache_event)                      \
+#define P4_GEN_CACHE_EVENT(event, bit, metric)                           \
        p4_config_pack_escr(P4_ESCR_EVENT(event)                        | \
                            P4_ESCR_EMASK_BIT(event, bit))              | \
-       p4_config_pack_cccr(cache_event                                 | \
+       p4_config_pack_cccr(metric                                      | \
                            P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
 
 static __initconst const u64 p4_hw_cache_event_ids
@@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__1stl_cache_load_miss_retired),
+                                               P4_PEBS_METRIC__1stl_cache_load_miss_retired),
        },
  },
  [ C(LL  ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__2ndl_cache_load_miss_retired),
+                                               P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
        },
 },
  [ C(DTLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__dtlb_load_miss_retired),
+                                               P4_PEBS_METRIC__dtlb_load_miss_retired),
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__dtlb_store_miss_retired),
+                                               P4_PEBS_METRIC__dtlb_store_miss_retired),
        },
  },
  [ C(ITLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
-                                               P4_CACHE__itlb_reference_hit),
+                                               P4_PEBS_METRIC__none),
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
-                                               P4_CACHE__itlb_reference_miss),
+                                               P4_PEBS_METRIC__none),
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
@@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event)
        return config;
 }
 
+static int p4_validate_raw_event(struct perf_event *event)
+{
+       unsigned int v;
+
+       /* user data may have an out-of-bounds event index */
+       v = p4_config_unpack_event(event->attr.config);
+       if (v >= ARRAY_SIZE(p4_event_bind_map)) {
+               pr_warning("P4 PMU: Unknown event code: %d\n", v);
+               return -EINVAL;
+       }
+
+       /*
+        * it may have some screwed PEBS bits
+        */
+       if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
+               pr_warning("P4 PMU: PEBS are not supported yet\n");
+               return -EINVAL;
+       }
+       v = p4_config_unpack_metric(event->attr.config);
+       if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
+               pr_warning("P4 PMU: Unknown metric code: %d\n", v);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int p4_hw_config(struct perf_event *event)
 {
        int cpu = get_cpu();
        int rc = 0;
-       unsigned int evnt;
        u32 escr, cccr;
 
        /*
@@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event)
 
        if (event->attr.type == PERF_TYPE_RAW) {
 
-               /* user data may have out-of-bound event index */
-               evnt = p4_config_unpack_event(event->attr.config);
-               if (evnt >= ARRAY_SIZE(p4_event_bind_map)) {
-                       rc = -EINVAL;
+               rc = p4_validate_raw_event(event);
+               if (rc)
                        goto out;
-               }
 
                /*
                 * We don't control raw events so it's up to the caller
@@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event)
                 * on HT machine but allow HT-compatible specifics to be
                 * passed on)
                 *
+                * Note that for RAW events we allow the user to use P4_CCCR_RESERVED
+                * bits since we keep additional info here (for cache events etc.)
+                *
                 * XXX: HT wide things should check perf_paranoid_cpu() &&
                 *      CAP_SYS_ADMIN
                 */
                event->hw.config |= event->attr.config &
                        (p4_config_pack_escr(P4_ESCR_MASK_HT) |
-                        p4_config_pack_cccr(P4_CCCR_MASK_HT));
+                        p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
        }
 
        rc = x86_setup_perfctr(event);
@@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
        return overflow;
 }
 
+static void p4_pmu_disable_pebs(void)
+{
+       /*
+        * FIXME
+        *
+        * It's still allowed for two threads to set up the same cache
+        * events, so we can't simply clear the metrics until we know
+        * no one depends on us; we would need some kind of refcount
+        * for "ReplayEvent" users.
+        *
+        * RAW events are even trickier: if the user (for some reason)
+        * passes a cache event metric together with an improper event
+        * opcode, it's fine from the hardware's point of view but
+        * complete nonsense as far as the meaning of the action goes.
+        *
+        * So for the moment leave the metrics turned on forever -- it's
+        * OK for now but needs to be revisited!
+        *
+        * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
+        * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
+        */
+}
+
 static inline void p4_pmu_disable_event(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
@@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void)
                        continue;
                p4_pmu_disable_event(event);
        }
+
+       p4_pmu_disable_pebs();
+}
+
+/* configuration must be valid */
+static void p4_pmu_enable_pebs(u64 config)
+{
+       struct p4_pebs_bind *bind;
+       unsigned int idx;
+
+       BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
+
+       idx = p4_config_unpack_metric(config);
+       if (idx == P4_PEBS_METRIC__none)
+               return;
+
+       bind = &p4_pebs_bind_map[idx];
+
+       (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE,     (u64)bind->metric_pebs);
+       (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT,  (u64)bind->metric_vert);
 }
 
 static void p4_pmu_enable_event(struct perf_event *event)
@@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
        int thread = p4_ht_config_thread(hwc->config);
        u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
        unsigned int idx = p4_config_unpack_event(hwc->config);
-       unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
        struct p4_event_bind *bind;
-       struct p4_cache_event_bind *bind_cache;
        u64 escr_addr, cccr;
 
        bind = &p4_event_bind_map[idx];
@@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event)
        cccr = p4_config_unpack_cccr(hwc->config);
 
        /*
-        * it could be Cache event so that we need to
-        * set metrics into additional MSRs
+        * it could be a cache event, so we need to write the metrics
+        * into additional MSRs
         */
-       BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
-       if (idx_cache > P4_CACHE__NONE &&
-               idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
-               bind_cache = &p4_cache_event_bind_map[idx_cache];
-               (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
-               (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
-       }
+       p4_pmu_enable_pebs(hwc->config);
 
        (void)checking_wrmsrl(escr_addr, escr_conf);
        (void)checking_wrmsrl(hwc->config_base + hwc->idx,
@@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = {
        .max_period             = (1ULL << 39) - 1,
        .hw_config              = p4_hw_config,
        .schedule_events        = p4_pmu_schedule_events,
+       /*
+        * This handles erratum N15 in Intel doc 249199-029:
+        * the counter may not be updated correctly on write,
+        * so we need a second write operation to do the trick
+        * (the official workaround didn't work).
+        *
+        * The idea is taken from the OProfile code.
+        */
+       .perfctr_second_write   = 1,
 };
 
 static __init int p4_pmu_init(void)
index c89a386..6e8752c 100644 (file)
@@ -18,7 +18,6 @@
 
 #include <asm/stacktrace.h>
 
-#include "dumpstack.h"
 
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
deleted file mode 100644 (file)
index e1a93be..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
- */
-
-#ifndef DUMPSTACK_H
-#define DUMPSTACK_H
-
-#ifdef CONFIG_X86_32
-#define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
-#else
-#define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
-#endif
-
-#include <linux/uaccess.h>
-
-extern void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp, char *log_lvl);
-
-extern void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *sp, unsigned long bp, char *log_lvl);
-
-extern unsigned int code_bytes;
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
-       struct stack_frame *next_frame;
-       unsigned long return_address;
-};
-
-struct stack_frame_ia32 {
-    u32 next_frame;
-    u32 return_address;
-};
-
-static inline unsigned long rewind_frame_pointer(int n)
-{
-       struct stack_frame *frame;
-
-       get_bp(frame);
-
-#ifdef CONFIG_FRAME_POINTER
-       while (n--) {
-               if (probe_kernel_address(&frame->next_frame, frame))
-                       break;
-       }
-#endif
-
-       return (unsigned long)frame;
-}
-
-#endif /* DUMPSTACK_H */
index 11540a1..0f6376f 100644 (file)
@@ -16,8 +16,6 @@
 
 #include <asm/stacktrace.h>
 
-#include "dumpstack.h"
-
 
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
                unsigned long *stack, unsigned long bp,
index 272c9f1..57a21f1 100644 (file)
@@ -16,7 +16,6 @@
 
 #include <asm/stacktrace.h>
 
-#include "dumpstack.h"
 
 #define N_EXCEPTION_STACKS_END \
                (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
index a8f1b80..a474ec3 100644 (file)
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
 {
        /* Len */
        switch (x86_len) {
+       case X86_BREAKPOINT_LEN_X:
+               *gen_len = sizeof(long);
+               break;
        case X86_BREAKPOINT_LEN_1:
                *gen_len = HW_BREAKPOINT_LEN_1;
                break;
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)
 
        info->address = bp->attr.bp_addr;
 
+       /* Type */
+       switch (bp->attr.bp_type) {
+       case HW_BREAKPOINT_W:
+               info->type = X86_BREAKPOINT_WRITE;
+               break;
+       case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+               info->type = X86_BREAKPOINT_RW;
+               break;
+       case HW_BREAKPOINT_X:
+               info->type = X86_BREAKPOINT_EXECUTE;
+               /*
+                * x86 inst breakpoints need to have a specific undefined len.
+                * But we still need to check that userspace is not trying to
+                * set up an unsupported length (a range breakpoint, for example).
+                */
+               if (bp->attr.bp_len == sizeof(long)) {
+                       info->len = X86_BREAKPOINT_LEN_X;
+                       return 0;
+               }
+       default:
+               return -EINVAL;
+       }
+
        /* Len */
        switch (bp->attr.bp_len) {
        case HW_BREAKPOINT_LEN_1:
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)
                return -EINVAL;
        }
 
-       /* Type */
-       switch (bp->attr.bp_type) {
-       case HW_BREAKPOINT_W:
-               info->type = X86_BREAKPOINT_WRITE;
-               break;
-       case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
-               info->type = X86_BREAKPOINT_RW;
-               break;
-       case HW_BREAKPOINT_X:
-               info->type = X86_BREAKPOINT_EXECUTE;
-               break;
-       default:
-               return -EINVAL;
-       }
-
        return 0;
 }
 /*
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
        ret = -EINVAL;
 
        switch (info->len) {
+       case X86_BREAKPOINT_LEN_X:
+               align = sizeof(long) - 1;
+               break;
        case X86_BREAKPOINT_LEN_1:
                align = 0;
                break;
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 
                perf_bp_event(bp, args->regs);
 
+               /*
+                * Set up the resume flag to avoid breakpoint recursion when
+                * returning to the original instruction.
+                */
+               if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
+                       args->regs->flags |= X86_EFLAGS_RF;
+
                rcu_read_unlock();
        }
        /*
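
These hw_breakpoint changes make execute breakpoints usable with a length of exactly sizeof(long) and reject anything else with -EINVAL. A hedged userspace sketch of what a caller would now pass through perf_event_open(); the syscall wrapper, the header set, and the lack of error handling are illustrative assumptions:

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Request an instruction breakpoint on 'addr' for the calling thread.
 * With the change above, bp_len must be sizeof(long) for HW_BREAKPOINT_X. */
static int exec_breakpoint_fd(unsigned long addr)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size          = sizeof(attr);
        attr.type          = PERF_TYPE_BREAKPOINT;
        attr.bp_type       = HW_BREAKPOINT_X;
        attr.bp_addr       = addr;
        attr.bp_len        = sizeof(long);   /* required length for execute */
        attr.sample_period = 1;              /* fire on every hit */

        return syscall(__NR_perf_event_open, &attr,
                       0 /* self */, -1 /* any cpu */, -1 /* no group */, 0);
}
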
index 675879b..1bfb6cf 100644 (file)
@@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to)
 }
 
 /*
- * Check for the REX prefix which can only exist on X86_64
- * X86_32 always returns 0
+ * Skip the prefixes of the instruction.
  */
-static int __kprobes is_REX_prefix(kprobe_opcode_t *insn)
+static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
 {
+       insn_attr_t attr;
+
+       attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+       while (inat_is_legacy_prefix(attr)) {
+               insn++;
+               attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+       }
 #ifdef CONFIG_X86_64
-       if ((*insn & 0xf0) == 0x40)
-               return 1;
+       if (inat_is_rex_prefix(attr))
+               insn++;
 #endif
-       return 0;
+       return insn;
 }
 
 /*
@@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr)
  */
 static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 {
+       /* Skip prefixes */
+       insn = skip_prefixes(insn);
+
        switch (*insn) {
        case 0xfa:              /* cli */
        case 0xfb:              /* sti */
@@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
                return 1;
        }
 
-       /*
-        * on X86_64, 0x40-0x4f are REX prefixes so we need to look
-        * at the next byte instead.. but of course not recurse infinitely
-        */
-       if (is_REX_prefix(insn))
-               return is_IF_modifier(++insn);
-
        return 0;
 }
 
@@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p,
        unsigned long orig_ip = (unsigned long)p->addr;
        kprobe_opcode_t *insn = p->ainsn.insn;
 
-       /*skip the REX prefix*/
-       if (is_REX_prefix(insn))
-               insn++;
+       /* Skip prefixes */
+       insn = skip_prefixes(insn);
 
        regs->flags &= ~X86_EFLAGS_TF;
        switch (*insn) {
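
skip_prefixes() replaces the REX-only check with a walk over all instruction prefixes driven by the in-kernel inat_* opcode tables. A standalone sketch of the same idea with a hard-coded legacy-prefix list (an assumption for illustration; the kernel relies on its instruction decoder tables rather than such a list):

#include <stdint.h>
#include <stdbool.h>

static bool is_legacy_prefix(uint8_t b)
{
        switch (b) {
        case 0x26: case 0x2e: case 0x36: case 0x3e:     /* segment overrides */
        case 0x64: case 0x65:
        case 0x66: case 0x67:                           /* operand/address size */
        case 0xf0: case 0xf2: case 0xf3:                /* lock / rep / repne */
                return true;
        default:
                return false;
        }
}

/* Step over legacy prefixes and (on 64-bit) a REX prefix to reach the opcode. */
static const uint8_t *skip_prefixes(const uint8_t *insn)
{
        while (is_legacy_prefix(*insn))
                insn++;
#ifdef __x86_64__
        if ((*insn & 0xf0) == 0x40)                     /* REX.* */
                insn++;
#endif
        return insn;
}
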
index e7e3521..787572d 100644 (file)
@@ -371,7 +371,7 @@ static inline int hlt_use_halt(void)
 void default_idle(void)
 {
        if (hlt_use_halt()) {
-               trace_power_start(POWER_CSTATE, 1);
+               trace_power_start(POWER_CSTATE, 1, smp_processor_id());
                current_thread_info()->status &= ~TS_POLLING;
                /*
                 * TS_POLLING-cleared state must be visible before we
@@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
  */
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
-       trace_power_start(POWER_CSTATE, (ax>>4)+1);
+       trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
        if (!need_resched()) {
                if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
@@ -457,7 +457,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 static void mwait_idle(void)
 {
        if (!need_resched()) {
-               trace_power_start(POWER_CSTATE, 1);
+               trace_power_start(POWER_CSTATE, 1, smp_processor_id());
                if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
 
@@ -478,7 +478,7 @@ static void mwait_idle(void)
  */
 static void poll_idle(void)
 {
-       trace_power_start(POWER_CSTATE, 0);
+       trace_power_start(POWER_CSTATE, 0, smp_processor_id());
        local_irq_enable();
        while (!need_resched())
                cpu_relax();
index 8d12878..96586c3 100644 (file)
@@ -57,6 +57,8 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 
+#include <trace/events/power.h>
+
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 /*
@@ -111,6 +113,8 @@ void cpu_idle(void)
                        stop_critical_timings();
                        pm_idle();
                        start_critical_timings();
+
+                       trace_power_end(smp_processor_id());
                }
                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
index 3c2422a..3d9ea53 100644 (file)
@@ -51,6 +51,8 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 
+#include <trace/events/power.h>
+
 asmlinkage extern void ret_from_fork(void);
 
 DEFINE_PER_CPU(unsigned long, old_rsp);
@@ -138,6 +140,9 @@ void cpu_idle(void)
                        stop_critical_timings();
                        pm_idle();
                        start_critical_timings();
+
+                       trace_power_end(smp_processor_id());
+
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
index 922eefb..b53c525 100644 (file)
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)
        return 0;
 }
 
-static void save_stack_address(void *data, unsigned long addr, int reliable)
+static void
+__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
 {
        struct stack_trace *trace = data;
+#ifdef CONFIG_FRAME_POINTER
        if (!reliable)
                return;
+#endif
+       if (nosched && in_sched_functions(addr))
+               return;
        if (trace->skip > 0) {
                trace->skip--;
                return;
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
                trace->entries[trace->nr_entries++] = addr;
 }
 
+static void save_stack_address(void *data, unsigned long addr, int reliable)
+{
+       return __save_stack_address(data, addr, reliable, false);
+}
+
 static void
 save_stack_address_nosched(void *data, unsigned long addr, int reliable)
 {
-       struct stack_trace *trace = (struct stack_trace *)data;
-       if (!reliable)
-               return;
-       if (in_sched_functions(addr))
-               return;
-       if (trace->skip > 0) {
-               trace->skip--;
-               return;
-       }
-       if (trace->nr_entries < trace->max_entries)
-               trace->entries[trace->nr_entries++] = addr;
+       return __save_stack_address(data, addr, reliable, true);
 }
 
 static const struct stacktrace_ops save_stack_ops = {
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
 /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
 
-struct stack_frame {
+struct stack_frame_user {
        const void __user       *next_fp;
        unsigned long           ret_addr;
 };
 
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+static int
+copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
 {
        int ret;
 
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
                trace->entries[trace->nr_entries++] = regs->ip;
 
        while (trace->nr_entries < trace->max_entries) {
-               struct stack_frame frame;
+               struct stack_frame_user frame;
 
                frame.next_fp = NULL;
                frame.ret_addr = 0;
index 308e325..38e6d17 100644 (file)
@@ -40,16 +40,16 @@ static unsigned char prefix_codes[] = {
 static unsigned int reg_rop[] = {
        0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
 };
-static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
 static unsigned int imm_wop[] = { 0xC6, 0xC7 };
 /* IA32 Manual 3, 3-432*/
-static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
+static unsigned int rw8[] = { 0x88, 0x8A, 0xC6, 0xAA };
 static unsigned int rw32[] = {
-       0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+       0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
 };
-static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
+static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F, 0xAA };
 static unsigned int mw16[] = { 0xB70F, 0xBF0F };
-static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
+static unsigned int mw32[] = { 0x89, 0x8B, 0xC7, 0xAB };
 static unsigned int mw64[] = {};
 #else /* not __i386__ */
 static unsigned char prefix_codes[] = {
@@ -63,20 +63,20 @@ static unsigned char prefix_codes[] = {
 static unsigned int reg_rop[] = {
        0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
 };
-static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
 static unsigned int imm_wop[] = { 0xC6, 0xC7 };
-static unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
+static unsigned int rw8[] = { 0xC6, 0x88, 0x8A, 0xAA };
 static unsigned int rw32[] = {
-       0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+       0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
 };
 /* 8 bit only */
-static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
+static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F, 0xAA };
 /* 16 bit only */
 static unsigned int mw16[] = { 0xB70F, 0xBF0F };
 /* 16 or 32 bit */
 static unsigned int mw32[] = { 0xC7 };
 /* 16, 32 or 64 bit */
-static unsigned int mw64[] = { 0x89, 0x8B };
+static unsigned int mw64[] = { 0x89, 0x8B, 0xAB };
 #endif /* not __i386__ */
 
 struct prefix_bits {
@@ -410,7 +410,6 @@ static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
 unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 {
        unsigned int opcode;
-       unsigned char mod_rm;
        int reg;
        unsigned char *p;
        struct prefix_bits prf;
@@ -437,8 +436,13 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
        goto err;
 
 do_work:
-       mod_rm = *p;
-       reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
+       /* for STOS, source register is fixed */
+       if (opcode == 0xAA || opcode == 0xAB) {
+               reg = arg_AX;
+       } else {
+               unsigned char mod_rm = *p;
+               reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
+       }
        switch (get_ins_reg_width(ins_addr)) {
        case 1:
                return *get_reg_w8(reg, prf.rex, regs);
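
The pf_in.c hunks teach the mmiotrace decoder about STOS (0xAA/0xAB), which has no ModRM byte and always stores from AL/AX/EAX/RAX. A tiny sketch of that decision; REG_AX stands in for the kernel's arg_AX index and the function name is made up:

#include <stdint.h>

#define REG_AX 0        /* accumulator; x86 encodes it as register 0 */

/* Return the source register index for a traced write instruction. */
static int write_source_reg(uint8_t opcode, uint8_t modrm, int rex_r)
{
        /* STOSB / STOSW/D/Q: data is implicitly taken from the accumulator */
        if (opcode == 0xAA || opcode == 0xAB)
                return REG_AX;

        /* everything else: the ModRM reg field, extended by REX.R */
        return ((modrm >> 3) & 0x7) | (rex_r << 3);
}
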
diff --git a/arch/xtensa/include/asm/local64.h b/arch/xtensa/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index 938b74e..e59cb47 100644 (file)
@@ -29,6 +29,8 @@
 #include <linux/completion.h>
 #include <linux/mutex.h>
 
+#include <trace/events/power.h>
+
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
                                                "cpufreq-core", msg)
 
@@ -354,6 +356,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 
        case CPUFREQ_POSTCHANGE:
                adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
+                trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
index 1994885..dbefe15 100644 (file)
@@ -95,7 +95,7 @@ static void cpuidle_idle_call(void)
        /* give the governor an opportunity to reflect on the outcome */
        if (cpuidle_curr_governor->reflect)
                cpuidle_curr_governor->reflect(dev);
-       trace_power_end(0);
+       trace_power_end(smp_processor_id());
 }
 
 /**
index 54f0fb4..03d202b 100755 (executable)
@@ -231,7 +231,7 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
 
        stop_critical_timings();
 #ifndef MODULE
-       trace_power_start(POWER_CSTATE, (eax >> 4) + 1);
+       trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu);
 #endif
        if (!need_resched()) {
 
index e19de6a..97d91a0 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -653,6 +653,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
        else
                stack_base = vma->vm_start - stack_expand;
 #endif
+       current->mm->start_stack = bprm->p;
        ret = expand_stack(vma, stack_base);
        if (ret)
                ret = -EFAULT;
diff --git a/include/asm-generic/local64.h b/include/asm-generic/local64.h
new file mode 100644 (file)
index 0000000..02ac760
--- /dev/null
@@ -0,0 +1,96 @@
+#ifndef _ASM_GENERIC_LOCAL64_H
+#define _ASM_GENERIC_LOCAL64_H
+
+#include <linux/percpu.h>
+#include <asm/types.h>
+
+/*
+ * A signed long type for operations which are atomic for a single CPU.
+ * Usually used in combination with per-cpu variables.
+ *
+ * This is the default implementation, which uses atomic64_t.  Which is
+ * rather pointless.  The whole point behind local64_t is that some processors
+ * can perform atomic adds and subtracts in a manner which is atomic wrt IRQs
+ * running on this CPU.  local64_t allows exploitation of such capabilities.
+ */
+
+/* Implement in terms of atomics. */
+
+#if BITS_PER_LONG == 64
+
+#include <asm/local.h>
+
+typedef struct {
+       local_t a;
+} local64_t;
+
+#define LOCAL64_INIT(i)        { LOCAL_INIT(i) }
+
+#define local64_read(l)                local_read(&(l)->a)
+#define local64_set(l,i)       local_set((&(l)->a),(i))
+#define local64_inc(l)         local_inc(&(l)->a)
+#define local64_dec(l)         local_dec(&(l)->a)
+#define local64_add(i,l)       local_add((i),(&(l)->a))
+#define local64_sub(i,l)       local_sub((i),(&(l)->a))
+
+#define local64_sub_and_test(i, l) local_sub_and_test((i), (&(l)->a))
+#define local64_dec_and_test(l) local_dec_and_test(&(l)->a)
+#define local64_inc_and_test(l) local_inc_and_test(&(l)->a)
+#define local64_add_negative(i, l) local_add_negative((i), (&(l)->a))
+#define local64_add_return(i, l) local_add_return((i), (&(l)->a))
+#define local64_sub_return(i, l) local_sub_return((i), (&(l)->a))
+#define local64_inc_return(l)  local_inc_return(&(l)->a)
+
+#define local64_cmpxchg(l, o, n) local_cmpxchg((&(l)->a), (o), (n))
+#define local64_xchg(l, n)     local_xchg((&(l)->a), (n))
+#define local64_add_unless(l, _a, u) local_add_unless((&(l)->a), (_a), (u))
+#define local64_inc_not_zero(l)        local_inc_not_zero(&(l)->a)
+
+/* Non-atomic variants, ie. preemption disabled and won't be touched
+ * in interrupt, etc.  Some archs can optimize this case well. */
+#define __local64_inc(l)       local64_set((l), local64_read(l) + 1)
+#define __local64_dec(l)       local64_set((l), local64_read(l) - 1)
+#define __local64_add(i,l)     local64_set((l), local64_read(l) + (i))
+#define __local64_sub(i,l)     local64_set((l), local64_read(l) - (i))
+
+#else /* BITS_PER_LONG != 64 */
+
+#include <asm/atomic.h>
+
+/* Don't use typedef: don't want them to be mixed with atomic_t's. */
+typedef struct {
+       atomic64_t a;
+} local64_t;
+
+#define LOCAL64_INIT(i)        { ATOMIC_LONG_INIT(i) }
+
+#define local64_read(l)                atomic64_read(&(l)->a)
+#define local64_set(l,i)       atomic64_set((&(l)->a),(i))
+#define local64_inc(l)         atomic64_inc(&(l)->a)
+#define local64_dec(l)         atomic64_dec(&(l)->a)
+#define local64_add(i,l)       atomic64_add((i),(&(l)->a))
+#define local64_sub(i,l)       atomic64_sub((i),(&(l)->a))
+
+#define local64_sub_and_test(i, l) atomic64_sub_and_test((i), (&(l)->a))
+#define local64_dec_and_test(l) atomic64_dec_and_test(&(l)->a)
+#define local64_inc_and_test(l) atomic64_inc_and_test(&(l)->a)
+#define local64_add_negative(i, l) atomic64_add_negative((i), (&(l)->a))
+#define local64_add_return(i, l) atomic64_add_return((i), (&(l)->a))
+#define local64_sub_return(i, l) atomic64_sub_return((i), (&(l)->a))
+#define local64_inc_return(l)  atomic64_inc_return(&(l)->a)
+
+#define local64_cmpxchg(l, o, n) atomic64_cmpxchg((&(l)->a), (o), (n))
+#define local64_xchg(l, n)     atomic64_xchg((&(l)->a), (n))
+#define local64_add_unless(l, _a, u) atomic64_add_unless((&(l)->a), (_a), (u))
+#define local64_inc_not_zero(l)        atomic64_inc_not_zero(&(l)->a)
+
+/* Non-atomic variants, ie. preemption disabled and won't be touched
+ * in interrupt, etc.  Some archs can optimize this case well. */
+#define __local64_inc(l)       local64_set((l), local64_read(l) + 1)
+#define __local64_dec(l)       local64_set((l), local64_read(l) - 1)
+#define __local64_add(i,l)     local64_set((l), local64_read(l) + (i))
+#define __local64_sub(i,l)     local64_set((l), local64_read(l) - (i))
+
+#endif /* BITS_PER_LONG != 64 */
+
+#endif /* _ASM_GENERIC_LOCAL64_H */
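
A kernel-style sketch of the intended use, under the assumption of a per-cpu counter that is only ever updated by its owning CPU; 'bytes_handled' and the helper names are made up. The owner updates its local64_t without a LOCK-prefixed instruction on 64-bit architectures, while other CPUs just sample it with local64_read():

#include <linux/percpu.h>
#include <asm/local64.h>

static DEFINE_PER_CPU(local64_t, bytes_handled);

/* Called with preemption disabled, e.g. from IRQ context on this CPU. */
static void account_bytes(unsigned long n)
{
        local64_add(n, &__get_cpu_var(bytes_handled));
}

/* Cross-CPU readers just sample the value; no ordering is implied. */
static u64 bytes_handled_on(int cpu)
{
        return local64_read(&per_cpu(bytes_handled, cpu));
}
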
index 030a954..853aa87 100644 (file)
        CPU_KEEP(exit.data)                                             \
        MEM_KEEP(init.data)                                             \
        MEM_KEEP(exit.data)                                             \
-       . = ALIGN(8);                                                   \
-       VMLINUX_SYMBOL(__start___markers) = .;                          \
-       *(__markers)                                                    \
-       VMLINUX_SYMBOL(__stop___markers) = .;                           \
        . = ALIGN(32);                                                  \
        VMLINUX_SYMBOL(__start___tracepoints) = .;                      \
        *(__tracepoints)                                                \
index 41e4633..dcd6a7c 100644 (file)
@@ -1,3 +1,8 @@
+/*
+ * Ftrace header.  For implementation details beyond the random comments
+ * scattered below, see: Documentation/trace/ftrace-design.txt
+ */
+
 #ifndef _LINUX_FTRACE_H
 #define _LINUX_FTRACE_H
 
index 3167f2d..02b8b24 100644 (file)
@@ -11,8 +11,6 @@ struct trace_array;
 struct tracer;
 struct dentry;
 
-DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
-
 struct trace_print_flags {
        unsigned long           mask;
        const char              *name;
@@ -58,6 +56,9 @@ struct trace_iterator {
        struct ring_buffer_iter *buffer_iter[NR_CPUS];
        unsigned long           iter_flags;
 
+       /* trace_seq for __print_flags() and __print_symbolic() etc. */
+       struct trace_seq        tmp_seq;
+
        /* The below is zeroed out in pipe_read */
        struct trace_seq        seq;
        struct trace_entry      *ent;
@@ -146,14 +147,19 @@ struct ftrace_event_class {
        int                     (*raw_init)(struct ftrace_event_call *);
 };
 
+extern int ftrace_event_reg(struct ftrace_event_call *event,
+                           enum trace_reg type);
+
 enum {
        TRACE_EVENT_FL_ENABLED_BIT,
        TRACE_EVENT_FL_FILTERED_BIT,
+       TRACE_EVENT_FL_RECORDED_CMD_BIT,
 };
 
 enum {
-       TRACE_EVENT_FL_ENABLED  = (1 << TRACE_EVENT_FL_ENABLED_BIT),
-       TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
+       TRACE_EVENT_FL_ENABLED          = (1 << TRACE_EVENT_FL_ENABLED_BIT),
+       TRACE_EVENT_FL_FILTERED         = (1 << TRACE_EVENT_FL_FILTERED_BIT),
+       TRACE_EVENT_FL_RECORDED_CMD     = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
 };
 
 struct ftrace_event_call {
@@ -171,6 +177,7 @@ struct ftrace_event_call {
         * 32 bit flags:
         *   bit 1:             enabled
         *   bit 2:             filter_active
+        *   bit 3:             enabled cmd record
         *
         * Changes to flags must hold the event_mutex.
         *
@@ -257,8 +264,7 @@ static inline void
 perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
                       u64 count, struct pt_regs *regs, void *head)
 {
-       perf_tp_event(addr, count, raw_data, size, regs, head);
-       perf_swevent_put_recursion_context(rctx);
+       perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
 }
 #endif
 
index 8317ec4..adee958 100644 (file)
@@ -508,9 +508,6 @@ extern void tracing_start(void);
 extern void tracing_stop(void);
 extern void ftrace_off_permanent(void);
 
-extern void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
-
 static inline void __attribute__ ((format (printf, 1, 2)))
 ____trace_printk_check_format(const char *fmt, ...)
 {
@@ -586,8 +583,6 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
 
 extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
 #else
-static inline void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 static inline int
 trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
deleted file mode 100644 (file)
index b616d39..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- *
- * This file is released under GPL version 2.
- */
-
-#ifndef _LINUX_KMEMTRACE_H
-#define _LINUX_KMEMTRACE_H
-
-#ifdef __KERNEL__
-
-#include <trace/events/kmem.h>
-
-#ifdef CONFIG_KMEMTRACE
-extern void kmemtrace_init(void);
-#else
-static inline void kmemtrace_init(void)
-{
-}
-#endif
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_KMEMTRACE_H */
-
index 5d0266d..937495c 100644 (file)
@@ -214,8 +214,9 @@ struct perf_event_attr {
                                 *  See also PERF_RECORD_MISC_EXACT_IP
                                 */
                                precise_ip     :  2, /* skid constraint       */
+                               mmap_data      :  1, /* non-exec mmap data    */
 
-                               __reserved_1   : 47;
+                               __reserved_1   : 46;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -461,6 +462,7 @@ enum perf_callchain_context {
 
 #ifdef CONFIG_PERF_EVENTS
 # include <asm/perf_event.h>
+# include <asm/local64.h>
 #endif
 
 struct perf_guest_info_callbacks {
@@ -531,14 +533,16 @@ struct hw_perf_event {
                        struct hrtimer  hrtimer;
                };
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-               /* breakpoint */
-               struct arch_hw_breakpoint       info;
+               struct { /* breakpoint */
+                       struct arch_hw_breakpoint       info;
+                       struct list_head                bp_list;
+               };
 #endif
        };
-       atomic64_t                      prev_count;
+       local64_t                       prev_count;
        u64                             sample_period;
        u64                             last_period;
-       atomic64_t                      period_left;
+       local64_t                       period_left;
        u64                             interrupts;
 
        u64                             freq_time_stamp;
@@ -548,7 +552,10 @@ struct hw_perf_event {
 
 struct perf_event;
 
-#define PERF_EVENT_TXN_STARTED 1
+/*
+ * Common implementation detail of pmu::{start,commit,cancel}_txn
+ */
+#define PERF_EVENT_TXN 0x1
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -562,14 +569,28 @@ struct pmu {
        void (*unthrottle)              (struct perf_event *event);
 
        /*
-        * group events scheduling is treated as a transaction,
-        * add group events as a whole and perform one schedulability test.
-        * If test fails, roll back the whole group
+        * Group event scheduling is treated as a transaction: add the group's
+        * events as a whole and perform one schedulability test. If the test
+        * fails, roll back the whole group.
         */
 
+       /*
+        * Start the transaction; after this ->enable() doesn't need
+        * to do schedulability tests.
+        */
        void (*start_txn)       (const struct pmu *pmu);
-       void (*cancel_txn)      (const struct pmu *pmu);
+       /*
+        * If ->start_txn() disabled the ->enable() schedulability test
+        * then ->commit_txn() is required to perform one. On success
+        * the transaction is closed. On error the transaction is kept
+        * open until ->cancel_txn() is called.
+        */
        int  (*commit_txn)      (const struct pmu *pmu);
+       /*
+        * Will cancel the transaction, assumes ->disable() is called for
+        * each successful ->enable() during the transaction.
+        */
+       void (*cancel_txn)      (const struct pmu *pmu);
 };
 
 /**
@@ -584,7 +605,9 @@ enum perf_event_active_state {
 
 struct file;
 
-struct perf_mmap_data {
+#define PERF_BUFFER_WRITABLE           0x01
+
+struct perf_buffer {
        atomic_t                        refcount;
        struct rcu_head                 rcu_head;
 #ifdef CONFIG_PERF_USE_VMALLOC
@@ -650,7 +673,8 @@ struct perf_event {
 
        enum perf_event_active_state    state;
        unsigned int                    attach_state;
-       atomic64_t                      count;
+       local64_t                       count;
+       atomic64_t                      child_count;
 
        /*
         * These are the total time in nanoseconds that the event
@@ -709,7 +733,7 @@ struct perf_event {
        atomic_t                        mmap_count;
        int                             mmap_locked;
        struct user_struct              *mmap_user;
-       struct perf_mmap_data           *data;
+       struct perf_buffer              *buffer;
 
        /* poll related */
        wait_queue_head_t               waitq;
@@ -807,7 +831,7 @@ struct perf_cpu_context {
 
 struct perf_output_handle {
        struct perf_event               *event;
-       struct perf_mmap_data           *data;
+       struct perf_buffer              *buffer;
        unsigned long                   wakeup;
        unsigned long                   size;
        void                            *addr;
@@ -910,8 +934,10 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
 
-extern void
-perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+#ifndef perf_arch_fetch_caller_regs
+static inline void
+perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
+#endif
 
 /*
  * Take a snapshot of the regs. Skip ip and frame pointer to
@@ -921,31 +947,11 @@ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
  * - bp for callchains
  * - eflags, for future purposes, just in case
  */
-static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
+static inline void perf_fetch_caller_regs(struct pt_regs *regs)
 {
-       unsigned long ip;
-
        memset(regs, 0, sizeof(*regs));
 
-       switch (skip) {
-       case 1 :
-               ip = CALLER_ADDR0;
-               break;
-       case 2 :
-               ip = CALLER_ADDR1;
-               break;
-       case 3 :
-               ip = CALLER_ADDR2;
-               break;
-       case 4:
-               ip = CALLER_ADDR3;
-               break;
-       /* No need to support further for now */
-       default:
-               ip = 0;
-       }
-
-       return perf_arch_fetch_caller_regs(regs, ip, skip);
+       perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
 }
 
 static inline void
@@ -955,21 +961,14 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
                struct pt_regs hot_regs;
 
                if (!regs) {
-                       perf_fetch_caller_regs(&hot_regs, 1);
+                       perf_fetch_caller_regs(&hot_regs);
                        regs = &hot_regs;
                }
                __perf_sw_event(event_id, nr, nmi, regs, addr);
        }
 }
 
-extern void __perf_event_mmap(struct vm_area_struct *vma);
-
-static inline void perf_event_mmap(struct vm_area_struct *vma)
-{
-       if (vma->vm_flags & VM_EXEC)
-               __perf_event_mmap(vma);
-}
-
+extern void perf_event_mmap(struct vm_area_struct *vma);
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1001,7 +1000,7 @@ static inline bool perf_paranoid_kernel(void)
 extern void perf_event_init(void);
 extern void perf_tp_event(u64 addr, u64 count, void *record,
                          int entry_size, struct pt_regs *regs,
-                         struct hlist_head *head);
+                         struct hlist_head *head, int rctx);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
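
With the change above, perf_fetch_caller_regs() always passes CALLER_ADDR0 and the per-arch hook becomes optional: an architecture supplies it by defining perf_arch_fetch_caller_regs as a macro in its <asm/perf_event.h>, otherwise the empty inline fallback is used. A hypothetical arch-side override, as a sketch only; the "myarch" name, the pt_regs field names, and the frame-pointer trick are illustrative assumptions, not taken from this patch:

/* hypothetical arch/myarch/include/asm/perf_event.h fragment */
#define perf_arch_fetch_caller_regs perf_arch_fetch_caller_regs
static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs,
                                               unsigned long ip)
{
        /* ip is CALLER_ADDR0, i.e. the caller of perf_fetch_caller_regs() */
        regs->ip = ip;
        /* start callchain unwinding from the current frame; assumes the
         * arch keeps a frame pointer and pt_regs has these fields */
        regs->bp = (unsigned long)__builtin_frame_address(0);
}
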
index 0478888..7b6ec63 100644 (file)
@@ -2435,18 +2435,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_TRACING
-extern void
-__trace_special(void *__tr, void *__data,
-               unsigned long arg1, unsigned long arg2, unsigned long arg3);
-#else
-static inline void
-__trace_special(void *__tr, void *__data,
-               unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-}
-#endif
-
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
index 1812dac..1acfa73 100644 (file)
@@ -14,7 +14,8 @@
 #include <asm/page.h>          /* kmalloc_sizes.h needs PAGE_SIZE */
 #include <asm/cache.h>         /* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
-#include <linux/kmemtrace.h>
+
+#include <trace/events/kmem.h>
 
 #ifndef ARCH_KMALLOC_MINALIGN
 /*
index 4ba59cf..6447a72 100644 (file)
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
-#include <linux/kmemtrace.h>
 #include <linux/kmemleak.h>
 
+#include <trace/events/kmem.h>
+
 enum stat_item {
        ALLOC_FASTPATH,         /* Allocation from cpu slab */
        ALLOC_SLOWPATH,         /* Allocation by getting a new cpu slab */
index 13ebb54..a6bfd13 100644 (file)
@@ -167,7 +167,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
                .enter_event    = &event_enter_##sname,         \
                .exit_event     = &event_exit_##sname,          \
                .enter_fields   = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \
-               .exit_fields    = LIST_HEAD_INIT(__syscall_meta_##sname.exit_fields), \
        };
 
 #define SYSCALL_DEFINE0(sname)                                 \
@@ -182,7 +181,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
                .enter_event    = &event_enter__##sname,        \
                .exit_event     = &event_exit__##sname,         \
                .enter_fields   = LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \
-               .exit_fields    = LIST_HEAD_INIT(__syscall_meta__##sname.exit_fields), \
        };                                                      \
        asmlinkage long sys_##sname(void)
 #else
diff --git a/include/trace/boot.h b/include/trace/boot.h
deleted file mode 100644 (file)
index 088ea08..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _LINUX_TRACE_BOOT_H
-#define _LINUX_TRACE_BOOT_H
-
-#include <linux/module.h>
-#include <linux/kallsyms.h>
-#include <linux/init.h>
-
-/*
- * Structure which defines the trace of an initcall
- * while it is called.
- * You don't have to fill the func field since it is
- * only used internally by the tracer.
- */
-struct boot_trace_call {
-       pid_t                   caller;
-       char                    func[KSYM_SYMBOL_LEN];
-};
-
-/*
- * Structure which defines the trace of an initcall
- * while it returns.
- */
-struct boot_trace_ret {
-       char                    func[KSYM_SYMBOL_LEN];
-       int                             result;
-       unsigned long long      duration;               /* nsecs */
-};
-
-#ifdef CONFIG_BOOT_TRACER
-/* Append the traces on the ring-buffer */
-extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
-extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
-
-/* Tells the tracer that smp_pre_initcall is finished.
- * So we can start the tracing
- */
-extern void start_boot_trace(void);
-
-/* Resume the tracing of other necessary events
- * such as sched switches
- */
-extern void enable_boot_trace(void);
-
-/* Suspend this tracing. Actually, only sched_switches tracing have
- * to be suspended. Initcalls doesn't need it.)
- */
-extern void disable_boot_trace(void);
-#else
-static inline
-void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
-
-static inline
-void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
-
-static inline void start_boot_trace(void) { }
-static inline void enable_boot_trace(void) { }
-static inline void disable_boot_trace(void) { }
-#endif /* CONFIG_BOOT_TRACER */
-
-#endif /* __LINUX_TRACE_BOOT_H */
index c4efe9b..35a2a6e 100644 (file)
@@ -18,52 +18,55 @@ enum {
 
 DECLARE_EVENT_CLASS(power,
 
-       TP_PROTO(unsigned int type, unsigned int state),
+       TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-       TP_ARGS(type, state),
+       TP_ARGS(type, state, cpu_id),
 
        TP_STRUCT__entry(
                __field(        u64,            type            )
                __field(        u64,            state           )
+               __field(        u64,            cpu_id          )
        ),
 
        TP_fast_assign(
                __entry->type = type;
                __entry->state = state;
+               __entry->cpu_id = cpu_id;
        ),
 
-       TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state)
+       TP_printk("type=%lu state=%lu cpu_id=%lu", (unsigned long)__entry->type,
+               (unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
 );
 
 DEFINE_EVENT(power, power_start,
 
-       TP_PROTO(unsigned int type, unsigned int state),
+       TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-       TP_ARGS(type, state)
+       TP_ARGS(type, state, cpu_id)
 );
 
 DEFINE_EVENT(power, power_frequency,
 
-       TP_PROTO(unsigned int type, unsigned int state),
+       TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-       TP_ARGS(type, state)
+       TP_ARGS(type, state, cpu_id)
 );
 
 TRACE_EVENT(power_end,
 
-       TP_PROTO(int dummy),
+       TP_PROTO(unsigned int cpu_id),
 
-       TP_ARGS(dummy),
+       TP_ARGS(cpu_id),
 
        TP_STRUCT__entry(
-               __field(        u64,            dummy           )
+               __field(        u64,            cpu_id          )
        ),
 
        TP_fast_assign(
-               __entry->dummy = 0xffff;
+               __entry->cpu_id = cpu_id;
        ),
 
-       TP_printk("dummy=%lu", (unsigned long)__entry->dummy)
+       TP_printk("cpu_id=%lu", (unsigned long)__entry->cpu_id)
 
 );
 
index b9e1dd6..9208c92 100644 (file)
@@ -50,31 +50,6 @@ TRACE_EVENT(sched_kthread_stop_ret,
 );
 
 /*
- * Tracepoint for waiting on task to unschedule:
- */
-TRACE_EVENT(sched_wait_task,
-
-       TP_PROTO(struct task_struct *p),
-
-       TP_ARGS(p),
-
-       TP_STRUCT__entry(
-               __array(        char,   comm,   TASK_COMM_LEN   )
-               __field(        pid_t,  pid                     )
-               __field(        int,    prio                    )
-       ),
-
-       TP_fast_assign(
-               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-               __entry->pid    = p->pid;
-               __entry->prio   = p->prio;
-       ),
-
-       TP_printk("comm=%s pid=%d prio=%d",
-                 __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
  * Tracepoint for waking up a task:
  */
 DECLARE_EVENT_CLASS(sched_wakeup_template,
@@ -240,6 +215,13 @@ DEFINE_EVENT(sched_process_template, sched_process_exit,
             TP_ARGS(p));
 
 /*
+ * Tracepoint for waiting on task to unschedule:
+ */
+DEFINE_EVENT(sched_process_template, sched_wait_task,
+       TP_PROTO(struct task_struct *p),
+       TP_ARGS(p));
+
+/*
  * Tracepoint for a waiting task:
  */
 TRACE_EVENT(sched_process_wait,
index 9496b96..c624126 100644 (file)
@@ -8,11 +8,7 @@
 #include <linux/hrtimer.h>
 #include <linux/timer.h>
 
-/**
- * timer_init - called when the timer is initialized
- * @timer:     pointer to struct timer_list
- */
-TRACE_EVENT(timer_init,
+DECLARE_EVENT_CLASS(timer_class,
 
        TP_PROTO(struct timer_list *timer),
 
@@ -30,6 +26,17 @@ TRACE_EVENT(timer_init,
 );
 
 /**
+ * timer_init - called when the timer is initialized
+ * @timer:     pointer to struct timer_list
+ */
+DEFINE_EVENT(timer_class, timer_init,
+
+       TP_PROTO(struct timer_list *timer),
+
+       TP_ARGS(timer)
+);
+
+/**
  * timer_start - called when the timer is started
  * @timer:     pointer to struct timer_list
  * @expires:   the timers expiry time
@@ -94,42 +101,22 @@ TRACE_EVENT(timer_expire_entry,
  * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might
  * be invalid. We solely track the pointer.
  */
-TRACE_EVENT(timer_expire_exit,
+DEFINE_EVENT(timer_class, timer_expire_exit,
 
        TP_PROTO(struct timer_list *timer),
 
-       TP_ARGS(timer),
-
-       TP_STRUCT__entry(
-               __field(void *, timer   )
-       ),
-
-       TP_fast_assign(
-               __entry->timer  = timer;
-       ),
-
-       TP_printk("timer=%p", __entry->timer)
+       TP_ARGS(timer)
 );
 
 /**
  * timer_cancel - called when the timer is canceled
  * @timer:     pointer to struct timer_list
  */
-TRACE_EVENT(timer_cancel,
+DEFINE_EVENT(timer_class, timer_cancel,
 
        TP_PROTO(struct timer_list *timer),
 
-       TP_ARGS(timer),
-
-       TP_STRUCT__entry(
-               __field( void *,        timer   )
-       ),
-
-       TP_fast_assign(
-               __entry->timer  = timer;
-       ),
-
-       TP_printk("timer=%p", __entry->timer)
+       TP_ARGS(timer)
 );
 
 /**
@@ -224,14 +211,7 @@ TRACE_EVENT(hrtimer_expire_entry,
                  (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
  );
 
-/**
- * hrtimer_expire_exit - called immediately after the hrtimer callback returns
- * @timer:     pointer to struct hrtimer
- *
- * When used in combination with the hrtimer_expire_entry tracepoint we can
- * determine the runtime of the callback function.
- */
-TRACE_EVENT(hrtimer_expire_exit,
+DECLARE_EVENT_CLASS(hrtimer_class,
 
        TP_PROTO(struct hrtimer *hrtimer),
 
@@ -249,24 +229,28 @@ TRACE_EVENT(hrtimer_expire_exit,
 );
 
 /**
- * hrtimer_cancel - called when the hrtimer is canceled
- * @hrtimer:   pointer to struct hrtimer
+ * hrtimer_expire_exit - called immediately after the hrtimer callback returns
+ * @timer:     pointer to struct hrtimer
+ *
+ * When used in combination with the hrtimer_expire_entry tracepoint we can
+ * determine the runtime of the callback function.
  */
-TRACE_EVENT(hrtimer_cancel,
+DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit,
 
        TP_PROTO(struct hrtimer *hrtimer),
 
-       TP_ARGS(hrtimer),
+       TP_ARGS(hrtimer)
+);
 
-       TP_STRUCT__entry(
-               __field( void *,        hrtimer )
-       ),
+/**
+ * hrtimer_cancel - called when the hrtimer is canceled
+ * @hrtimer:   pointer to struct hrtimer
+ */
+DEFINE_EVENT(hrtimer_class, hrtimer_cancel,
 
-       TP_fast_assign(
-               __entry->hrtimer        = hrtimer;
-       ),
+       TP_PROTO(struct hrtimer *hrtimer),
 
-       TP_printk("hrtimer=%p", __entry->hrtimer)
+       TP_ARGS(hrtimer)
 );
 
 /**
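
The timer_class/hrtimer_class conversions above work because the expire_exit and cancel events share both the record layout and the print format. When only the output differs, DEFINE_EVENT_PRINT() (its signature appears in the ftrace.h hunk below) can still reuse the class. A purely hypothetical example, not part of this patch:

/* Hypothetical: reuse timer_class but print the pointer as raw hex */
DEFINE_EVENT_PRINT(timer_class, timer_init_hex,

        TP_PROTO(struct timer_list *timer),

        TP_ARGS(timer),

        TP_printk("timer=0x%lx", (unsigned long)__entry->timer)
);
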
index 5a64905..a9377c0 100644 (file)
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
-#undef __cpparg
-#define __cpparg(arg...) arg
-
 /* Callbacks are meaningless to ftrace. */
 #undef TRACE_EVENT_FN
 #define TRACE_EVENT_FN(name, proto, args, tstruct,                     \
                assign, print, reg, unreg)                              \
-       TRACE_EVENT(name, __cpparg(proto), __cpparg(args),              \
-               __cpparg(tstruct), __cpparg(assign), __cpparg(print))   \
+       TRACE_EVENT(name, PARAMS(proto), PARAMS(args),                  \
+               PARAMS(tstruct), PARAMS(assign), PARAMS(print))         \
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
  *     struct trace_seq *s = &iter->seq;
  *     struct ftrace_raw_<call> *field; <-- defined in stage 1
  *     struct trace_entry *entry;
- *     struct trace_seq *p;
+ *     struct trace_seq *p = &iter->tmp_seq;
  *     int ret;
  *
  *     entry = iter->ent;
  *
  *     field = (typeof(field))entry;
  *
- *     p = &get_cpu_var(ftrace_event_seq);
  *     trace_seq_init(p);
  *     ret = trace_seq_printf(s, "%s: ", <call>);
  *     if (ret)
  *             ret = trace_seq_printf(s, <TP_printk> "\n");
- *     put_cpu();
  *     if (!ret)
  *             return TRACE_TYPE_PARTIAL_LINE;
  *
@@ -216,7 +211,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,    \
        struct trace_seq *s = &iter->seq;                               \
        struct ftrace_raw_##call *field;                                \
        struct trace_entry *entry;                                      \
-       struct trace_seq *p;                                            \
+       struct trace_seq *p = &iter->tmp_seq;                           \
        int ret;                                                        \
                                                                        \
        event = container_of(trace_event, struct ftrace_event_call,     \
@@ -231,12 +226,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,  \
                                                                        \
        field = (typeof(field))entry;                                   \
                                                                        \
-       p = &get_cpu_var(ftrace_event_seq);                             \
        trace_seq_init(p);                                              \
        ret = trace_seq_printf(s, "%s: ", event->name);                 \
        if (ret)                                                        \
                ret = trace_seq_printf(s, print);                       \
-       put_cpu();                                                      \
        if (!ret)                                                       \
                return TRACE_TYPE_PARTIAL_LINE;                         \
                                                                        \
@@ -255,7 +248,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,    \
        struct trace_seq *s = &iter->seq;                               \
        struct ftrace_raw_##template *field;                            \
        struct trace_entry *entry;                                      \
-       struct trace_seq *p;                                            \
+       struct trace_seq *p = &iter->tmp_seq;                           \
        int ret;                                                        \
                                                                        \
        entry = iter->ent;                                              \
@@ -267,12 +260,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,  \
                                                                        \
        field = (typeof(field))entry;                                   \
                                                                        \
-       p = &get_cpu_var(ftrace_event_seq);                             \
        trace_seq_init(p);                                              \
        ret = trace_seq_printf(s, "%s: ", #call);                       \
        if (ret)                                                        \
                ret = trace_seq_printf(s, print);                       \
-       put_cpu();                                                      \
        if (!ret)                                                       \
                return TRACE_TYPE_PARTIAL_LINE;                         \
                                                                        \
@@ -439,6 +430,7 @@ static inline notrace int ftrace_get_offsets_##call(                        \
  *     .fields                 = LIST_HEAD_INIT(event_class_##call.fields),
  *     .raw_init               = trace_event_raw_init,
  *     .probe                  = ftrace_raw_event_##call,
+ *     .reg                    = ftrace_event_reg,
  * };
  *
  * static struct ftrace_event_call __used
@@ -567,6 +559,7 @@ static struct ftrace_event_class __used event_class_##call = {              \
        .fields                 = LIST_HEAD_INIT(event_class_##call.fields),\
        .raw_init               = trace_event_raw_init,                 \
        .probe                  = ftrace_raw_event_##call,              \
+       .reg                    = ftrace_event_reg,                     \
        _TRACE_PERF_INIT(call)                                          \
 };
 
@@ -705,7 +698,7 @@ perf_trace_##call(void *__data, proto)                                      \
        int __data_size;                                                \
        int rctx;                                                       \
                                                                        \
-       perf_fetch_caller_regs(&__regs, 1);                             \
+       perf_fetch_caller_regs(&__regs);                                \
                                                                        \
        __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
        __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
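
Two things change in the generated code above: the per-CPU ftrace_event_seq scratch buffer (and its get_cpu_var()/put_cpu() pair) is replaced by a scratch trace_seq embedded in the iterator, and perf_fetch_caller_regs() loses its skip argument. A stripped-down sketch of an output callback under the new scheme; the event name and format string are placeholders, not macro output from this patch:

static enum print_line_t
sample_output(struct trace_iterator *iter, int flags)
{
        struct trace_seq *s = &iter->seq;
        struct trace_seq *p = &iter->tmp_seq;   /* per-iterator scratch, no preemption games */
        int ret;

        trace_seq_init(p);
        ret = trace_seq_printf(s, "sample: cpu=%d\n", iter->cpu);
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;

        return TRACE_TYPE_HANDLED;
}
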
index 257e089..31966a4 100644 (file)
@@ -26,7 +26,6 @@ struct syscall_metadata {
        const char      **types;
        const char      **args;
        struct list_head enter_fields;
-       struct list_head exit_fields;
 
        struct ftrace_event_call *enter_event;
        struct ftrace_event_call *exit_event;
index a42fdf4..4ab5124 100644 (file)
 #include <linux/ftrace.h>
 #include <linux/async.h>
 #include <linux/kmemcheck.h>
-#include <linux/kmemtrace.h>
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
-#include <trace/boot.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -663,7 +661,6 @@ asmlinkage void __init start_kernel(void)
 #endif
        page_cgroup_init();
        enable_debug_pagealloc();
-       kmemtrace_init();
        kmemleak_init();
        debug_objects_mem_init();
        idr_init_cache();
@@ -725,38 +722,33 @@ int initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
 
 static char msgbuf[64];
-static struct boot_trace_call call;
-static struct boot_trace_ret ret;
 
 int do_one_initcall(initcall_t fn)
 {
        int count = preempt_count();
        ktime_t calltime, delta, rettime;
+       unsigned long long duration;
+       int ret;
 
        if (initcall_debug) {
-               call.caller = task_pid_nr(current);
-               printk("calling  %pF @ %i\n", fn, call.caller);
+               printk("calling  %pF @ %i\n", fn, task_pid_nr(current));
                calltime = ktime_get();
-               trace_boot_call(&call, fn);
-               enable_boot_trace();
        }
 
-       ret.result = fn();
+       ret = fn();
 
        if (initcall_debug) {
-               disable_boot_trace();
                rettime = ktime_get();
                delta = ktime_sub(rettime, calltime);
-               ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-               trace_boot_ret(&ret, fn);
-               printk("initcall %pF returned %d after %Ld usecs\n", fn,
-                       ret.result, ret.duration);
+               duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+               printk("initcall %pF returned %d after %lld usecs\n", fn,
+                       ret, duration);
        }
 
        msgbuf[0] = 0;
 
-       if (ret.result && ret.result != -ENODEV && initcall_debug)
-               sprintf(msgbuf, "error code %d ", ret.result);
+       if (ret && ret != -ENODEV && initcall_debug)
+               sprintf(msgbuf, "error code %d ", ret);
 
        if (preempt_count() != count) {
                strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -770,7 +762,7 @@ int do_one_initcall(initcall_t fn)
                printk("initcall %pF returned with %s\n", fn, msgbuf);
        }
 
-       return ret.result;
+       return ret;
 }
 
 
@@ -894,7 +886,6 @@ static int __init kernel_init(void * unused)
        smp_prepare_cpus(setup_max_cpus);
 
        do_pre_smp_initcalls();
-       start_boot_trace();
 
        smp_init();
        sched_init_smp();
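
With the boot tracer gone, initcall_debug reduces to the plain ktime idiom in the do_one_initcall() hunk above. The same timing pattern in isolation (the helper name is made up; note that >> 10 only approximates a divide by 1000, so the reported "usecs" are really ns/1024):

/* Hypothetical helper using the same idiom as do_one_initcall() */
static unsigned long long time_initcall(initcall_t fn, int *ret)
{
        ktime_t calltime, delta;

        calltime = ktime_get();
        *ret = fn();
        delta = ktime_sub(ktime_get(), calltime);

        return (unsigned long long)ktime_to_ns(delta) >> 10;    /* ~usecs */
}
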
index 7a56b22..e34d94d 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/list.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
@@ -62,6 +63,9 @@ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
 
 static int nr_slots[TYPE_MAX];
 
+/* Keep track of the breakpoints attached to tasks */
+static LIST_HEAD(bp_task_head);
+
 static int constraints_initialized;
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
@@ -103,33 +107,21 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
        return 0;
 }
 
-static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
+/*
+ * Count the number of breakpoints of the same type and same task.
+ * The given event must not be on the list.
+ */
+static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
 {
-       struct perf_event_context *ctx = tsk->perf_event_ctxp;
-       struct list_head *list;
-       struct perf_event *bp;
-       unsigned long flags;
+       struct perf_event_context *ctx = bp->ctx;
+       struct perf_event *iter;
        int count = 0;
 
-       if (WARN_ONCE(!ctx, "No perf context for this task"))
-               return 0;
-
-       list = &ctx->event_list;
-
-       raw_spin_lock_irqsave(&ctx->lock, flags);
-
-       /*
-        * The current breakpoint counter is not included in the list
-        * at the open() callback time
-        */
-       list_for_each_entry(bp, list, event_entry) {
-               if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-                       if (find_slot_idx(bp) == type)
-                               count += hw_breakpoint_weight(bp);
+       list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
+               if (iter->ctx == ctx && find_slot_idx(iter) == type)
+                       count += hw_breakpoint_weight(iter);
        }
 
-       raw_spin_unlock_irqrestore(&ctx->lock, flags);
-
        return count;
 }
 
@@ -149,7 +141,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
                if (!tsk)
                        slots->pinned += max_task_bp_pinned(cpu, type);
                else
-                       slots->pinned += task_bp_pinned(tsk, type);
+                       slots->pinned += task_bp_pinned(bp, type);
                slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
                return;
@@ -162,7 +154,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
                if (!tsk)
                        nr += max_task_bp_pinned(cpu, type);
                else
-                       nr += task_bp_pinned(tsk, type);
+                       nr += task_bp_pinned(bp, type);
 
                if (nr > slots->pinned)
                        slots->pinned = nr;
@@ -188,7 +180,7 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight)
 /*
  * Add a pinned breakpoint for the given task in our constraint table
  */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
                                enum bp_type_idx type, int weight)
 {
        unsigned int *tsk_pinned;
@@ -196,10 +188,11 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
        int old_idx = 0;
        int idx = 0;
 
-       old_count = task_bp_pinned(tsk, type);
+       old_count = task_bp_pinned(bp, type);
        old_idx = old_count - 1;
        idx = old_idx + weight;
 
+       /* tsk_pinned[n] is the number of tasks having n breakpoints */
        tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
        if (enable) {
                tsk_pinned[idx]++;
@@ -222,23 +215,30 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
        int cpu = bp->cpu;
        struct task_struct *tsk = bp->ctx->task;
 
+       /* Pinned counter cpu profiling */
+       if (!tsk) {
+
+               if (enable)
+                       per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
+               else
+                       per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+               return;
+       }
+
        /* Pinned counter task profiling */
-       if (tsk) {
-               if (cpu >= 0) {
-                       toggle_bp_task_slot(tsk, cpu, enable, type, weight);
-                       return;
-               }
 
+       if (!enable)
+               list_del(&bp->hw.bp_list);
+
+       if (cpu >= 0) {
+               toggle_bp_task_slot(bp, cpu, enable, type, weight);
+       } else {
                for_each_online_cpu(cpu)
-                       toggle_bp_task_slot(tsk, cpu, enable, type, weight);
-               return;
+                       toggle_bp_task_slot(bp, cpu, enable, type, weight);
        }
 
-       /* Pinned counter cpu profiling */
        if (enable)
-               per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
-       else
-               per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+               list_add_tail(&bp->hw.bp_list, &bp_task_head);
 }
 
 /*
@@ -301,6 +301,10 @@ static int __reserve_bp_slot(struct perf_event *bp)
        weight = hw_breakpoint_weight(bp);
 
        fetch_bp_busy_slots(&slots, bp, type);
+       /*
+        * Simulate the addition of this breakpoint to the constraints
+        * and see the result.
+        */
        fetch_this_slot(&slots, weight);
 
        /* Flexible counters need to keep at least one slot */
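
For context, the rest of __reserve_bp_slot() (elided by this hunk) only has to compare the simulated totals against the per-type slot limits; sketched from memory, so treat it as an approximation rather than the verbatim continuation:

        if (slots.pinned + (slots.flexible ? 1 : 0) > nr_slots[type])
                return -ENOSPC;

        toggle_bp_slot(bp, true, type, weight);

        return 0;
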
index ff86c55..c772a3d 100644 (file)
@@ -675,7 +675,6 @@ group_sched_in(struct perf_event *group_event,
        struct perf_event *event, *partial_group = NULL;
        const struct pmu *pmu = group_event->pmu;
        bool txn = false;
-       int ret;
 
        if (group_event->state == PERF_EVENT_STATE_OFF)
                return 0;
@@ -703,14 +702,8 @@ group_sched_in(struct perf_event *group_event,
                }
        }
 
-       if (!txn)
-               return 0;
-
-       ret = pmu->commit_txn(pmu);
-       if (!ret) {
-               pmu->cancel_txn(pmu);
+       if (!txn || !pmu->commit_txn(pmu))
                return 0;
-       }
 
 group_error:
        /*
@@ -1155,9 +1148,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
         * In order to keep per-task stats reliable we need to flip the event
         * values when we flip the contexts.
         */
-       value = atomic64_read(&next_event->count);
-       value = atomic64_xchg(&event->count, value);
-       atomic64_set(&next_event->count, value);
+       value = local64_read(&next_event->count);
+       value = local64_xchg(&event->count, value);
+       local64_set(&next_event->count, value);
 
        swap(event->total_time_enabled, next_event->total_time_enabled);
        swap(event->total_time_running, next_event->total_time_running);
@@ -1547,10 +1540,10 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 
        hwc->sample_period = sample_period;
 
-       if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+       if (local64_read(&hwc->period_left) > 8*sample_period) {
                perf_disable();
                perf_event_stop(event);
-               atomic64_set(&hwc->period_left, 0);
+               local64_set(&hwc->period_left, 0);
                perf_event_start(event);
                perf_enable();
        }
@@ -1591,7 +1584,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 
                perf_disable();
                event->pmu->read(event);
-               now = atomic64_read(&event->count);
+               now = local64_read(&event->count);
                delta = now - hwc->freq_count_stamp;
                hwc->freq_count_stamp = now;
 
@@ -1743,6 +1736,11 @@ static void __perf_event_read(void *info)
        event->pmu->read(event);
 }
 
+static inline u64 perf_event_count(struct perf_event *event)
+{
+       return local64_read(&event->count) + atomic64_read(&event->child_count);
+}
+
 static u64 perf_event_read(struct perf_event *event)
 {
        /*
@@ -1762,7 +1760,7 @@ static u64 perf_event_read(struct perf_event *event)
                raw_spin_unlock_irqrestore(&ctx->lock, flags);
        }
 
-       return atomic64_read(&event->count);
+       return perf_event_count(event);
 }
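
perf_event_count() is the reader-side half of the local64 conversion running through this file: the owner-only event->count becomes a local64_t, which 64-bit architectures can implement with cheap non-LOCK'd operations, while totals folded in from exited children live in a separate atomic64_t child_count. A minimal sketch of that split, assuming asm-generic/local64.h semantics and not the real struct perf_event layout:

#include <asm/atomic.h>
#include <asm/local64.h>
#include <linux/types.h>

/* Illustrative only */
struct split_counter {
        local64_t       count;          /* updated only from owner context */
        atomic64_t      child_count;    /* accumulated from dead children */
};

static inline u64 split_counter_total(struct split_counter *c)
{
        return local64_read(&c->count) + atomic64_read(&c->child_count);
}
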
 
 /*
@@ -1883,7 +1881,7 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void perf_pending_sync(struct perf_event *event);
-static void perf_mmap_data_put(struct perf_mmap_data *data);
+static void perf_buffer_put(struct perf_buffer *buffer);
 
 static void free_event(struct perf_event *event)
 {
@@ -1891,7 +1889,7 @@ static void free_event(struct perf_event *event)
 
        if (!event->parent) {
                atomic_dec(&nr_events);
-               if (event->attr.mmap)
+               if (event->attr.mmap || event->attr.mmap_data)
                        atomic_dec(&nr_mmap_events);
                if (event->attr.comm)
                        atomic_dec(&nr_comm_events);
@@ -1899,9 +1897,9 @@ static void free_event(struct perf_event *event)
                        atomic_dec(&nr_task_events);
        }
 
-       if (event->data) {
-               perf_mmap_data_put(event->data);
-               event->data = NULL;
+       if (event->buffer) {
+               perf_buffer_put(event->buffer);
+               event->buffer = NULL;
        }
 
        if (event->destroy)
@@ -2126,13 +2124,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
        struct perf_event *event = file->private_data;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned int events = POLL_HUP;
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (data)
-               events = atomic_xchg(&data->poll, 0);
+       buffer = rcu_dereference(event->buffer);
+       if (buffer)
+               events = atomic_xchg(&buffer->poll, 0);
        rcu_read_unlock();
 
        poll_wait(file, &event->waitq, wait);
@@ -2143,7 +2141,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 static void perf_event_reset(struct perf_event *event)
 {
        (void)perf_event_read(event);
-       atomic64_set(&event->count, 0);
+       local64_set(&event->count, 0);
        perf_event_update_userpage(event);
 }
 
@@ -2342,14 +2340,14 @@ static int perf_event_index(struct perf_event *event)
 void perf_event_update_userpage(struct perf_event *event)
 {
        struct perf_event_mmap_page *userpg;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (!data)
+       buffer = rcu_dereference(event->buffer);
+       if (!buffer)
                goto unlock;
 
-       userpg = data->user_page;
+       userpg = buffer->user_page;
 
        /*
         * Disable preemption so as to not let the corresponding user-space
@@ -2359,9 +2357,9 @@ void perf_event_update_userpage(struct perf_event *event)
        ++userpg->lock;
        barrier();
        userpg->index = perf_event_index(event);
-       userpg->offset = atomic64_read(&event->count);
+       userpg->offset = perf_event_count(event);
        if (event->state == PERF_EVENT_STATE_ACTIVE)
-               userpg->offset -= atomic64_read(&event->hw.prev_count);
+               userpg->offset -= local64_read(&event->hw.prev_count);
 
        userpg->time_enabled = event->total_time_enabled +
                        atomic64_read(&event->child_total_time_enabled);
@@ -2376,6 +2374,25 @@ unlock:
        rcu_read_unlock();
 }
 
+static unsigned long perf_data_size(struct perf_buffer *buffer);
+
+static void
+perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
+{
+       long max_size = perf_data_size(buffer);
+
+       if (watermark)
+               buffer->watermark = min(max_size, watermark);
+
+       if (!buffer->watermark)
+               buffer->watermark = max_size / 2;
+
+       if (flags & PERF_BUFFER_WRITABLE)
+               buffer->writable = 1;
+
+       atomic_set(&buffer->refcount, 1);
+}
+
 #ifndef CONFIG_PERF_USE_VMALLOC
 
 /*
@@ -2383,15 +2400,15 @@ unlock:
  */
 
 static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
 {
-       if (pgoff > data->nr_pages)
+       if (pgoff > buffer->nr_pages)
                return NULL;
 
        if (pgoff == 0)
-               return virt_to_page(data->user_page);
+               return virt_to_page(buffer->user_page);
 
-       return virt_to_page(data->data_pages[pgoff - 1]);
+       return virt_to_page(buffer->data_pages[pgoff - 1]);
 }
 
 static void *perf_mmap_alloc_page(int cpu)
@@ -2407,42 +2424,44 @@ static void *perf_mmap_alloc_page(int cpu)
        return page_address(page);
 }
 
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long size;
        int i;
 
-       size = sizeof(struct perf_mmap_data);
+       size = sizeof(struct perf_buffer);
        size += nr_pages * sizeof(void *);
 
-       data = kzalloc(size, GFP_KERNEL);
-       if (!data)
+       buffer = kzalloc(size, GFP_KERNEL);
+       if (!buffer)
                goto fail;
 
-       data->user_page = perf_mmap_alloc_page(event->cpu);
-       if (!data->user_page)
+       buffer->user_page = perf_mmap_alloc_page(cpu);
+       if (!buffer->user_page)
                goto fail_user_page;
 
        for (i = 0; i < nr_pages; i++) {
-               data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
-               if (!data->data_pages[i])
+               buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
+               if (!buffer->data_pages[i])
                        goto fail_data_pages;
        }
 
-       data->nr_pages = nr_pages;
+       buffer->nr_pages = nr_pages;
+
+       perf_buffer_init(buffer, watermark, flags);
 
-       return data;
+       return buffer;
 
 fail_data_pages:
        for (i--; i >= 0; i--)
-               free_page((unsigned long)data->data_pages[i]);
+               free_page((unsigned long)buffer->data_pages[i]);
 
-       free_page((unsigned long)data->user_page);
+       free_page((unsigned long)buffer->user_page);
 
 fail_user_page:
-       kfree(data);
+       kfree(buffer);
 
 fail:
        return NULL;
@@ -2456,17 +2475,17 @@ static void perf_mmap_free_page(unsigned long addr)
        __free_page(page);
 }
 
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
 {
        int i;
 
-       perf_mmap_free_page((unsigned long)data->user_page);
-       for (i = 0; i < data->nr_pages; i++)
-               perf_mmap_free_page((unsigned long)data->data_pages[i]);
-       kfree(data);
+       perf_mmap_free_page((unsigned long)buffer->user_page);
+       for (i = 0; i < buffer->nr_pages; i++)
+               perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
+       kfree(buffer);
 }
 
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
 {
        return 0;
 }
@@ -2479,18 +2498,18 @@ static inline int page_order(struct perf_mmap_data *data)
  * Required for architectures that have d-cache aliasing issues.
  */
 
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
 {
-       return data->page_order;
+       return buffer->page_order;
 }
 
 static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
 {
-       if (pgoff > (1UL << page_order(data)))
+       if (pgoff > (1UL << page_order(buffer)))
                return NULL;
 
-       return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
+       return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
 }
 
 static void perf_mmap_unmark_page(void *addr)
@@ -2500,57 +2519,59 @@ static void perf_mmap_unmark_page(void *addr)
        page->mapping = NULL;
 }
 
-static void perf_mmap_data_free_work(struct work_struct *work)
+static void perf_buffer_free_work(struct work_struct *work)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        void *base;
        int i, nr;
 
-       data = container_of(work, struct perf_mmap_data, work);
-       nr = 1 << page_order(data);
+       buffer = container_of(work, struct perf_buffer, work);
+       nr = 1 << page_order(buffer);
 
-       base = data->user_page;
+       base = buffer->user_page;
        for (i = 0; i < nr + 1; i++)
                perf_mmap_unmark_page(base + (i * PAGE_SIZE));
 
        vfree(base);
-       kfree(data);
+       kfree(buffer);
 }
 
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
 {
-       schedule_work(&data->work);
+       schedule_work(&buffer->work);
 }
 
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long size;
        void *all_buf;
 
-       size = sizeof(struct perf_mmap_data);
+       size = sizeof(struct perf_buffer);
        size += sizeof(void *);
 
-       data = kzalloc(size, GFP_KERNEL);
-       if (!data)
+       buffer = kzalloc(size, GFP_KERNEL);
+       if (!buffer)
                goto fail;
 
-       INIT_WORK(&data->work, perf_mmap_data_free_work);
+       INIT_WORK(&buffer->work, perf_buffer_free_work);
 
        all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
        if (!all_buf)
                goto fail_all_buf;
 
-       data->user_page = all_buf;
-       data->data_pages[0] = all_buf + PAGE_SIZE;
-       data->page_order = ilog2(nr_pages);
-       data->nr_pages = 1;
+       buffer->user_page = all_buf;
+       buffer->data_pages[0] = all_buf + PAGE_SIZE;
+       buffer->page_order = ilog2(nr_pages);
+       buffer->nr_pages = 1;
+
+       perf_buffer_init(buffer, watermark, flags);
 
-       return data;
+       return buffer;
 
 fail_all_buf:
-       kfree(data);
+       kfree(buffer);
 
 fail:
        return NULL;
@@ -2558,15 +2579,15 @@ fail:
 
 #endif
 
-static unsigned long perf_data_size(struct perf_mmap_data *data)
+static unsigned long perf_data_size(struct perf_buffer *buffer)
 {
-       return data->nr_pages << (PAGE_SHIFT + page_order(data));
+       return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
 }
 
 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        struct perf_event *event = vma->vm_file->private_data;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        int ret = VM_FAULT_SIGBUS;
 
        if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -2576,14 +2597,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        }
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (!data)
+       buffer = rcu_dereference(event->buffer);
+       if (!buffer)
                goto unlock;
 
        if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
                goto unlock;
 
-       vmf->page = perf_mmap_to_page(data, vmf->pgoff);
+       vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
        if (!vmf->page)
                goto unlock;
 
@@ -2598,52 +2619,35 @@ unlock:
        return ret;
 }
 
-static void
-perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
-{
-       long max_size = perf_data_size(data);
-
-       if (event->attr.watermark) {
-               data->watermark = min_t(long, max_size,
-                                       event->attr.wakeup_watermark);
-       }
-
-       if (!data->watermark)
-               data->watermark = max_size / 2;
-
-       atomic_set(&data->refcount, 1);
-       rcu_assign_pointer(event->data, data);
-}
-
-static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
+static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
 
-       data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
-       perf_mmap_data_free(data);
+       buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
+       perf_buffer_free(buffer);
 }
 
-static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
+static struct perf_buffer *perf_buffer_get(struct perf_event *event)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (data) {
-               if (!atomic_inc_not_zero(&data->refcount))
-                       data = NULL;
+       buffer = rcu_dereference(event->buffer);
+       if (buffer) {
+               if (!atomic_inc_not_zero(&buffer->refcount))
+                       buffer = NULL;
        }
        rcu_read_unlock();
 
-       return data;
+       return buffer;
 }
 
-static void perf_mmap_data_put(struct perf_mmap_data *data)
+static void perf_buffer_put(struct perf_buffer *buffer)
 {
-       if (!atomic_dec_and_test(&data->refcount))
+       if (!atomic_dec_and_test(&buffer->refcount))
                return;
 
-       call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
+       call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
 }
 
 static void perf_mmap_open(struct vm_area_struct *vma)
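
perf_buffer_get()/perf_buffer_put() above keep the lifetime scheme the old perf_mmap_data code already used: lookups happen under rcu_read_lock() and only succeed if the refcount has not yet dropped to zero, and the final put defers the free through call_rcu(). The generic shape of that pattern, with made-up names:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct obj {
        atomic_t        refcount;
        struct rcu_head rcu_head;
};

static struct obj *obj_get(struct obj **slot)   /* *slot is RCU-assigned */
{
        struct obj *o;

        rcu_read_lock();
        o = rcu_dereference(*slot);
        if (o && !atomic_inc_not_zero(&o->refcount))
                o = NULL;               /* lost the race with the final put */
        rcu_read_unlock();

        return o;
}

static void obj_free_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct obj, rcu_head));
}

static void obj_put(struct obj *o)
{
        if (atomic_dec_and_test(&o->refcount))
                call_rcu(&o->rcu_head, obj_free_rcu);
}
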
@@ -2658,16 +2662,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
        struct perf_event *event = vma->vm_file->private_data;
 
        if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
-               unsigned long size = perf_data_size(event->data);
+               unsigned long size = perf_data_size(event->buffer);
                struct user_struct *user = event->mmap_user;
-               struct perf_mmap_data *data = event->data;
+               struct perf_buffer *buffer = event->buffer;
 
                atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
                vma->vm_mm->locked_vm -= event->mmap_locked;
-               rcu_assign_pointer(event->data, NULL);
+               rcu_assign_pointer(event->buffer, NULL);
                mutex_unlock(&event->mmap_mutex);
 
-               perf_mmap_data_put(data);
+               perf_buffer_put(buffer);
                free_uid(user);
        }
 }
@@ -2685,11 +2689,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        unsigned long user_locked, user_lock_limit;
        struct user_struct *user = current_user();
        unsigned long locked, lock_limit;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long vma_size;
        unsigned long nr_pages;
        long user_extra, extra;
-       int ret = 0;
+       int ret = 0, flags = 0;
 
        /*
         * Don't allow mmap() of inherited per-task counters. This would
@@ -2706,7 +2710,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        nr_pages = (vma_size / PAGE_SIZE) - 1;
 
        /*
-        * If we have data pages ensure they're a power-of-two number, so we
+        * If we have buffer pages ensure they're a power-of-two number, so we
         * can do bitmasks instead of modulo.
         */
        if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -2720,9 +2724,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
        WARN_ON_ONCE(event->ctx->parent_ctx);
        mutex_lock(&event->mmap_mutex);
-       if (event->data) {
-               if (event->data->nr_pages == nr_pages)
-                       atomic_inc(&event->data->refcount);
+       if (event->buffer) {
+               if (event->buffer->nr_pages == nr_pages)
+                       atomic_inc(&event->buffer->refcount);
                else
                        ret = -EINVAL;
                goto unlock;
@@ -2752,17 +2756,18 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                goto unlock;
        }
 
-       WARN_ON(event->data);
+       WARN_ON(event->buffer);
+
+       if (vma->vm_flags & VM_WRITE)
+               flags |= PERF_BUFFER_WRITABLE;
 
-       data = perf_mmap_data_alloc(event, nr_pages);
-       if (!data) {
+       buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
+                                  event->cpu, flags);
+       if (!buffer) {
                ret = -ENOMEM;
                goto unlock;
        }
-
-       perf_mmap_data_init(event, data);
-       if (vma->vm_flags & VM_WRITE)
-               event->data->writable = 1;
+       rcu_assign_pointer(event->buffer, buffer);
 
        atomic_long_add(user_extra, &user->locked_vm);
        event->mmap_locked = extra;
@@ -2941,11 +2946,6 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
        return NULL;
 }
 
-__weak
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
-}
-
 
 /*
  * We assume there is only KVM supporting the callbacks.
@@ -2971,15 +2971,15 @@ EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
 /*
  * Output
  */
-static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
+static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
                              unsigned long offset, unsigned long head)
 {
        unsigned long mask;
 
-       if (!data->writable)
+       if (!buffer->writable)
                return true;
 
-       mask = perf_data_size(data) - 1;
+       mask = perf_data_size(buffer) - 1;
 
        offset = (offset - tail) & mask;
        head   = (head   - tail) & mask;
@@ -2992,7 +2992,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
 
 static void perf_output_wakeup(struct perf_output_handle *handle)
 {
-       atomic_set(&handle->data->poll, POLL_IN);
+       atomic_set(&handle->buffer->poll, POLL_IN);
 
        if (handle->nmi) {
                handle->event->pending_wakeup = 1;
@@ -3012,45 +3012,45 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
  */
 static void perf_output_get_handle(struct perf_output_handle *handle)
 {
-       struct perf_mmap_data *data = handle->data;
+       struct perf_buffer *buffer = handle->buffer;
 
        preempt_disable();
-       local_inc(&data->nest);
-       handle->wakeup = local_read(&data->wakeup);
+       local_inc(&buffer->nest);
+       handle->wakeup = local_read(&buffer->wakeup);
 }
 
 static void perf_output_put_handle(struct perf_output_handle *handle)
 {
-       struct perf_mmap_data *data = handle->data;
+       struct perf_buffer *buffer = handle->buffer;
        unsigned long head;
 
 again:
-       head = local_read(&data->head);
+       head = local_read(&buffer->head);
 
        /*
         * IRQ/NMI can happen here, which means we can miss a head update.
         */
 
-       if (!local_dec_and_test(&data->nest))
+       if (!local_dec_and_test(&buffer->nest))
                goto out;
 
        /*
         * Publish the known good head. Rely on the full barrier implied
-        * by atomic_dec_and_test() order the data->head read and this
+        * by atomic_dec_and_test() to order the buffer->head read and this
         * write.
         */
-       data->user_page->data_head = head;
+       buffer->user_page->data_head = head;
 
        /*
         * Now check if we missed an update, rely on the (compiler)
-        * barrier in atomic_dec_and_test() to re-read data->head.
+        * barrier in atomic_dec_and_test() to re-read buffer->head.
         */
-       if (unlikely(head != local_read(&data->head))) {
-               local_inc(&data->nest);
+       if (unlikely(head != local_read(&buffer->head))) {
+               local_inc(&buffer->nest);
                goto again;
        }
 
-       if (handle->wakeup != local_read(&data->wakeup))
+       if (handle->wakeup != local_read(&buffer->wakeup))
                perf_output_wakeup(handle);
 
  out:
@@ -3070,12 +3070,12 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
                buf += size;
                handle->size -= size;
                if (!handle->size) {
-                       struct perf_mmap_data *data = handle->data;
+                       struct perf_buffer *buffer = handle->buffer;
 
                        handle->page++;
-                       handle->page &= data->nr_pages - 1;
-                       handle->addr = data->data_pages[handle->page];
-                       handle->size = PAGE_SIZE << page_order(data);
+                       handle->page &= buffer->nr_pages - 1;
+                       handle->addr = buffer->data_pages[handle->page];
+                       handle->size = PAGE_SIZE << page_order(buffer);
                }
        } while (len);
 }
@@ -3084,7 +3084,7 @@ int perf_output_begin(struct perf_output_handle *handle,
                      struct perf_event *event, unsigned int size,
                      int nmi, int sample)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long tail, offset, head;
        int have_lost;
        struct {
@@ -3100,19 +3100,19 @@ int perf_output_begin(struct perf_output_handle *handle,
        if (event->parent)
                event = event->parent;
 
-       data = rcu_dereference(event->data);
-       if (!data)
+       buffer = rcu_dereference(event->buffer);
+       if (!buffer)
                goto out;
 
-       handle->data    = data;
+       handle->buffer  = buffer;
        handle->event   = event;
        handle->nmi     = nmi;
        handle->sample  = sample;
 
-       if (!data->nr_pages)
+       if (!buffer->nr_pages)
                goto out;
 
-       have_lost = local_read(&data->lost);
+       have_lost = local_read(&buffer->lost);
        if (have_lost)
                size += sizeof(lost_event);
 
@@ -3124,30 +3124,30 @@ int perf_output_begin(struct perf_output_handle *handle,
                 * tail pointer. So that all reads will be completed before the
                 * write is issued.
                 */
-               tail = ACCESS_ONCE(data->user_page->data_tail);
+               tail = ACCESS_ONCE(buffer->user_page->data_tail);
                smp_rmb();
-               offset = head = local_read(&data->head);
+               offset = head = local_read(&buffer->head);
                head += size;
-               if (unlikely(!perf_output_space(data, tail, offset, head)))
+               if (unlikely(!perf_output_space(buffer, tail, offset, head)))
                        goto fail;
-       } while (local_cmpxchg(&data->head, offset, head) != offset);
+       } while (local_cmpxchg(&buffer->head, offset, head) != offset);
 
-       if (head - local_read(&data->wakeup) > data->watermark)
-               local_add(data->watermark, &data->wakeup);
+       if (head - local_read(&buffer->wakeup) > buffer->watermark)
+               local_add(buffer->watermark, &buffer->wakeup);
 
-       handle->page = offset >> (PAGE_SHIFT + page_order(data));
-       handle->page &= data->nr_pages - 1;
-       handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
-       handle->addr = data->data_pages[handle->page];
+       handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
+       handle->page &= buffer->nr_pages - 1;
+       handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
+       handle->addr = buffer->data_pages[handle->page];
        handle->addr += handle->size;
-       handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
+       handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
 
        if (have_lost) {
                lost_event.header.type = PERF_RECORD_LOST;
                lost_event.header.misc = 0;
                lost_event.header.size = sizeof(lost_event);
                lost_event.id          = event->id;
-               lost_event.lost        = local_xchg(&data->lost, 0);
+               lost_event.lost        = local_xchg(&buffer->lost, 0);
 
                perf_output_put(handle, lost_event);
        }
@@ -3155,7 +3155,7 @@ int perf_output_begin(struct perf_output_handle *handle,
        return 0;
 
 fail:
-       local_inc(&data->lost);
+       local_inc(&buffer->lost);
        perf_output_put_handle(handle);
 out:
        rcu_read_unlock();
@@ -3166,15 +3166,15 @@ out:
 void perf_output_end(struct perf_output_handle *handle)
 {
        struct perf_event *event = handle->event;
-       struct perf_mmap_data *data = handle->data;
+       struct perf_buffer *buffer = handle->buffer;
 
        int wakeup_events = event->attr.wakeup_events;
 
        if (handle->sample && wakeup_events) {
-               int events = local_inc_return(&data->events);
+               int events = local_inc_return(&buffer->events);
                if (events >= wakeup_events) {
-                       local_sub(wakeup_events, &data->events);
-                       local_inc(&data->wakeup);
+                       local_sub(wakeup_events, &buffer->events);
+                       local_inc(&buffer->wakeup);
                }
        }
 
@@ -3211,7 +3211,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
        u64 values[4];
        int n = 0;
 
-       values[n++] = atomic64_read(&event->count);
+       values[n++] = perf_event_count(event);
        if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
                values[n++] = event->total_time_enabled +
                        atomic64_read(&event->child_total_time_enabled);
@@ -3248,7 +3248,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
        if (leader != event)
                leader->pmu->read(leader);
 
-       values[n++] = atomic64_read(&leader->count);
+       values[n++] = perf_event_count(leader);
        if (read_format & PERF_FORMAT_ID)
                values[n++] = primary_event_id(leader);
 
@@ -3260,7 +3260,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
                if (sub != event)
                        sub->pmu->read(sub);
 
-               values[n++] = atomic64_read(&sub->count);
+               values[n++] = perf_event_count(sub);
                if (read_format & PERF_FORMAT_ID)
                        values[n++] = primary_event_id(sub);
 
@@ -3491,7 +3491,7 @@ perf_event_read_event(struct perf_event *event,
 /*
  * task tracking -- fork/exit
  *
- * enabled by: attr.comm | attr.mmap | attr.task
+ * enabled by: attr.comm | attr.mmap | attr.mmap_data | attr.task
  */
 
 struct perf_task_event {
@@ -3541,7 +3541,8 @@ static int perf_event_task_match(struct perf_event *event)
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
 
-       if (event->attr.comm || event->attr.mmap || event->attr.task)
+       if (event->attr.comm || event->attr.mmap ||
+           event->attr.mmap_data || event->attr.task)
                return 1;
 
        return 0;
@@ -3766,7 +3767,8 @@ static void perf_event_mmap_output(struct perf_event *event,
 }
 
 static int perf_event_mmap_match(struct perf_event *event,
-                                  struct perf_mmap_event *mmap_event)
+                                  struct perf_mmap_event *mmap_event,
+                                  int executable)
 {
        if (event->state < PERF_EVENT_STATE_INACTIVE)
                return 0;
@@ -3774,19 +3776,21 @@ static int perf_event_mmap_match(struct perf_event *event,
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
 
-       if (event->attr.mmap)
+       if ((!executable && event->attr.mmap_data) ||
+           (executable && event->attr.mmap))
                return 1;
 
        return 0;
 }
 
 static void perf_event_mmap_ctx(struct perf_event_context *ctx,
-                                 struct perf_mmap_event *mmap_event)
+                                 struct perf_mmap_event *mmap_event,
+                                 int executable)
 {
        struct perf_event *event;
 
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-               if (perf_event_mmap_match(event, mmap_event))
+               if (perf_event_mmap_match(event, mmap_event, executable))
                        perf_event_mmap_output(event, mmap_event);
        }
 }
@@ -3830,6 +3834,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
                if (!vma->vm_mm) {
                        name = strncpy(tmp, "[vdso]", sizeof(tmp));
                        goto got_name;
+               } else if (vma->vm_start <= vma->vm_mm->start_brk &&
+                               vma->vm_end >= vma->vm_mm->brk) {
+                       name = strncpy(tmp, "[heap]", sizeof(tmp));
+                       goto got_name;
+               } else if (vma->vm_start <= vma->vm_mm->start_stack &&
+                               vma->vm_end >= vma->vm_mm->start_stack) {
+                       name = strncpy(tmp, "[stack]", sizeof(tmp));
+                       goto got_name;
                }
 
                name = strncpy(tmp, "//anon", sizeof(tmp));
@@ -3846,17 +3858,17 @@ got_name:
 
        rcu_read_lock();
        cpuctx = &get_cpu_var(perf_cpu_context);
-       perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
+       perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC);
        ctx = rcu_dereference(current->perf_event_ctxp);
        if (ctx)
-               perf_event_mmap_ctx(ctx, mmap_event);
+               perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC);
        put_cpu_var(perf_cpu_context);
        rcu_read_unlock();
 
        kfree(buf);
 }
 
-void __perf_event_mmap(struct vm_area_struct *vma)
+void perf_event_mmap(struct vm_area_struct *vma)
 {
        struct perf_mmap_event mmap_event;
 
@@ -4018,14 +4030,14 @@ static u64 perf_swevent_set_period(struct perf_event *event)
        hwc->last_period = hwc->sample_period;
 
 again:
-       old = val = atomic64_read(&hwc->period_left);
+       old = val = local64_read(&hwc->period_left);
        if (val < 0)
                return 0;
 
        nr = div64_u64(period + val, period);
        offset = nr * period;
        val -= offset;
-       if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
+       if (local64_cmpxchg(&hwc->period_left, old, val) != old)
                goto again;
 
        return nr;
@@ -4064,7 +4076,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
 {
        struct hw_perf_event *hwc = &event->hw;
 
-       atomic64_add(nr, &event->count);
+       local64_add(nr, &event->count);
 
        if (!regs)
                return;
@@ -4075,7 +4087,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
        if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
                return perf_swevent_overflow(event, 1, nmi, data, regs);
 
-       if (atomic64_add_negative(nr, &hwc->period_left))
+       if (local64_add_negative(nr, &hwc->period_left))
                return;
 
        perf_swevent_overflow(event, 0, nmi, data, regs);
@@ -4213,14 +4225,12 @@ int perf_swevent_get_recursion_context(void)
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
-void perf_swevent_put_recursion_context(int rctx)
+void inline perf_swevent_put_recursion_context(int rctx)
 {
        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
        barrier();
        cpuctx->recursion[rctx]--;
 }
-EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
-
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
                            struct pt_regs *regs, u64 addr)
@@ -4368,8 +4378,8 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
        u64 now;
 
        now = cpu_clock(cpu);
-       prev = atomic64_xchg(&event->hw.prev_count, now);
-       atomic64_add(now - prev, &event->count);
+       prev = local64_xchg(&event->hw.prev_count, now);
+       local64_add(now - prev, &event->count);
 }
 
 static int cpu_clock_perf_event_enable(struct perf_event *event)
@@ -4377,7 +4387,7 @@ static int cpu_clock_perf_event_enable(struct perf_event *event)
        struct hw_perf_event *hwc = &event->hw;
        int cpu = raw_smp_processor_id();
 
-       atomic64_set(&hwc->prev_count, cpu_clock(cpu));
+       local64_set(&hwc->prev_count, cpu_clock(cpu));
        perf_swevent_start_hrtimer(event);
 
        return 0;
@@ -4409,9 +4419,9 @@ static void task_clock_perf_event_update(struct perf_event *event, u64 now)
        u64 prev;
        s64 delta;
 
-       prev = atomic64_xchg(&event->hw.prev_count, now);
+       prev = local64_xchg(&event->hw.prev_count, now);
        delta = now - prev;
-       atomic64_add(delta, &event->count);
+       local64_add(delta, &event->count);
 }
 
 static int task_clock_perf_event_enable(struct perf_event *event)
@@ -4421,7 +4431,7 @@ static int task_clock_perf_event_enable(struct perf_event *event)
 
        now = event->ctx->time;
 
-       atomic64_set(&hwc->prev_count, now);
+       local64_set(&hwc->prev_count, now);
 
        perf_swevent_start_hrtimer(event);
 
@@ -4601,7 +4611,7 @@ static int perf_tp_event_match(struct perf_event *event,
 }
 
 void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
-                  struct pt_regs *regs, struct hlist_head *head)
+                  struct pt_regs *regs, struct hlist_head *head, int rctx)
 {
        struct perf_sample_data data;
        struct perf_event *event;
@@ -4615,12 +4625,12 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
        perf_sample_data_init(&data, addr);
        data.raw = &raw;
 
-       rcu_read_lock();
        hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
                if (perf_tp_event_match(event, &data, regs))
                        perf_swevent_add(event, count, 1, &data, regs);
        }
-       rcu_read_unlock();
+
+       perf_swevent_put_recursion_context(rctx);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
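
Note the contract change: perf_tp_event() now receives the swevent recursion context from its caller and releases it itself, and the explicit rcu_read_lock()/unlock() pair is dropped, presumably because tracepoint callers already run with RCU read-side protection. A hedged sketch of the expected caller pairing; the real callers go through the perf_trace_* glue rather than open-coding this:

        int rctx;

        rctx = perf_swevent_get_recursion_context();
        if (rctx < 0)
                return;

        /* ... build the raw record for this tracepoint hit ... */

        /* perf_tp_event() drops the recursion context on our behalf */
        perf_tp_event(addr, count, record, entry_size, regs, head, rctx);
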
 
@@ -4864,7 +4874,7 @@ perf_event_alloc(struct perf_event_attr *attr,
                hwc->sample_period = 1;
        hwc->last_period = hwc->sample_period;
 
-       atomic64_set(&hwc->period_left, hwc->sample_period);
+       local64_set(&hwc->period_left, hwc->sample_period);
 
        /*
         * we currently do not support PERF_FORMAT_GROUP on inherited events
@@ -4913,7 +4923,7 @@ done:
 
        if (!event->parent) {
                atomic_inc(&nr_events);
-               if (event->attr.mmap)
+               if (event->attr.mmap || event->attr.mmap_data)
                        atomic_inc(&nr_mmap_events);
                if (event->attr.comm)
                        atomic_inc(&nr_comm_events);
@@ -5007,7 +5017,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-       struct perf_mmap_data *data = NULL, *old_data = NULL;
+       struct perf_buffer *buffer = NULL, *old_buffer = NULL;
        int ret = -EINVAL;
 
        if (!output_event)
@@ -5037,19 +5047,19 @@ set:
 
        if (output_event) {
                /* get the buffer we want to redirect to */
-               data = perf_mmap_data_get(output_event);
-               if (!data)
+               buffer = perf_buffer_get(output_event);
+               if (!buffer)
                        goto unlock;
        }
 
-       old_data = event->data;
-       rcu_assign_pointer(event->data, data);
+       old_buffer = event->buffer;
+       rcu_assign_pointer(event->buffer, buffer);
        ret = 0;
 unlock:
        mutex_unlock(&event->mmap_mutex);
 
-       if (old_data)
-               perf_mmap_data_put(old_data);
+       if (old_buffer)
+               perf_buffer_put(old_buffer);
 out:
        return ret;
 }
@@ -5298,7 +5308,7 @@ inherit_event(struct perf_event *parent_event,
                hwc->sample_period = sample_period;
                hwc->last_period   = sample_period;
 
-               atomic64_set(&hwc->period_left, sample_period);
+               local64_set(&hwc->period_left, sample_period);
        }
 
        child_event->overflow_handler = parent_event->overflow_handler;
@@ -5359,12 +5369,12 @@ static void sync_child_event(struct perf_event *child_event,
        if (child_event->attr.inherit_stat)
                perf_event_read_event(child_event, child);
 
-       child_val = atomic64_read(&child_event->count);
+       child_val = perf_event_count(child_event);
 
        /*
         * Add back the child's count to the parent's count:
         */
-       atomic64_add(child_val, &parent_event->count);
+       atomic64_add(child_val, &parent_event->child_count);
        atomic64_add(child_event->total_time_enabled,
                     &parent_event->child_total_time_enabled);
        atomic64_add(child_event->total_time_running,
index f52a880..265cf3a 100644 (file)
@@ -3726,7 +3726,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
  * off of preempt_enable. Kernel preemptions off return from interrupt
  * occur there and call schedule directly.
  */
-asmlinkage void __sched preempt_schedule(void)
+asmlinkage void __sched notrace preempt_schedule(void)
 {
        struct thread_info *ti = current_thread_info();
 
@@ -3738,9 +3738,9 @@ asmlinkage void __sched preempt_schedule(void)
                return;
 
        do {
-               add_preempt_count(PREEMPT_ACTIVE);
+               add_preempt_count_notrace(PREEMPT_ACTIVE);
                schedule();
-               sub_preempt_count(PREEMPT_ACTIVE);
+               sub_preempt_count_notrace(PREEMPT_ACTIVE);
 
                /*
                 * Check again in case we missed a preemption opportunity
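
Marking preempt_schedule() notrace and switching to the *_notrace preempt-count helpers keeps the function and preemption tracers from recursing into the very path that services a traced preemption. Approximate definitions, from memory, so treat them as assumptions rather than quotes:

/* include/linux/compiler.h (approximate) */
#define notrace __attribute__((no_instrument_function))

/* include/linux/preempt.h (approximate): adjust the count without calling
 * the traceable add_preempt_count()/sub_preempt_count() helpers */
#define add_preempt_count_notrace(val) \
        do { preempt_count() += (val); } while (0)
#define sub_preempt_count_notrace(val) \
        do { preempt_count() -= (val); } while (0)
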
index 8b1797c..c7683fd 100644 (file)
@@ -194,15 +194,6 @@ config PREEMPT_TRACER
          enabled. This option and the irqs-off timing option can be
          used together or separately.)
 
-config SYSPROF_TRACER
-       bool "Sysprof Tracer"
-       depends on X86
-       select GENERIC_TRACER
-       select CONTEXT_SWITCH_TRACER
-       help
-         This tracer provides the trace needed by the 'Sysprof' userspace
-         tool.
-
 config SCHED_TRACER
        bool "Scheduling Latency Tracer"
        select GENERIC_TRACER
@@ -229,23 +220,6 @@ config FTRACE_SYSCALLS
        help
          Basic tracer to catch the syscall entry and exit events.
 
-config BOOT_TRACER
-       bool "Trace boot initcalls"
-       select GENERIC_TRACER
-       select CONTEXT_SWITCH_TRACER
-       help
-         This tracer helps developers to optimize boot times: it records
-         the timings of the initcalls and traces key events and the identity
-         of tasks that can cause boot delays, such as context-switches.
-
-         Its aim is to be parsed by the scripts/bootgraph.pl tool to
-         produce pretty graphics about boot inefficiencies, giving a visual
-         representation of the delays during initcalls - but the raw
-         /debug/tracing/trace text output is readable too.
-
-         You must pass in initcall_debug and ftrace=initcall to the kernel
-         command line to enable this on bootup.
-
 config TRACE_BRANCH_PROFILING
        bool
        select GENERIC_TRACER
@@ -325,28 +299,6 @@ config BRANCH_TRACER
 
          Say N if unsure.
 
-config KSYM_TRACER
-       bool "Trace read and write access on kernel memory locations"
-       depends on HAVE_HW_BREAKPOINT
-       select TRACING
-       help
-         This tracer helps find read and write operations on any given kernel
-         symbol i.e. /proc/kallsyms.
-
-config PROFILE_KSYM_TRACER
-       bool "Profile all kernel memory accesses on 'watched' variables"
-       depends on KSYM_TRACER
-       help
-         This tracer profiles kernel accesses on variables watched through the
-         ksym tracer ftrace plugin. Depending upon the hardware, all read
-         and write operations on kernel variables can be monitored for
-         accesses.
-
-         The results will be displayed in:
-         /debugfs/tracing/profile_ksym
-
-         Say N if unsure.
-
 config STACK_TRACER
        bool "Trace max stack"
        depends on HAVE_FUNCTION_TRACER
@@ -371,26 +323,6 @@ config STACK_TRACER
 
          Say N if unsure.
 
-config KMEMTRACE
-       bool "Trace SLAB allocations"
-       select GENERIC_TRACER
-       help
-         kmemtrace provides tracing for slab allocator functions, such as
-         kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
-         data is then fed to the userspace application in order to analyse
-         allocation hotspots, internal fragmentation and so on, making it
-         possible to see how well an allocator performs, as well as debug
-         and profile kernel code.
-
-         This requires an userspace application to use. See
-         Documentation/trace/kmemtrace.txt for more information.
-
-         Saying Y will make the kernel somewhat larger and slower. However,
-         if you disable kmemtrace at run-time or boot-time, the performance
-         impact is minimal (depending on the arch the kernel is built for).
-
-         If unsure, say N.
-
 config WORKQUEUE_TRACER
        bool "Trace workqueues"
        select GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index ffb1a5b..438e84a 100644 (file)
@@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_TRACING) += trace_printk.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
-obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
@@ -38,10 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
 obj-$(CONFIG_NOP_TRACER) += trace_nop.o
 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
-obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
-obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
 ifeq ($(CONFIG_BLOCK),y)
@@ -55,7 +52,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
-obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
 obj-$(CONFIG_EVENT_TRACING) += power-traces.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6d2cb14..0d88ce9 100644 (file)
@@ -1883,7 +1883,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
        struct hlist_head *hhd;
        struct hlist_node *n;
        unsigned long key;
-       int resched;
 
        key = hash_long(ip, FTRACE_HASH_BITS);
 
@@ -1897,12 +1896,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
         * period. This syncs the hash iteration and freeing of items
         * on the hash. rcu_read_lock is too dangerous here.
         */
-       resched = ftrace_preempt_disable();
+       preempt_disable_notrace();
        hlist_for_each_entry_rcu(entry, n, hhd, node) {
                if (entry->ip == ip)
                        entry->ops->func(ip, parent_ip, &entry->data);
        }
-       ftrace_preempt_enable(resched);
+       preempt_enable_notrace();
 }
 
 static struct ftrace_ops trace_probe_ops __read_mostly =
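The hunk above is one instance of a conversion repeated throughout this merge: the ftrace_preempt_disable()/ftrace_preempt_enable() pair (its now-deleted definition still appears in the trace.h hunk further down) is replaced by the plain notrace preemption helpers, which became safe once preempt_schedule() itself was marked notrace in the sched.c change above. In shorthand:

/* Old pattern, helper now removed (see the trace.h hunk below): */
int resched = ftrace_preempt_disable();
/* ... touch tracer state ... */
ftrace_preempt_enable(resched);

/* New pattern used in ftrace.c, ring_buffer.c, trace.c and trace_clock.c: */
preempt_disable_notrace();
/* ... touch tracer state ... */
preempt_enable_notrace();
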
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
deleted file mode 100644 (file)
index bbfc1bb..0000000
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Memory allocator tracing
- *
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- */
-
-#include <linux/tracepoint.h>
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/dcache.h>
-#include <linux/fs.h>
-
-#include <linux/kmemtrace.h>
-
-#include "trace_output.h"
-#include "trace.h"
-
-/* Select an alternative, minimalistic output than the original one */
-#define TRACE_KMEM_OPT_MINIMAL 0x1
-
-static struct tracer_opt kmem_opts[] = {
-       /* Default disable the minimalistic output */
-       { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
-       { }
-};
-
-static struct tracer_flags kmem_tracer_flags = {
-       .val                    = 0,
-       .opts                   = kmem_opts
-};
-
-static struct trace_array *kmemtrace_array;
-
-/* Trace allocations */
-static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
-                                  unsigned long call_site,
-                                  const void *ptr,
-                                  size_t bytes_req,
-                                  size_t bytes_alloc,
-                                  gfp_t gfp_flags,
-                                  int node)
-{
-       struct ftrace_event_call *call = &event_kmem_alloc;
-       struct trace_array *tr = kmemtrace_array;
-       struct kmemtrace_alloc_entry *entry;
-       struct ring_buffer_event *event;
-
-       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
-       if (!event)
-               return;
-
-       entry = ring_buffer_event_data(event);
-       tracing_generic_entry_update(&entry->ent, 0, 0);
-
-       entry->ent.type         = TRACE_KMEM_ALLOC;
-       entry->type_id          = type_id;
-       entry->call_site        = call_site;
-       entry->ptr              = ptr;
-       entry->bytes_req        = bytes_req;
-       entry->bytes_alloc      = bytes_alloc;
-       entry->gfp_flags        = gfp_flags;
-       entry->node             = node;
-
-       if (!filter_check_discard(call, entry, tr->buffer, event))
-               ring_buffer_unlock_commit(tr->buffer, event);
-
-       trace_wake_up();
-}
-
-static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
-                                 unsigned long call_site,
-                                 const void *ptr)
-{
-       struct ftrace_event_call *call = &event_kmem_free;
-       struct trace_array *tr = kmemtrace_array;
-       struct kmemtrace_free_entry *entry;
-       struct ring_buffer_event *event;
-
-       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
-       if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       tracing_generic_entry_update(&entry->ent, 0, 0);
-
-       entry->ent.type         = TRACE_KMEM_FREE;
-       entry->type_id          = type_id;
-       entry->call_site        = call_site;
-       entry->ptr              = ptr;
-
-       if (!filter_check_discard(call, entry, tr->buffer, event))
-               ring_buffer_unlock_commit(tr->buffer, event);
-
-       trace_wake_up();
-}
-
-static void kmemtrace_kmalloc(void *ignore,
-                             unsigned long call_site,
-                             const void *ptr,
-                             size_t bytes_req,
-                             size_t bytes_alloc,
-                             gfp_t gfp_flags)
-{
-       kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
-                       bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmem_cache_alloc(void *ignore,
-                                      unsigned long call_site,
-                                      const void *ptr,
-                                      size_t bytes_req,
-                                      size_t bytes_alloc,
-                                      gfp_t gfp_flags)
-{
-       kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
-                       bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmalloc_node(void *ignore,
-                                  unsigned long call_site,
-                                  const void *ptr,
-                                  size_t bytes_req,
-                                  size_t bytes_alloc,
-                                  gfp_t gfp_flags,
-                                  int node)
-{
-       kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
-                       bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void kmemtrace_kmem_cache_alloc_node(void *ignore,
-                                           unsigned long call_site,
-                                           const void *ptr,
-                                           size_t bytes_req,
-                                           size_t bytes_alloc,
-                                           gfp_t gfp_flags,
-                                           int node)
-{
-       kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
-                       bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void
-kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
-{
-       kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
-}
-
-static void kmemtrace_kmem_cache_free(void *ignore,
-                                     unsigned long call_site, const void *ptr)
-{
-       kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
-}
-
-static int kmemtrace_start_probes(void)
-{
-       int err;
-
-       err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
-       if (err)
-               return err;
-       err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
-       if (err)
-               return err;
-       err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
-       if (err)
-               return err;
-       err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
-       if (err)
-               return err;
-       err = register_trace_kfree(kmemtrace_kfree, NULL);
-       if (err)
-               return err;
-       err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-
-       return err;
-}
-
-static void kmemtrace_stop_probes(void)
-{
-       unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
-       unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
-       unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
-       unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
-       unregister_trace_kfree(kmemtrace_kfree, NULL);
-       unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-}
-
-static int kmem_trace_init(struct trace_array *tr)
-{
-       kmemtrace_array = tr;
-
-       tracing_reset_online_cpus(tr);
-
-       kmemtrace_start_probes();
-
-       return 0;
-}
-
-static void kmem_trace_reset(struct trace_array *tr)
-{
-       kmemtrace_stop_probes();
-}
-
-static void kmemtrace_headers(struct seq_file *s)
-{
-       /* Don't need headers for the original kmemtrace output */
-       if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
-               return;
-
-       seq_printf(s, "#\n");
-       seq_printf(s, "# ALLOC  TYPE  REQ   GIVEN  FLAGS     "
-                       "      POINTER         NODE    CALLER\n");
-       seq_printf(s, "# FREE   |      |     |       |       "
-                       "       |   |            |        |\n");
-       seq_printf(s, "# |\n\n");
-}
-
-/*
- * The following functions give the original output from kmemtrace,
- * plus the origin CPU, since reordering occurs in-kernel now.
- */
-
-#define KMEMTRACE_USER_ALLOC   0
-#define KMEMTRACE_USER_FREE    1
-
-struct kmemtrace_user_event {
-       u8                      event_id;
-       u8                      type_id;
-       u16                     event_size;
-       u32                     cpu;
-       u64                     timestamp;
-       unsigned long           call_site;
-       unsigned long           ptr;
-};
-
-struct kmemtrace_user_event_alloc {
-       size_t                  bytes_req;
-       size_t                  bytes_alloc;
-       unsigned                gfp_flags;
-       int                     node;
-};
-
-static enum print_line_t
-kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
-                     struct trace_event *event)
-{
-       struct trace_seq *s = &iter->seq;
-       struct kmemtrace_alloc_entry *entry;
-       int ret;
-
-       trace_assign_type(entry, iter->ent);
-
-       ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
-           "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
-           entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
-           (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
-           (unsigned long)entry->gfp_flags, entry->node);
-
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-       return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free(struct trace_iterator *iter, int flags,
-                    struct trace_event *event)
-{
-       struct trace_seq *s = &iter->seq;
-       struct kmemtrace_free_entry *entry;
-       int ret;
-
-       trace_assign_type(entry, iter->ent);
-
-       ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
-                              entry->type_id, (void *)entry->call_site,
-                              (unsigned long)entry->ptr);
-
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-       return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
-                          struct trace_event *event)
-{
-       struct trace_seq *s = &iter->seq;
-       struct kmemtrace_alloc_entry *entry;
-       struct kmemtrace_user_event *ev;
-       struct kmemtrace_user_event_alloc *ev_alloc;
-
-       trace_assign_type(entry, iter->ent);
-
-       ev = trace_seq_reserve(s, sizeof(*ev));
-       if (!ev)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       ev->event_id            = KMEMTRACE_USER_ALLOC;
-       ev->type_id             = entry->type_id;
-       ev->event_size          = sizeof(*ev) + sizeof(*ev_alloc);
-       ev->cpu                 = iter->cpu;
-       ev->timestamp           = iter->ts;
-       ev->call_site           = entry->call_site;
-       ev->ptr                 = (unsigned long)entry->ptr;
-
-       ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
-       if (!ev_alloc)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       ev_alloc->bytes_req     = entry->bytes_req;
-       ev_alloc->bytes_alloc   = entry->bytes_alloc;
-       ev_alloc->gfp_flags     = entry->gfp_flags;
-       ev_alloc->node          = entry->node;
-
-       return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
-                         struct trace_event *event)
-{
-       struct trace_seq *s = &iter->seq;
-       struct kmemtrace_free_entry *entry;
-       struct kmemtrace_user_event *ev;
-
-       trace_assign_type(entry, iter->ent);
-
-       ev = trace_seq_reserve(s, sizeof(*ev));
-       if (!ev)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       ev->event_id            = KMEMTRACE_USER_FREE;
-       ev->type_id             = entry->type_id;
-       ev->event_size          = sizeof(*ev);
-       ev->cpu                 = iter->cpu;
-       ev->timestamp           = iter->ts;
-       ev->call_site           = entry->call_site;
-       ev->ptr                 = (unsigned long)entry->ptr;
-
-       return TRACE_TYPE_HANDLED;
-}
-
-/* The two other following provide a more minimalistic output */
-static enum print_line_t
-kmemtrace_print_alloc_compress(struct trace_iterator *iter)
-{
-       struct kmemtrace_alloc_entry *entry;
-       struct trace_seq *s = &iter->seq;
-       int ret;
-
-       trace_assign_type(entry, iter->ent);
-
-       /* Alloc entry */
-       ret = trace_seq_printf(s, "  +      ");
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Type */
-       switch (entry->type_id) {
-       case KMEMTRACE_TYPE_KMALLOC:
-               ret = trace_seq_printf(s, "K   ");
-               break;
-       case KMEMTRACE_TYPE_CACHE:
-               ret = trace_seq_printf(s, "C   ");
-               break;
-       case KMEMTRACE_TYPE_PAGES:
-               ret = trace_seq_printf(s, "P   ");
-               break;
-       default:
-               ret = trace_seq_printf(s, "?   ");
-       }
-
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Requested */
-       ret = trace_seq_printf(s, "%4zu   ", entry->bytes_req);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Allocated */
-       ret = trace_seq_printf(s, "%4zu   ", entry->bytes_alloc);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Flags
-        * TODO: would be better to see the name of the GFP flag names
-        */
-       ret = trace_seq_printf(s, "%08x   ", entry->gfp_flags);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Pointer to allocated */
-       ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Node and call site*/
-       ret = trace_seq_printf(s, "%4d   %pf\n", entry->node,
-                                                (void *)entry->call_site);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_compress(struct trace_iterator *iter)
-{
-       struct kmemtrace_free_entry *entry;
-       struct trace_seq *s = &iter->seq;
-       int ret;
-
-       trace_assign_type(entry, iter->ent);
-
-       /* Free entry */
-       ret = trace_seq_printf(s, "  -      ");
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Type */
-       switch (entry->type_id) {
-       case KMEMTRACE_TYPE_KMALLOC:
-               ret = trace_seq_printf(s, "K     ");
-               break;
-       case KMEMTRACE_TYPE_CACHE:
-               ret = trace_seq_printf(s, "C     ");
-               break;
-       case KMEMTRACE_TYPE_PAGES:
-               ret = trace_seq_printf(s, "P     ");
-               break;
-       default:
-               ret = trace_seq_printf(s, "?     ");
-       }
-
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Skip requested/allocated/flags */
-       ret = trace_seq_printf(s, "                       ");
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Pointer to allocated */
-       ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Skip node and print call site*/
-       ret = trace_seq_printf(s, "       %pf\n", (void *)entry->call_site);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
-{
-       struct trace_entry *entry = iter->ent;
-
-       if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
-               return TRACE_TYPE_UNHANDLED;
-
-       switch (entry->type) {
-       case TRACE_KMEM_ALLOC:
-               return kmemtrace_print_alloc_compress(iter);
-       case TRACE_KMEM_FREE:
-               return kmemtrace_print_free_compress(iter);
-       default:
-               return TRACE_TYPE_UNHANDLED;
-       }
-}
-
-static struct trace_event_functions kmem_trace_alloc_funcs = {
-       .trace                  = kmemtrace_print_alloc,
-       .binary                 = kmemtrace_print_alloc_user,
-};
-
-static struct trace_event kmem_trace_alloc = {
-       .type                   = TRACE_KMEM_ALLOC,
-       .funcs                  = &kmem_trace_alloc_funcs,
-};
-
-static struct trace_event_functions kmem_trace_free_funcs = {
-       .trace                  = kmemtrace_print_free,
-       .binary                 = kmemtrace_print_free_user,
-};
-
-static struct trace_event kmem_trace_free = {
-       .type                   = TRACE_KMEM_FREE,
-       .funcs                  = &kmem_trace_free_funcs,
-};
-
-static struct tracer kmem_tracer __read_mostly = {
-       .name                   = "kmemtrace",
-       .init                   = kmem_trace_init,
-       .reset                  = kmem_trace_reset,
-       .print_line             = kmemtrace_print_line,
-       .print_header           = kmemtrace_headers,
-       .flags                  = &kmem_tracer_flags
-};
-
-void kmemtrace_init(void)
-{
-       /* earliest opportunity to start kmem tracing */
-}
-
-static int __init init_kmem_tracer(void)
-{
-       if (!register_ftrace_event(&kmem_trace_alloc)) {
-               pr_warning("Warning: could not register kmem events\n");
-               return 1;
-       }
-
-       if (!register_ftrace_event(&kmem_trace_free)) {
-               pr_warning("Warning: could not register kmem events\n");
-               return 1;
-       }
-
-       if (register_tracer(&kmem_tracer) != 0) {
-               pr_warning("Warning: could not register the kmem tracer\n");
-               return 1;
-       }
-
-       return 0;
-}
-device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1da7b6e..3632ce8 100644 (file)
@@ -443,6 +443,7 @@ int ring_buffer_print_page_header(struct trace_seq *s)
  */
 struct ring_buffer_per_cpu {
        int                             cpu;
+       atomic_t                        record_disabled;
        struct ring_buffer              *buffer;
        spinlock_t                      reader_lock;    /* serialize readers */
        arch_spinlock_t                 lock;
@@ -462,7 +463,6 @@ struct ring_buffer_per_cpu {
        unsigned long                   read;
        u64                             write_stamp;
        u64                             read_stamp;
-       atomic_t                        record_disabled;
 };
 
 struct ring_buffer {
@@ -2242,8 +2242,6 @@ static void trace_recursive_unlock(void)
 
 #endif
 
-static DEFINE_PER_CPU(int, rb_need_resched);
-
 /**
  * ring_buffer_lock_reserve - reserve a part of the buffer
  * @buffer: the ring buffer to reserve from
@@ -2264,13 +2262,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event;
-       int cpu, resched;
+       int cpu;
 
        if (ring_buffer_flags != RB_BUFFERS_ON)
                return NULL;
 
        /* If we are tracing schedule, we don't want to recurse */
-       resched = ftrace_preempt_disable();
+       preempt_disable_notrace();
 
        if (atomic_read(&buffer->record_disabled))
                goto out_nocheck;
@@ -2295,21 +2293,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
        if (!event)
                goto out;
 
-       /*
-        * Need to store resched state on this cpu.
-        * Only the first needs to.
-        */
-
-       if (preempt_count() == 1)
-               per_cpu(rb_need_resched, cpu) = resched;
-
        return event;
 
  out:
        trace_recursive_unlock();
 
  out_nocheck:
-       ftrace_preempt_enable(resched);
+       preempt_enable_notrace();
        return NULL;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2355,13 +2345,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
        trace_recursive_unlock();
 
-       /*
-        * Only the last preempt count needs to restore preemption.
-        */
-       if (preempt_count() == 1)
-               ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
-       else
-               preempt_enable_no_resched_notrace();
+       preempt_enable_notrace();
 
        return 0;
 }
@@ -2469,13 +2453,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 
        trace_recursive_unlock();
 
-       /*
-        * Only the last preempt count needs to restore preemption.
-        */
-       if (preempt_count() == 1)
-               ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
-       else
-               preempt_enable_no_resched_notrace();
+       preempt_enable_notrace();
 
 }
 EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2501,12 +2479,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
        struct ring_buffer_event *event;
        void *body;
        int ret = -EBUSY;
-       int cpu, resched;
+       int cpu;
 
        if (ring_buffer_flags != RB_BUFFERS_ON)
                return -EBUSY;
 
-       resched = ftrace_preempt_disable();
+       preempt_disable_notrace();
 
        if (atomic_read(&buffer->record_disabled))
                goto out;
@@ -2536,7 +2514,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
        ret = 0;
  out:
-       ftrace_preempt_enable(resched);
+       preempt_enable_notrace();
 
        return ret;
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 086d363..4b1122d 100644 (file)
@@ -344,7 +344,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
        TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
-       TRACE_ITER_GRAPH_TIME;
+       TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
 
 static int trace_stop_count;
 static DEFINE_SPINLOCK(tracing_start_lock);
@@ -428,6 +428,7 @@ static const char *trace_options[] = {
        "latency-format",
        "sleep-time",
        "graph-time",
+       "record-cmd",
        NULL
 };
 
@@ -659,6 +660,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
                return;
 
        WARN_ON_ONCE(!irqs_disabled());
+       if (!current_trace->use_max_tr) {
+               WARN_ON_ONCE(1);
+               return;
+       }
        arch_spin_lock(&ftrace_max_lock);
 
        tr->buffer = max_tr.buffer;
@@ -685,6 +690,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
                return;
 
        WARN_ON_ONCE(!irqs_disabled());
+       if (!current_trace->use_max_tr) {
+               WARN_ON_ONCE(1);
+               return;
+       }
+
        arch_spin_lock(&ftrace_max_lock);
 
        ftrace_disable_cpu();
@@ -729,7 +739,7 @@ __acquires(kernel_lock)
                return -1;
        }
 
-       if (strlen(type->name) > MAX_TRACER_SIZE) {
+       if (strlen(type->name) >= MAX_TRACER_SIZE) {
                pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
                return -1;
        }
@@ -1331,61 +1341,6 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
 
 #endif /* CONFIG_STACKTRACE */
 
-static void
-ftrace_trace_special(void *__tr,
-                    unsigned long arg1, unsigned long arg2, unsigned long arg3,
-                    int pc)
-{
-       struct ftrace_event_call *call = &event_special;
-       struct ring_buffer_event *event;
-       struct trace_array *tr = __tr;
-       struct ring_buffer *buffer = tr->buffer;
-       struct special_entry *entry;
-
-       event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
-                                         sizeof(*entry), 0, pc);
-       if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       entry->arg1                     = arg1;
-       entry->arg2                     = arg2;
-       entry->arg3                     = arg3;
-
-       if (!filter_check_discard(call, entry, buffer, event))
-               trace_buffer_unlock_commit(buffer, event, 0, pc);
-}
-
-void
-__trace_special(void *__tr, void *__data,
-               unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-       ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
-}
-
-void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-       struct trace_array *tr = &global_trace;
-       struct trace_array_cpu *data;
-       unsigned long flags;
-       int cpu;
-       int pc;
-
-       if (tracing_disabled)
-               return;
-
-       pc = preempt_count();
-       local_irq_save(flags);
-       cpu = raw_smp_processor_id();
-       data = tr->data[cpu];
-
-       if (likely(atomic_inc_return(&data->disabled) == 1))
-               ftrace_trace_special(tr, arg1, arg2, arg3, pc);
-
-       atomic_dec(&data->disabled);
-       local_irq_restore(flags);
-}
-
 /**
  * trace_vbprintk - write binary msg to tracing buffer
  *
@@ -1404,7 +1359,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
        struct bprint_entry *entry;
        unsigned long flags;
        int disable;
-       int resched;
        int cpu, len = 0, size, pc;
 
        if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1414,7 +1368,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
        pause_graph_tracing();
 
        pc = preempt_count();
-       resched = ftrace_preempt_disable();
+       preempt_disable_notrace();
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
 
@@ -1452,7 +1406,7 @@ out_unlock:
 
 out:
        atomic_dec_return(&data->disabled);
-       ftrace_preempt_enable(resched);
+       preempt_enable_notrace();
        unpause_graph_tracing();
 
        return len;
@@ -2394,6 +2348,7 @@ static const struct file_operations show_traces_fops = {
        .open           = show_traces_open,
        .read           = seq_read,
        .release        = seq_release,
+       .llseek         = seq_lseek,
 };
 
 /*
@@ -2487,6 +2442,7 @@ static const struct file_operations tracing_cpumask_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_cpumask_read,
        .write          = tracing_cpumask_write,
+       .llseek         = generic_file_llseek,
 };
 
 static int tracing_trace_options_show(struct seq_file *m, void *v)
@@ -2562,6 +2518,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
                trace_flags |= mask;
        else
                trace_flags &= ~mask;
+
+       if (mask == TRACE_ITER_RECORD_CMD)
+               trace_event_enable_cmd_record(enabled);
 }
 
 static ssize_t
@@ -2653,6 +2612,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf,
 static const struct file_operations tracing_readme_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_readme_read,
+       .llseek         = generic_file_llseek,
 };
 
 static ssize_t
@@ -2703,6 +2663,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
 static const struct file_operations tracing_saved_cmdlines_fops = {
     .open       = tracing_open_generic,
     .read       = tracing_saved_cmdlines_read,
+    .llseek    = generic_file_llseek,
 };
 
 static ssize_t
@@ -2798,6 +2759,9 @@ static int tracing_resize_ring_buffer(unsigned long size)
        if (ret < 0)
                return ret;
 
+       if (!current_trace->use_max_tr)
+               goto out;
+
        ret = ring_buffer_resize(max_tr.buffer, size);
        if (ret < 0) {
                int r;
@@ -2825,11 +2789,14 @@ static int tracing_resize_ring_buffer(unsigned long size)
                return ret;
        }
 
+       max_tr.entries = size;
+ out:
        global_trace.entries = size;
 
        return ret;
 }
 
+
 /**
  * tracing_update_buffers - used by tracing facility to expand ring buffers
  *
@@ -2890,12 +2857,26 @@ static int tracing_set_tracer(const char *buf)
        trace_branch_disable();
        if (current_trace && current_trace->reset)
                current_trace->reset(tr);
-
+       if (current_trace && current_trace->use_max_tr) {
+               /*
+                * We don't free the ring buffer. instead, resize it because
+                * The max_tr ring buffer has some state (e.g. ring->clock) and
+                * we want preserve it.
+                */
+               ring_buffer_resize(max_tr.buffer, 1);
+               max_tr.entries = 1;
+       }
        destroy_trace_option_files(topts);
 
        current_trace = t;
 
        topts = create_trace_option_files(current_trace);
+       if (current_trace->use_max_tr) {
+               ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
+               if (ret < 0)
+                       goto out;
+               max_tr.entries = global_trace.entries;
+       }
 
        if (t->init) {
                ret = tracer_init(t, tr);
@@ -3032,6 +3013,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
        if (iter->trace->pipe_open)
                iter->trace->pipe_open(iter);
 
+       nonseekable_open(inode, filp);
 out:
        mutex_unlock(&trace_types_lock);
        return ret;
@@ -3477,7 +3459,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
        }
 
        tracing_start();
-       max_tr.entries = global_trace.entries;
        mutex_unlock(&trace_types_lock);
 
        return cnt;
@@ -3590,18 +3571,21 @@ static const struct file_operations tracing_max_lat_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_max_lat_read,
        .write          = tracing_max_lat_write,
+       .llseek         = generic_file_llseek,
 };
 
 static const struct file_operations tracing_ctrl_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_ctrl_read,
        .write          = tracing_ctrl_write,
+       .llseek         = generic_file_llseek,
 };
 
 static const struct file_operations set_tracer_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_set_trace_read,
        .write          = tracing_set_trace_write,
+       .llseek         = generic_file_llseek,
 };
 
 static const struct file_operations tracing_pipe_fops = {
@@ -3610,17 +3594,20 @@ static const struct file_operations tracing_pipe_fops = {
        .read           = tracing_read_pipe,
        .splice_read    = tracing_splice_read_pipe,
        .release        = tracing_release_pipe,
+       .llseek         = no_llseek,
 };
 
 static const struct file_operations tracing_entries_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_entries_read,
        .write          = tracing_entries_write,
+       .llseek         = generic_file_llseek,
 };
 
 static const struct file_operations tracing_mark_fops = {
        .open           = tracing_open_generic,
        .write          = tracing_mark_write,
+       .llseek         = generic_file_llseek,
 };
 
 static const struct file_operations trace_clock_fops = {
@@ -3926,6 +3913,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 static const struct file_operations tracing_stats_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_stats_read,
+       .llseek         = generic_file_llseek,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -3962,6 +3950,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
 static const struct file_operations tracing_dyn_info_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_read_dyn_info,
+       .llseek         = generic_file_llseek,
 };
 #endif
 
@@ -4115,6 +4104,7 @@ static const struct file_operations trace_options_fops = {
        .open = tracing_open_generic,
        .read = trace_options_read,
        .write = trace_options_write,
+       .llseek = generic_file_llseek,
 };
 
 static ssize_t
@@ -4166,6 +4156,7 @@ static const struct file_operations trace_options_core_fops = {
        .open = tracing_open_generic,
        .read = trace_options_core_read,
        .write = trace_options_core_write,
+       .llseek = generic_file_llseek,
 };
 
 struct dentry *trace_create_file(const char *name,
@@ -4355,9 +4346,6 @@ static __init int tracer_init_debugfs(void)
        trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
                        &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
-#ifdef CONFIG_SYSPROF_TRACER
-       init_tracer_sysprof_debugfs(d_tracer);
-#endif
 
        create_trace_options_dir();
 
@@ -4575,16 +4563,14 @@ __init static int tracer_alloc_buffers(void)
 
 
 #ifdef CONFIG_TRACER_MAX_TRACE
-       max_tr.buffer = ring_buffer_alloc(ring_buf_size,
-                                            TRACE_BUFFER_FLAGS);
+       max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS);
        if (!max_tr.buffer) {
                printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
                WARN_ON(1);
                ring_buffer_free(global_trace.buffer);
                goto out_free_cpumask;
        }
-       max_tr.entries = ring_buffer_size(max_tr.buffer);
-       WARN_ON(max_tr.entries != global_trace.entries);
+       max_tr.entries = 1;
 #endif
 
        /* Allocate the first page for all buffers */
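max_tr is now created with a single entry and only resized to match the main buffer when a tracer that actually snapshots into it is selected, via the new use_max_tr field shown in the trace.h hunk below. A hypothetical tracer opting in might look like this (illustrative sketch; the tracer and its callbacks are made up, only the field comes from this merge):

static int example_init(struct trace_array *tr)
{
	return 0;	/* hypothetical no-op latency tracer */
}

static void example_reset(struct trace_array *tr)
{
}

static struct tracer example_latency_tracer __read_mostly = {
	.name		= "example_latency",
	.init		= example_init,
	.reset		= example_reset,
	.use_max_tr	= 1,	/* ask tracing_set_tracer() to size max_tr */
};
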
@@ -4597,9 +4583,6 @@ __init static int tracer_alloc_buffers(void)
 
        register_tracer(&nop_trace);
        current_trace = &nop_trace;
-#ifdef CONFIG_BOOT_TRACER
-       register_tracer(&boot_tracer);
-#endif
        /* All seems OK, enable tracing */
        tracing_disabled = 0;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2cd9639..d05c873 100644 (file)
@@ -9,10 +9,7 @@
 #include <linux/mmiotrace.h>
 #include <linux/tracepoint.h>
 #include <linux/ftrace.h>
-#include <trace/boot.h>
-#include <linux/kmemtrace.h>
 #include <linux/hw_breakpoint.h>
-
 #include <linux/trace_seq.h>
 #include <linux/ftrace_event.h>
 
@@ -25,30 +22,17 @@ enum trace_type {
        TRACE_STACK,
        TRACE_PRINT,
        TRACE_BPRINT,
-       TRACE_SPECIAL,
        TRACE_MMIO_RW,
        TRACE_MMIO_MAP,
        TRACE_BRANCH,
-       TRACE_BOOT_CALL,
-       TRACE_BOOT_RET,
        TRACE_GRAPH_RET,
        TRACE_GRAPH_ENT,
        TRACE_USER_STACK,
-       TRACE_KMEM_ALLOC,
-       TRACE_KMEM_FREE,
        TRACE_BLK,
-       TRACE_KSYM,
 
        __TRACE_LAST_TYPE,
 };
 
-enum kmemtrace_type_id {
-       KMEMTRACE_TYPE_KMALLOC = 0,     /* kmalloc() or kfree(). */
-       KMEMTRACE_TYPE_CACHE,           /* kmem_cache_*(). */
-       KMEMTRACE_TYPE_PAGES,           /* __get_free_pages() and friends. */
-};
-
-extern struct tracer boot_tracer;
 
 #undef __field
 #define __field(type, item)            type    item;
@@ -204,23 +188,15 @@ extern void __ftrace_bad_type(void);
                IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
                IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);   \
                IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
-               IF_ASSIGN(var, ent, struct special_entry, 0);           \
                IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,          \
                          TRACE_MMIO_RW);                               \
                IF_ASSIGN(var, ent, struct trace_mmiotrace_map,         \
                          TRACE_MMIO_MAP);                              \
-               IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
-               IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
                IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
                IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry,      \
                          TRACE_GRAPH_ENT);             \
                IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,      \
                          TRACE_GRAPH_RET);             \
-               IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,       \
-                         TRACE_KMEM_ALLOC);    \
-               IF_ASSIGN(var, ent, struct kmemtrace_free_entry,        \
-                         TRACE_KMEM_FREE);     \
-               IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
                __ftrace_bad_type();                                    \
        } while (0)
 
@@ -298,6 +274,7 @@ struct tracer {
        struct tracer           *next;
        int                     print_max;
        struct tracer_flags     *flags;
+       int                     use_max_tr;
 };
 
 
@@ -318,7 +295,6 @@ struct dentry *trace_create_file(const char *name,
                                 const struct file_operations *fops);
 
 struct dentry *tracing_init_dentry(void);
-void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
 struct ring_buffer_event;
 
@@ -355,11 +331,6 @@ void tracing_sched_wakeup_trace(struct trace_array *tr,
                                struct task_struct *wakee,
                                struct task_struct *cur,
                                unsigned long flags, int pc);
-void trace_special(struct trace_array *tr,
-                  struct trace_array_cpu *data,
-                  unsigned long arg1,
-                  unsigned long arg2,
-                  unsigned long arg3, int pc);
 void trace_function(struct trace_array *tr,
                    unsigned long ip,
                    unsigned long parent_ip,
@@ -381,8 +352,6 @@ int register_tracer(struct tracer *type);
 void unregister_tracer(struct tracer *type);
 int is_tracing_stopped(void);
 
-extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
-
 extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 extern unsigned long tracing_thresh;
@@ -452,12 +421,8 @@ extern int trace_selftest_startup_nop(struct tracer *trace,
                                         struct trace_array *tr);
 extern int trace_selftest_startup_sched_switch(struct tracer *trace,
                                               struct trace_array *tr);
-extern int trace_selftest_startup_sysprof(struct tracer *trace,
-                                              struct trace_array *tr);
 extern int trace_selftest_startup_branch(struct tracer *trace,
                                         struct trace_array *tr);
-extern int trace_selftest_startup_ksym(struct tracer *trace,
-                                        struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
@@ -617,6 +582,7 @@ enum trace_iterator_flags {
        TRACE_ITER_LATENCY_FMT          = 0x20000,
        TRACE_ITER_SLEEP_TIME           = 0x40000,
        TRACE_ITER_GRAPH_TIME           = 0x80000,
+       TRACE_ITER_RECORD_CMD           = 0x100000,
 };
 
 /*
@@ -628,54 +594,6 @@ enum trace_iterator_flags {
 
 extern struct tracer nop_trace;
 
-/**
- * ftrace_preempt_disable - disable preemption scheduler safe
- *
- * When tracing can happen inside the scheduler, there exists
- * cases that the tracing might happen before the need_resched
- * flag is checked. If this happens and the tracer calls
- * preempt_enable (after a disable), a schedule might take place
- * causing an infinite recursion.
- *
- * To prevent this, we read the need_resched flag before
- * disabling preemption. When we want to enable preemption we
- * check the flag, if it is set, then we call preempt_enable_no_resched.
- * Otherwise, we call preempt_enable.
- *
- * The rational for doing the above is that if need_resched is set
- * and we have yet to reschedule, we are either in an atomic location
- * (where we do not need to check for scheduling) or we are inside
- * the scheduler and do not want to resched.
- */
-static inline int ftrace_preempt_disable(void)
-{
-       int resched;
-
-       resched = need_resched();
-       preempt_disable_notrace();
-
-       return resched;
-}
-
-/**
- * ftrace_preempt_enable - enable preemption scheduler safe
- * @resched: the return value from ftrace_preempt_disable
- *
- * This is a scheduler safe way to enable preemption and not miss
- * any preemption checks. The disabled saved the state of preemption.
- * If resched is set, then we are either inside an atomic or
- * are inside the scheduler (we would have already scheduled
- * otherwise). In this case, we do not want to call normal
- * preempt_enable, but preempt_enable_no_resched instead.
- */
-static inline void ftrace_preempt_enable(int resched)
-{
-       if (resched)
-               preempt_enable_no_resched_notrace();
-       else
-               preempt_enable_notrace();
-}
-
 #ifdef CONFIG_BRANCH_TRACER
 extern int enable_branch_tracing(struct trace_array *tr);
 extern void disable_branch_tracing(void);
@@ -766,6 +684,8 @@ struct filter_pred {
        int                     pop_n;
 };
 
+extern struct list_head ftrace_common_fields;
+
 extern enum regex_type
 filter_parse_regex(char *buff, int len, char **search, int *not);
 extern void print_event_filter(struct ftrace_event_call *call,
@@ -795,6 +715,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
        return 0;
 }
 
+extern void trace_event_enable_cmd_record(bool enable);
+
 extern struct mutex event_mutex;
 extern struct list_head ftrace_events;
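The trace_event_enable_cmd_record() hook declared above is what the new record-cmd option (see the set_tracer_flags() hunk in trace.c earlier) toggles at runtime, so comm/cmdline recording can follow the option instead of being unconditional. A rough sketch of the idea, with the per-event flag bookkeeping elided and no claim to match the actual trace_events.c body:

/* Rough sketch: walk the registered events under event_mutex and
 * start or stop cmdline recording when the record-cmd option flips. */
void trace_event_enable_cmd_record(bool enable)
{
	struct ftrace_event_call *call;

	mutex_lock(&event_mutex);
	list_for_each_entry(call, &ftrace_events, list) {
		if (enable)
			tracing_start_cmdline_record();
		else
			tracing_stop_cmdline_record();
	}
	mutex_unlock(&event_mutex);
}
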
 
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
deleted file mode 100644 (file)
index c21d5f3..0000000
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * ring buffer based initcalls tracer
- *
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- *
- */
-
-#include <linux/init.h>
-#include <linux/debugfs.h>
-#include <linux/ftrace.h>
-#include <linux/kallsyms.h>
-#include <linux/time.h>
-
-#include "trace.h"
-#include "trace_output.h"
-
-static struct trace_array *boot_trace;
-static bool pre_initcalls_finished;
-
-/* Tells the boot tracer that the pre_smp_initcalls are finished.
- * So we are ready .
- * It doesn't enable sched events tracing however.
- * You have to call enable_boot_trace to do so.
- */
-void start_boot_trace(void)
-{
-       pre_initcalls_finished = true;
-}
-
-void enable_boot_trace(void)
-{
-       if (boot_trace && pre_initcalls_finished)
-               tracing_start_sched_switch_record();
-}
-
-void disable_boot_trace(void)
-{
-       if (boot_trace && pre_initcalls_finished)
-               tracing_stop_sched_switch_record();
-}
-
-static int boot_trace_init(struct trace_array *tr)
-{
-       boot_trace = tr;
-
-       if (!tr)
-               return 0;
-
-       tracing_reset_online_cpus(tr);
-
-       tracing_sched_switch_assign_trace(tr);
-       return 0;
-}
-
-static enum print_line_t
-initcall_call_print_line(struct trace_iterator *iter)
-{
-       struct trace_entry *entry = iter->ent;
-       struct trace_seq *s = &iter->seq;
-       struct trace_boot_call *field;
-       struct boot_trace_call *call;
-       u64 ts;
-       unsigned long nsec_rem;
-       int ret;
-
-       trace_assign_type(field, entry);
-       call = &field->boot_call;
-       ts = iter->ts;
-       nsec_rem = do_div(ts, NSEC_PER_SEC);
-
-       ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
-                       (unsigned long)ts, nsec_rem, call->func, call->caller);
-
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-       else
-               return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-initcall_ret_print_line(struct trace_iterator *iter)
-{
-       struct trace_entry *entry = iter->ent;
-       struct trace_seq *s = &iter->seq;
-       struct trace_boot_ret *field;
-       struct boot_trace_ret *init_ret;
-       u64 ts;
-       unsigned long nsec_rem;
-       int ret;
-
-       trace_assign_type(field, entry);
-       init_ret = &field->boot_ret;
-       ts = iter->ts;
-       nsec_rem = do_div(ts, NSEC_PER_SEC);
-
-       ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
-                       "returned %d after %llu msecs\n",
-                       (unsigned long) ts,
-                       nsec_rem,
-                       init_ret->func, init_ret->result, init_ret->duration);
-
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-       else
-               return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t initcall_print_line(struct trace_iterator *iter)
-{
-       struct trace_entry *entry = iter->ent;
-
-       switch (entry->type) {
-       case TRACE_BOOT_CALL:
-               return initcall_call_print_line(iter);
-       case TRACE_BOOT_RET:
-               return initcall_ret_print_line(iter);
-       default:
-               return TRACE_TYPE_UNHANDLED;
-       }
-}
-
-struct tracer boot_tracer __read_mostly =
-{
-       .name           = "initcall",
-       .init           = boot_trace_init,
-       .reset          = tracing_reset_online_cpus,
-       .print_line     = initcall_print_line,
-};
-
-void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
-{
-       struct ftrace_event_call *call = &event_boot_call;
-       struct ring_buffer_event *event;
-       struct ring_buffer *buffer;
-       struct trace_boot_call *entry;
-       struct trace_array *tr = boot_trace;
-
-       if (!tr || !pre_initcalls_finished)
-               return;
-
-       /* Get its name now since this function could
-        * disappear because it is in the .init section.
-        */
-       sprint_symbol(bt->func, (unsigned long)fn);
-       preempt_disable();
-
-       buffer = tr->buffer;
-       event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
-                                         sizeof(*entry), 0, 0);
-       if (!event)
-               goto out;
-       entry   = ring_buffer_event_data(event);
-       entry->boot_call = *bt;
-       if (!filter_check_discard(call, entry, buffer, event))
-               trace_buffer_unlock_commit(buffer, event, 0, 0);
- out:
-       preempt_enable();
-}
-
-void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
-{
-       struct ftrace_event_call *call = &event_boot_ret;
-       struct ring_buffer_event *event;
-       struct ring_buffer *buffer;
-       struct trace_boot_ret *entry;
-       struct trace_array *tr = boot_trace;
-
-       if (!tr || !pre_initcalls_finished)
-               return;
-
-       sprint_symbol(bt->func, (unsigned long)fn);
-       preempt_disable();
-
-       buffer = tr->buffer;
-       event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
-                                         sizeof(*entry), 0, 0);
-       if (!event)
-               goto out;
-       entry   = ring_buffer_event_data(event);
-       entry->boot_ret = *bt;
-       if (!filter_check_discard(call, entry, buffer, event))
-               trace_buffer_unlock_commit(buffer, event, 0, 0);
- out:
-       preempt_enable();
-}
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 9d589d8..52fda6c 100644 (file)
 u64 notrace trace_clock_local(void)
 {
        u64 clock;
-       int resched;
 
        /*
         * sched_clock() is an architecture implemented, fast, scalable,
         * lockless clock. It is not guaranteed to be coherent across
         * CPUs, nor across CPU idle events.
         */
-       resched = ftrace_preempt_disable();
+       preempt_disable_notrace();
        clock = sched_clock();
-       ftrace_preempt_enable(resched);
+       preempt_enable_notrace();
 
        return clock;
 }
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index dc008c1..e3dfeca 100644 (file)
@@ -151,23 +151,6 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
 );
 
 /*
- * Special (free-form) trace entry:
- */
-FTRACE_ENTRY(special, special_entry,
-
-       TRACE_SPECIAL,
-
-       F_STRUCT(
-               __field(        unsigned long,  arg1    )
-               __field(        unsigned long,  arg2    )
-               __field(        unsigned long,  arg3    )
-       ),
-
-       F_printk("(%08lx) (%08lx) (%08lx)",
-                __entry->arg1, __entry->arg2, __entry->arg3)
-);
-
-/*
  * Stack-trace entry:
  */
 
@@ -271,33 +254,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
                 __entry->map_id, __entry->opcode)
 );
 
-FTRACE_ENTRY(boot_call, trace_boot_call,
-
-       TRACE_BOOT_CALL,
-
-       F_STRUCT(
-               __field_struct( struct boot_trace_call, boot_call       )
-               __field_desc(   pid_t,  boot_call,      caller          )
-               __array_desc(   char,   boot_call,      func,   KSYM_SYMBOL_LEN)
-       ),
-
-       F_printk("%d  %s", __entry->caller, __entry->func)
-);
-
-FTRACE_ENTRY(boot_ret, trace_boot_ret,
-
-       TRACE_BOOT_RET,
-
-       F_STRUCT(
-               __field_struct( struct boot_trace_ret,  boot_ret        )
-               __array_desc(   char,   boot_ret,       func,   KSYM_SYMBOL_LEN)
-               __field_desc(   int,    boot_ret,       result          )
-               __field_desc(   unsigned long, boot_ret, duration       )
-       ),
-
-       F_printk("%s %d %lx",
-                __entry->func, __entry->result, __entry->duration)
-);
 
 #define TRACE_FUNC_SIZE 30
 #define TRACE_FILE_SIZE 20
@@ -318,53 +274,3 @@ FTRACE_ENTRY(branch, trace_branch,
                 __entry->func, __entry->file, __entry->correct)
 );
 
-FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
-
-       TRACE_KMEM_ALLOC,
-
-       F_STRUCT(
-               __field(        enum kmemtrace_type_id, type_id         )
-               __field(        unsigned long,          call_site       )
-               __field(        const void *,           ptr             )
-               __field(        size_t,                 bytes_req       )
-               __field(        size_t,                 bytes_alloc     )
-               __field(        gfp_t,                  gfp_flags       )
-               __field(        int,                    node            )
-       ),
-
-       F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
-                " flags:%x node:%d",
-                __entry->type_id, __entry->call_site, __entry->ptr,
-                __entry->bytes_req, __entry->bytes_alloc,
-                __entry->gfp_flags, __entry->node)
-);
-
-FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
-
-       TRACE_KMEM_FREE,
-
-       F_STRUCT(
-               __field(        enum kmemtrace_type_id, type_id         )
-               __field(        unsigned long,          call_site       )
-               __field(        const void *,           ptr             )
-       ),
-
-       F_printk("type:%u call_site:%lx ptr:%p",
-                __entry->type_id, __entry->call_site, __entry->ptr)
-);
-
-FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
-
-       TRACE_KSYM,
-
-       F_STRUCT(
-               __field(        unsigned long,  ip                        )
-               __field(        unsigned char,  type                      )
-               __array(        char         ,  cmd,       TASK_COMM_LEN  )
-               __field(        unsigned long,  addr                      )
-       ),
-
-       F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
-               (void *)__entry->ip, (unsigned int)__entry->type,
-               (void *)__entry->addr,  __entry->cmd)
-);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 8a2b73f..000e6e8 100644 (file)
@@ -9,8 +9,6 @@
 #include <linux/kprobes.h>
 #include "trace.h"
 
-EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
-
 static char *perf_trace_buf[4];
 
 /*
@@ -56,13 +54,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
                }
        }
 
-       if (tp_event->class->reg)
-               ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
-       else
-               ret = tracepoint_probe_register(tp_event->name,
-                                               tp_event->class->perf_probe,
-                                               tp_event);
-
+       ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
        if (ret)
                goto fail;
 
@@ -96,9 +88,7 @@ int perf_trace_init(struct perf_event *p_event)
        mutex_lock(&event_mutex);
        list_for_each_entry(tp_event, &ftrace_events, list) {
                if (tp_event->event.type == event_id &&
-                   tp_event->class &&
-                   (tp_event->class->perf_probe ||
-                    tp_event->class->reg) &&
+                   tp_event->class && tp_event->class->reg &&
                    try_module_get(tp_event->mod)) {
                        ret = perf_trace_event_init(tp_event, p_event);
                        break;
@@ -138,18 +128,13 @@ void perf_trace_destroy(struct perf_event *p_event)
        if (--tp_event->perf_refcount > 0)
                goto out;
 
-       if (tp_event->class->reg)
-               tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
-       else
-               tracepoint_probe_unregister(tp_event->name,
-                                           tp_event->class->perf_probe,
-                                           tp_event);
+       tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
 
        /*
-        * Ensure our callback won't be called anymore. See
-        * tracepoint_probe_unregister() and __DO_TRACE().
+        * Ensure our callback won't be called anymore. The buffers
+        * will be freed after that.
         */
-       synchronize_sched();
+       tracepoint_synchronize_unregister();
 
        free_percpu(tp_event->perf_events);
        tp_event->perf_events = NULL;
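
[Editor's note, not part of the diff] The hunks above assume that every event's class now supplies a ->reg() callback, which is why perf_trace_event_init() and perf_trace_destroy() can call tp_event->class->reg() unconditionally instead of falling back to tracepoint_probe_register()/unregister() by hand. A minimal sketch of the event-class side under that assumption follows; ftrace_event_reg() is the common dispatcher added in the kernel/trace/trace_events.c hunk further down, while my_event_class, my_probe and my_perf_probe are purely illustrative names, not identifiers from this patch.

	/*
	 * Sketch only: an event class wired up so that ->reg() is always
	 * callable.  Generated TRACE_EVENT() classes are expected to do the
	 * equivalent.  Real probes take the tracepoint's argument list; the
	 * ->probe/->perf_probe members are plain void * slots.
	 */
	static void my_probe(void *data);
	static void my_perf_probe(void *data);

	static struct ftrace_event_class my_event_class = {
		.system		= "my_subsys",
		.probe		= my_probe,
	#ifdef CONFIG_PERF_EVENTS
		.perf_probe	= my_perf_probe,
	#endif
		/* common dispatcher: maps TRACE_REG_* requests to the probes above */
		.reg		= ftrace_event_reg,
	};
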
index 53cffc0..09b4fa6 100644 (file)
@@ -28,6 +28,7 @@
 DEFINE_MUTEX(event_mutex);
 
 LIST_HEAD(ftrace_events);
+LIST_HEAD(ftrace_common_fields);
 
 struct list_head *
 trace_get_fields(struct ftrace_event_call *event_call)
@@ -37,15 +38,11 @@ trace_get_fields(struct ftrace_event_call *event_call)
        return event_call->class->get_fields(event_call);
 }
 
-int trace_define_field(struct ftrace_event_call *call, const char *type,
-                      const char *name, int offset, int size, int is_signed,
-                      int filter_type)
+static int __trace_define_field(struct list_head *head, const char *type,
+                               const char *name, int offset, int size,
+                               int is_signed, int filter_type)
 {
        struct ftrace_event_field *field;
-       struct list_head *head;
-
-       if (WARN_ON(!call->class))
-               return 0;
 
        field = kzalloc(sizeof(*field), GFP_KERNEL);
        if (!field)
@@ -68,7 +65,6 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
        field->size = size;
        field->is_signed = is_signed;
 
-       head = trace_get_fields(call);
        list_add(&field->link, head);
 
        return 0;
@@ -80,17 +76,32 @@ err:
 
        return -ENOMEM;
 }
+
+int trace_define_field(struct ftrace_event_call *call, const char *type,
+                      const char *name, int offset, int size, int is_signed,
+                      int filter_type)
+{
+       struct list_head *head;
+
+       if (WARN_ON(!call->class))
+               return 0;
+
+       head = trace_get_fields(call);
+       return __trace_define_field(head, type, name, offset, size,
+                                   is_signed, filter_type);
+}
 EXPORT_SYMBOL_GPL(trace_define_field);
 
 #define __common_field(type, item)                                     \
-       ret = trace_define_field(call, #type, "common_" #item,          \
-                                offsetof(typeof(ent), item),           \
-                                sizeof(ent.item),                      \
-                                is_signed_type(type), FILTER_OTHER);   \
+       ret = __trace_define_field(&ftrace_common_fields, #type,        \
+                                  "common_" #item,                     \
+                                  offsetof(typeof(ent), item),         \
+                                  sizeof(ent.item),                    \
+                                  is_signed_type(type), FILTER_OTHER); \
        if (ret)                                                        \
                return ret;
 
-static int trace_define_common_fields(struct ftrace_event_call *call)
+static int trace_define_common_fields(void)
 {
        int ret;
        struct trace_entry ent;
@@ -130,6 +141,55 @@ int trace_event_raw_init(struct ftrace_event_call *call)
 }
 EXPORT_SYMBOL_GPL(trace_event_raw_init);
 
+int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
+{
+       switch (type) {
+       case TRACE_REG_REGISTER:
+               return tracepoint_probe_register(call->name,
+                                                call->class->probe,
+                                                call);
+       case TRACE_REG_UNREGISTER:
+               tracepoint_probe_unregister(call->name,
+                                           call->class->probe,
+                                           call);
+               return 0;
+
+#ifdef CONFIG_PERF_EVENTS
+       case TRACE_REG_PERF_REGISTER:
+               return tracepoint_probe_register(call->name,
+                                                call->class->perf_probe,
+                                                call);
+       case TRACE_REG_PERF_UNREGISTER:
+               tracepoint_probe_unregister(call->name,
+                                           call->class->perf_probe,
+                                           call);
+               return 0;
+#endif
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ftrace_event_reg);
+
+void trace_event_enable_cmd_record(bool enable)
+{
+       struct ftrace_event_call *call;
+
+       mutex_lock(&event_mutex);
+       list_for_each_entry(call, &ftrace_events, list) {
+               if (!(call->flags & TRACE_EVENT_FL_ENABLED))
+                       continue;
+
+               if (enable) {
+                       tracing_start_cmdline_record();
+                       call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
+               } else {
+                       tracing_stop_cmdline_record();