Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Aug 2010 16:30:52 +0000 (09:30 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Aug 2010 16:30:52 +0000 (09:30 -0700)
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (162 commits)
  tracing/kprobes: unregister_trace_probe needs to be called under mutex
  perf: expose event__process function
  perf events: Fix mmap offset determination
  perf, powerpc: fsl_emb: Restore setting perf_sample_data.period
  perf, powerpc: Convert the FSL driver to use local64_t
  perf tools: Don't keep unreferenced maps when unmaps are detected
  perf session: Invalidate last_match when removing threads from rb_tree
  perf session: Free the ref_reloc_sym memory at the right place
  x86,mmiotrace: Add support for tracing STOS instruction
  perf, sched migration: Librarize task states and event headers helpers
  perf, sched migration: Librarize the GUI class
  perf, sched migration: Make the GUI class client agnostic
  perf, sched migration: Make it vertically scrollable
  perf, sched migration: Parameterize cpu height and spacing
  perf, sched migration: Fix key bindings
  perf, sched migration: Ignore unhandled task states
  perf, sched migration: Handle ignored migrate out events
  perf: New migration tool overview
  tracing: Drop cpparg() macro
  perf: Use tracepoint_synchronize_unregister() to flush any pending tracepoint call
  ...

Fix up trivial conflicts in Makefile and drivers/cpufreq/cpufreq.c

179 files changed:
Documentation/ABI/testing/debugfs-kmemtrace [deleted file]
Documentation/kernel-parameters.txt
Documentation/trace/ftrace-design.txt
Documentation/trace/kmemtrace.txt [deleted file]
Documentation/trace/kprobetrace.txt
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/include/asm/local64.h [new file with mode: 0644]
arch/arm/include/asm/local64.h [new file with mode: 0644]
arch/arm/kernel/perf_event.c
arch/avr32/include/asm/local64.h [new file with mode: 0644]
arch/blackfin/include/asm/local64.h [new file with mode: 0644]
arch/cris/include/asm/local64.h [new file with mode: 0644]
arch/frv/include/asm/local64.h [new file with mode: 0644]
arch/frv/kernel/local64.h [new file with mode: 0644]
arch/h8300/include/asm/local64.h [new file with mode: 0644]
arch/ia64/include/asm/local64.h [new file with mode: 0644]
arch/m32r/include/asm/local64.h [new file with mode: 0644]
arch/m68k/include/asm/local64.h [new file with mode: 0644]
arch/microblaze/include/asm/local64.h [new file with mode: 0644]
arch/mips/include/asm/local64.h [new file with mode: 0644]
arch/mn10300/include/asm/local64.h [new file with mode: 0644]
arch/parisc/include/asm/local64.h [new file with mode: 0644]
arch/powerpc/include/asm/local64.h [new file with mode: 0644]
arch/powerpc/include/asm/perf_event.h
arch/powerpc/kernel/misc.S
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/perf_event_fsl_emb.c
arch/s390/include/asm/local64.h [new file with mode: 0644]
arch/score/include/asm/local64.h [new file with mode: 0644]
arch/sh/include/asm/local64.h [new file with mode: 0644]
arch/sh/kernel/perf_event.c
arch/sparc/include/asm/local64.h [new file with mode: 0644]
arch/sparc/include/asm/perf_event.h
arch/sparc/kernel/helpers.S
arch/sparc/kernel/perf_event.c
arch/x86/Kconfig
arch/x86/include/asm/hw_breakpoint.h
arch/x86/include/asm/local64.h [new file with mode: 0644]
arch/x86/include/asm/nmi.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/perf_event_p4.h
arch/x86/include/asm/stacktrace.h
arch/x86/kernel/apic/Makefile
arch/x86/kernel/apic/hw_nmi.c [new file with mode: 0644]
arch/x86/kernel/apic/nmi.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_p4.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack.h [deleted file]
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/hw_breakpoint.c
arch/x86/kernel/kprobes.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/stacktrace.c
arch/x86/kernel/traps.c
arch/x86/mm/pf_in.c
arch/x86/oprofile/nmi_int.c
arch/xtensa/include/asm/local64.h [new file with mode: 0644]
drivers/oprofile/event_buffer.c
fs/exec.c
include/asm-generic/local64.h [new file with mode: 0644]
include/asm-generic/vmlinux.lds.h
include/linux/ftrace.h
include/linux/ftrace_event.h
include/linux/kernel.h
include/linux/kmemtrace.h [deleted file]
include/linux/nmi.h
include/linux/perf_event.h
include/linux/sched.h
include/linux/slab_def.h
include/linux/slub_def.h
include/linux/syscalls.h
include/trace/boot.h [deleted file]
include/trace/events/sched.h
include/trace/events/timer.h
include/trace/ftrace.h
include/trace/syscall.h
init/main.c
kernel/Makefile
kernel/hw_breakpoint.c
kernel/perf_event.c
kernel/sched.c
kernel/softlockup.c [deleted file]
kernel/sysctl.c
kernel/timer.c
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/ftrace.c
kernel/trace/kmemtrace.c [deleted file]
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_boot.c [deleted file]
kernel/trace/trace_clock.c
kernel/trace/trace_entries.h
kernel/trace/trace_event_perf.c
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_export.c
kernel/trace/trace_functions.c
kernel/trace/trace_functions_graph.c
kernel/trace/trace_irqsoff.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_ksym.c [deleted file]
kernel/trace/trace_output.c
kernel/trace/trace_sched_wakeup.c
kernel/trace/trace_selftest.c
kernel/trace/trace_stack.c
kernel/trace/trace_syscalls.c
kernel/trace/trace_sysprof.c [deleted file]
kernel/watchdog.c [new file with mode: 0644]
lib/Kconfig.debug
mm/mmap.c
mm/slab.c
mm/slob.c
mm/slub.c
scripts/package/Makefile
scripts/recordmcount.pl
tools/perf/.gitignore
tools/perf/Documentation/perf-buildid-cache.txt
tools/perf/Documentation/perf-probe.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-top.txt
tools/perf/MANIFEST [new file with mode: 0644]
tools/perf/Makefile
tools/perf/arch/sh/Makefile [new file with mode: 0644]
tools/perf/arch/sh/util/dwarf-regs.c [new file with mode: 0644]
tools/perf/builtin-annotate.c
tools/perf/builtin-buildid-cache.c
tools/perf/builtin-buildid-list.c
tools/perf/builtin-diff.c
tools/perf/builtin-probe.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/feature-tests.mak [new file with mode: 0644]
tools/perf/perf-archive.sh
tools/perf/perf.c
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py [new file with mode: 0644]
tools/perf/scripts/python/bin/sched-migration-record [new file with mode: 0644]
tools/perf/scripts/python/bin/sched-migration-report [new file with mode: 0644]
tools/perf/scripts/python/sched-migration.py [new file with mode: 0644]
tools/perf/util/build-id.c
tools/perf/util/cache.h
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/config.c
tools/perf/util/cpumap.c
tools/perf/util/cpumap.h
tools/perf/util/debug.c
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/header.c
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/map.c
tools/perf/util/map.h
tools/perf/util/newt.c
tools/perf/util/parse-events.c
tools/perf/util/probe-event.c
tools/perf/util/probe-event.h
tools/perf/util/probe-finder.c
tools/perf/util/probe-finder.h
tools/perf/util/session.c
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/util.h

diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
deleted file mode 100644 (file)
index 5e6a92a..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-What:          /sys/kernel/debug/kmemtrace/
-Date:          July 2008
-Contact:       Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
-Description:
-
-In kmemtrace-enabled kernels, the following files are created:
-
-/sys/kernel/debug/kmemtrace/
-       cpu<n>          (0400)  Per-CPU tracing data, see below. (binary)
-       total_overruns  (0400)  Total number of bytes which were dropped from
-                               cpu<n> files because of full buffer condition,
-                               non-binary. (text)
-       abi_version     (0400)  Kernel's kmemtrace ABI version. (text)
-
-Each per-CPU file should be read according to the relay interface. That is,
-the reader should set affinity to that specific CPU and, as currently done by
-the userspace application (though there are other methods), use poll() with
-an infinite timeout before every read(). Otherwise, erroneous data may be
-read. The binary data has the following _core_ format:
-
-       Event ID        (1 byte)        Unsigned integer, one of:
-               0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
-               1 - represents a freeing of previously allocated memory
-                   (KMEMTRACE_EVENT_FREE)
-       Type ID         (1 byte)        Unsigned integer, one of:
-               0 - this is a kmalloc() / kfree()
-               1 - this is a kmem_cache_alloc() / kmem_cache_free()
-               2 - this is a __get_free_pages() et al.
-       Event size      (2 bytes)       Unsigned integer representing the
-                                       size of this event. Used to extend
-                                       kmemtrace. Discard the bytes you
-                                       don't know about.
-       Sequence number (4 bytes)       Signed integer used to reorder data
-                                       logged on SMP machines. Wraparound
-                                       must be taken into account, although
-                                       it is unlikely.
-       Caller address  (8 bytes)       Return address to the caller.
-       Pointer to mem  (8 bytes)       Pointer to target memory area. Can be
-                                       NULL, but not all such calls might be
-                                       recorded.
-
-In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
-
-       Requested bytes (8 bytes)       Total number of requested bytes,
-                                       unsigned, must not be zero.
-       Allocated bytes (8 bytes)       Total number of actually allocated
-                                       bytes, unsigned, must not be lower
-                                       than requested bytes.
-       Requested flags (4 bytes)       GFP flags supplied by the caller.
-       Target CPU      (4 bytes)       Signed integer, valid for event id 1.
-                                       If equal to -1, target CPU is the same
-                                       as origin CPU, but the reverse might
-                                       not be true.
-
-The data is made available in the same endianness the machine has.
-
-Other event ids and type ids may be defined and added. Other fields may be
-added by increasing event size, but see below for details.
-Every modification to the ABI, including new id definitions, are followed
-by bumping the ABI version by one.
-
-Adding new data to the packet (features) is done at the end of the mandatory
-data:
-       Feature size    (2 byte)
-       Feature ID      (1 byte)
-       Feature data    (Feature size - 3 bytes)
-
-
-Users:
-       kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
-
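
Read together, the core and ALLOC layouts above map directly onto packed structures.  A minimal
reader-side sketch, assuming <stdint.h> types and illustrative field names (the ABI only fixes the
sizes and ordering, and the data is in native endianness):

	#include <stdint.h>

	/* Core format common to every kmemtrace packet (24 bytes). */
	struct kmemtrace_core_event {
		uint8_t  event_id;	/* 0 = ALLOC, 1 = FREE */
		uint8_t  type_id;	/* 0 = kmalloc, 1 = kmem_cache, 2 = pages */
		uint16_t event_size;	/* total size of this event */
		int32_t  seq;		/* sequence number, may wrap */
		uint64_t call_site;	/* return address of the caller */
		uint64_t ptr;		/* target memory area, may be 0 */
	} __attribute__((packed));

	/* Extra fields that follow the core format for ALLOC events. */
	struct kmemtrace_alloc_event {
		struct kmemtrace_core_event core;
		uint64_t bytes_req;	/* requested bytes, non-zero */
		uint64_t bytes_alloc;	/* allocated bytes, >= bytes_req */
		uint32_t gfp_flags;	/* GFP flags supplied by the caller */
		int32_t  cpu;		/* target CPU, -1 = same as origin */
	} __attribute__((packed));
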
index f72ba727441faa8dd6f06ba78931052aeb62a2b5..f20c7abc0329ca520d524a5c135a5597a8484086 100644 (file)
@@ -1816,6 +1816,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
        nousb           [USB] Disable the USB subsystem
 
+       nowatchdog      [KNL] Disable the lockup detector.
+
        nowb            [ARM]
 
        nox2apic        [X86-64,APIC] Do not enable x2APIC mode.
index f1f81afee8a04d19bc69a7231c25a91c0e0f17e0..dc52bd442c92aa11b5c717ad811bcd2f92a169fd 100644 (file)
@@ -13,6 +13,9 @@ Note that this focuses on architecture implementation details only.  If you
 want more explanation of a feature in terms of common code, review the common
 ftrace.txt file.
 
+Ideally, everyone who wishes to retain performance while supporting tracing in
+their kernel should make it all the way to dynamic ftrace support.
+
 
 Prerequisites
 -------------
@@ -215,7 +218,7 @@ An arch may pass in a unique value (frame pointer) to both the entering and
 exiting of a function.  On exit, the value is compared and if it does not
 match, then it will panic the kernel.  This is largely a sanity check for bad
 code generation with gcc.  If gcc for your port sanely updates the frame
-pointer under different opitmization levels, then ignore this option.
+pointer under different optimization levels, then ignore this option.
 
 However, adding support for it isn't terribly difficult.  In your assembly code
 that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument.
@@ -234,7 +237,7 @@ If you can't trace NMI functions, then skip this option.
 
 
 HAVE_SYSCALL_TRACEPOINTS
----------------------
+------------------------
 
 You need very few things to get the syscalls tracing in an arch.
 
@@ -250,12 +253,152 @@ You need very few things to get the syscalls tracing in an arch.
 HAVE_FTRACE_MCOUNT_RECORD
 -------------------------
 
-See scripts/recordmcount.pl for more info.
+See scripts/recordmcount.pl for more info.  Just fill in the arch-specific
+details for how to locate the addresses of mcount call sites via objdump.
+This option doesn't make much sense without also implementing dynamic ftrace.
 
+
+HAVE_DYNAMIC_FTRACE
+-------------------
+
+You will first need HAVE_FTRACE_MCOUNT_RECORD and HAVE_FUNCTION_TRACER, so
+scroll your reader back up if you got over eager.
+
+Once those are out of the way, you will need to implement:
+       - asm/ftrace.h:
+               - MCOUNT_ADDR
+               - ftrace_call_adjust()
+               - struct dyn_arch_ftrace{}
+       - asm code:
+               - mcount() (new stub)
+               - ftrace_caller()
+               - ftrace_call()
+               - ftrace_stub()
+       - C code:
+               - ftrace_dyn_arch_init()
+               - ftrace_make_nop()
+               - ftrace_make_call()
+               - ftrace_update_ftrace_func()
+
+First you will need to fill out some arch details in your asm/ftrace.h.
+
+Define MCOUNT_ADDR as the address of your mcount symbol similar to:
+       #define MCOUNT_ADDR ((unsigned long)mcount)
+Since no one else will have a decl for that function, you will need to:
+       extern void mcount(void);
+
+You will also need the helper function ftrace_call_adjust().  Most people
+will be able to stub it out like so:
+       static inline unsigned long ftrace_call_adjust(unsigned long addr)
+       {
+               return addr;
+       }
 <details to be filled>
 
+Lastly you will need the custom dyn_arch_ftrace structure.  If you need
+some extra state when runtime patching arbitrary call sites, this is the
+place.  For now though, create an empty struct:
+       struct dyn_arch_ftrace {
+               /* No extra data needed */
+       };
+
+With the header out of the way, we can fill out the assembly code.  While we
+already created a mcount() function earlier, dynamic ftrace only wants a
+stub function.  This is because the mcount() will only be used during boot
+and then all references to it will be patched out never to return.  Instead,
+the guts of the old mcount() will be used to create a new ftrace_caller()
+function.  Because the two are hard to merge, it will most likely be a lot
+easier to have two separate definitions split up by #ifdefs.  Same goes for
+the ftrace_stub() as that will now be inlined in ftrace_caller().
+
+Before we get any more confused, let's check out some pseudo code so you can
+implement your own stuff in assembly:
 
-HAVE_DYNAMIC_FTRACE
----------------------
+void mcount(void)
+{
+       return;
+}
+
+void ftrace_caller(void)
+{
+       /* implement HAVE_FUNCTION_TRACE_MCOUNT_TEST if you desire */
+
+       /* save all state needed by the ABI (see paragraph above) */
+
+       unsigned long frompc = ...;
+       unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
+
+ftrace_call:
+       ftrace_stub(frompc, selfpc);
+
+       /* restore all state needed by the ABI */
+
+ftrace_stub:
+       return;
+}
+
+This might look a little odd at first, but keep in mind that we will be runtime
+patching multiple things.  First, only functions that we actually want to trace
+will be patched to call ftrace_caller().  Second, since we only have one tracer
+active at a time, we will patch the ftrace_caller() function itself to call the
+specific tracer in question.  That is the point of the ftrace_call label.
+
+With that in mind, let's move on to the C code that will actually be doing the
+runtime patching.  You'll need a little knowledge of your arch's opcodes in
+order to make it through the next section.
+
+Every arch has an init callback function.  If you need to do something early on
+to initialize some state, this is the time to do that.  Otherwise, this simple
+function below should be sufficient for most people:
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+       /* return value is done indirectly via data */
+       *(unsigned long *)data = 0;
+
+       return 0;
+}
+
+There are two functions that are used to do runtime patching of arbitrary
+functions.  The first is used to turn the mcount call site into a nop (which
+is what helps us retain runtime performance when not tracing).  The second is
+used to turn the mcount call site into a call to an arbitrary location (but
+typically that is ftrace_caller()).  See the general function definition in
+linux/ftrace.h for the functions:
+       ftrace_make_nop()
+       ftrace_make_call()
+The rec->ip value is the address of the mcount call site that was collected
+by scripts/recordmcount.pl at build time.
+
+The last function is used to do runtime patching of the active tracer.  This
+will be modifying the assembly code at the location of the ftrace_call symbol
+inside of the ftrace_caller() function.  So you should have sufficient padding
+at that location to support the new function calls you'll be inserting.  Some
+people will be using a "call" type instruction while others will be using a
+"branch" type instruction.  Specifically, the function is:
+       ftrace_update_ftrace_func()
+
+
+HAVE_DYNAMIC_FTRACE + HAVE_FUNCTION_GRAPH_TRACER
+------------------------------------------------
+
+The function grapher needs a few tweaks in order to work with dynamic ftrace.
+Basically, you will need to:
+       - update:
+               - ftrace_caller()
+               - ftrace_graph_call()
+               - ftrace_graph_caller()
+       - implement:
+               - ftrace_enable_ftrace_graph_caller()
+               - ftrace_disable_ftrace_graph_caller()
 
 <details to be filled>
+Quick notes:
+       - add a nop stub after the ftrace_call location named ftrace_graph_call;
+         stub needs to be large enough to support a call to ftrace_graph_caller()
+       - update ftrace_graph_caller() to work with being called by the new
+         ftrace_caller() since some semantics may have changed
+       - ftrace_enable_ftrace_graph_caller() will runtime patch the
+         ftrace_graph_call location with a call to ftrace_graph_caller()
+       - ftrace_disable_ftrace_graph_caller() will runtime patch the
+         ftrace_graph_call location with nops
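
To make the nop/call patching described under HAVE_DYNAMIC_FTRACE a little more concrete, here is
a rough sketch of the two call-site hooks for a hypothetical architecture.  arch_patch_insn() and
arch_make_call_insn() are made-up stand-ins for whatever instruction-encoding helpers your port
already has, and MCOUNT_INSN_SIZE is assumed to come from your asm/ftrace.h:

	/* nop encoding sized to replace the mcount call (arch specific) */
	static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];

	int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
			    unsigned long addr)
	{
		/* rec->ip is the mcount call site found at build time */
		return arch_patch_insn(rec->ip, ftrace_nop, MCOUNT_INSN_SIZE);
	}

	int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
	{
		unsigned char call[MCOUNT_INSN_SIZE];

		/* encode a call (or branch) from rec->ip to addr */
		arch_make_call_insn(call, rec->ip, addr);
		return arch_patch_insn(rec->ip, call, MCOUNT_INSN_SIZE);
	}
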
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt
deleted file mode 100644 (file)
index 6308735..0000000
+++ /dev/null
@@ -1,126 +0,0 @@
-                       kmemtrace - Kernel Memory Tracer
-
-                         by Eduard - Gabriel Munteanu
-                            <eduard.munteanu@linux360.ro>
-
-I. Introduction
-===============
-
-kmemtrace helps kernel developers figure out two things:
-1) how different allocators (SLAB, SLUB etc.) perform
-2) how kernel code allocates memory and how much
-
-To do this, we trace every allocation and export information to the userspace
-through the relay interface. We export things such as the number of requested
-bytes, the number of bytes actually allocated (i.e. including internal
-fragmentation), whether this is a slab allocation or a plain kmalloc() and so
-on.
-
-The actual analysis is performed by a userspace tool (see section III for
-details on where to get it from). It logs the data exported by the kernel,
-processes it and (as of writing this) can provide the following information:
-- the total amount of memory allocated and fragmentation per call-site
-- the amount of memory allocated and fragmentation per allocation
-- total memory allocated and fragmentation in the collected dataset
-- number of cross-CPU allocation and frees (makes sense in NUMA environments)
-
-Moreover, it can potentially find inconsistent and erroneous behavior in
-kernel code, such as using slab free functions on kmalloc'ed memory or
-allocating less memory than requested (but not truly failed allocations).
-
-kmemtrace also makes provisions for tracing on some arch and analysing the
-data on another.
-
-II. Design and goals
-====================
-
-kmemtrace was designed to handle rather large amounts of data. Thus, it uses
-the relay interface to export whatever is logged to userspace, which then
-stores it. Analysis and reporting is done asynchronously, that is, after the
-data is collected and stored. By design, it allows one to log and analyse
-on different machines and different arches.
-
-As of writing this, the ABI is not considered stable, though it might not
-change much. However, no guarantees are made about compatibility yet. When
-deemed stable, the ABI should still allow easy extension while maintaining
-backward compatibility. This is described further in Documentation/ABI.
-
-Summary of design goals:
-       - allow logging and analysis to be done across different machines
-       - be fast and anticipate usage in high-load environments (*)
-       - be reasonably extensible
-       - make it possible for GNU/Linux distributions to have kmemtrace
-       included in their repositories
-
-(*) - one of the reasons Pekka Enberg's original userspace data analysis
-    tool's code was rewritten from Perl to C (although this is more than a
-    simple conversion)
-
-
-III. Quick usage guide
-======================
-
-1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
-CONFIG_KMEMTRACE).
-
-2) Get the userspace tool and build it:
-$ git clone git://repo.or.cz/kmemtrace-user.git                # current repository
-$ cd kmemtrace-user/
-$ ./autogen.sh
-$ ./configure
-$ make
-
-3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
-'single' runlevel (so that relay buffers don't fill up easily), and run
-kmemtrace:
-# '$' does not mean user, but root here.
-$ mount -t debugfs none /sys/kernel/debug
-$ mount -t proc none /proc
-$ cd path/to/kmemtrace-user/
-$ ./kmemtraced
-Wait a bit, then stop it with CTRL+C.
-$ cat /sys/kernel/debug/kmemtrace/total_overruns       # Check if we didn't
-                                                       # overrun, should
-                                                       # be zero.
-$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
-               check its correctness]
-$ ./kmemtrace-report
-
-Now you should have a nice and short summary of how the allocator performs.
-
-IV. FAQ and known issues
-========================
-
-Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
-this? Should I worry?
-A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
-large the number is. You can fix it by supplying a higher
-'kmemtrace.subbufs=N' kernel parameter.
----
-
-Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
-A: This is a bug and should be reported. It can occur for a variety of
-reasons:
-       - possible bugs in relay code
-       - possible misuse of relay by kmemtrace
-       - timestamps being collected unorderly
-Or you may fix it yourself and send us a patch.
----
-
-Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
-A: This is a known issue and I'm working on it. These might be true errors
-in kernel code, which may have inconsistent behavior (e.g. allocating memory
-with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
-out this behavior may work with SLAB, but may fail with other allocators.
-
-It may also be due to lack of tracing in some unusual allocator functions.
-
-We don't want bug reports regarding this issue yet.
----
-
-V. See also
-===========
-
-Documentation/kernel-parameters.txt
-Documentation/ABI/testing/debugfs-kmemtrace
-
index ec94748ae65bf1c55c1359c5c9d71aac714b2e5e..5f77d94598dd577aca9a9f36c9cdee13ca86fec6 100644 (file)
@@ -42,7 +42,7 @@ Synopsis of kprobe_events
   +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
-                 (u8/u16/u32/u64/s8/s16/s32/s64) are supported.
+                 (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported.
 
   (*) only for return probe.
   (**) this is useful for fetching a field of data structures.
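
As a quick illustration of the new string type, a probe definition can now fetch a NUL-terminated
string argument, e.g. something along the lines of:

	p:myprobe do_sys_open path=+0(%si):string

(the symbol, register and offset above are placeholders; pick ones that match your kernel and
architecture), written to the kprobe_events file as usual.
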
index 11e34d5272b8aa7f1912af8b07e71a66269ddf03..100a3f535c9f579be993d6833b1a5939cc158b91 100644 (file)
@@ -3403,13 +3403,6 @@ F:       include/linux/kmemleak.h
 F:     mm/kmemleak.c
 F:     mm/kmemleak-test.c
 
-KMEMTRACE
-M:     Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
-S:     Maintained
-F:     Documentation/trace/kmemtrace.txt
-F:     include/linux/kmemtrace.h
-F:     kernel/trace/kmemtrace.c
-
 KPROBES
 M:     Ananth N Mavinakayanahalli <ananth@in.ibm.com>
 M:     Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
@@ -5685,7 +5678,7 @@ TRACING
 M:     Steven Rostedt <rostedt@goodmis.org>
 M:     Frederic Weisbecker <fweisbec@gmail.com>
 M:     Ingo Molnar <mingo@redhat.com>
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing/core
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf/core
 S:     Maintained
 F:     Documentation/trace/ftrace.txt
 F:     arch/*/*/*/ftrace.h
index 66c94aad36651e723898749f94c163a72bf90705..7431c283f15b3efaed73050a0c33d721a9fc8a57 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -420,7 +420,7 @@ endif
 no-dot-config-targets := clean mrproper distclean \
                         cscope TAGS tags help %docs check% coccicheck \
                         include/linux/version.h headers_% \
-                        kernelversion
+                        kernelversion %src-pkg
 
 config-targets := 0
 mixed-targets  := 0
@@ -1168,6 +1168,8 @@ distclean: mrproper
 # rpm target kept for backward compatibility
 package-dir    := $(srctree)/scripts/package
 
+%src-pkg: FORCE
+       $(Q)$(MAKE) $(build)=$(package-dir) $@
 %pkg: include/config/kernel.release FORCE
        $(Q)$(MAKE) $(build)=$(package-dir) $@
 rpm: include/config/kernel.release FORCE
index acda512da2e21b52a972bb4255404fab3fce7015..4877a8c8ee1697599289f35107824f95ba8daf84 100644 (file)
@@ -151,4 +151,11 @@ config HAVE_MIXED_BREAKPOINTS_REGS
 config HAVE_USER_RETURN_NOTIFIER
        bool
 
+config HAVE_PERF_EVENTS_NMI
+       bool
+       help
+         System hardware can generate an NMI using the perf event
+         subsystem.  Also has support for calculating CPU cycle events
+         to determine how many clock cycles occur in a given period.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/arm/include/asm/local64.h b/arch/arm/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index de12536d687f69a6fa246038afe9754936d223d9..417c392ddf1cb55066fa5f99e83e77514bd89901 100644 (file)
@@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event,
                        struct hw_perf_event *hwc,
                        int idx)
 {
-       s64 left = atomic64_read(&hwc->period_left);
+       s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;
 
        if (unlikely(left <= -period)) {
                left = period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
 
        if (unlikely(left <= 0)) {
                left += period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
@@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event,
        if (left > (s64)armpmu->max_period)
                left = armpmu->max_period;
 
-       atomic64_set(&hwc->prev_count, (u64)-left);
+       local64_set(&hwc->prev_count, (u64)-left);
 
        armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
 
@@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event,
        u64 delta;
 
 again:
-       prev_raw_count = atomic64_read(&hwc->prev_count);
+       prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = armpmu->read_counter(idx);
 
-       if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;
 
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;
 
-       atomic64_add(delta, &event->count);
-       atomic64_sub(delta, &hwc->period_left);
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
 
        return new_raw_count;
 }
@@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event)
        if (!hwc->sample_period) {
                hwc->sample_period  = armpmu->max_period;
                hwc->last_period    = hwc->sample_period;
-               atomic64_set(&hwc->period_left, hwc->sample_period);
+               local64_set(&hwc->period_left, hwc->sample_period);
        }
 
        err = 0;
diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/blackfin/include/asm/local64.h b/arch/blackfin/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/frv/include/asm/local64.h b/arch/frv/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/frv/kernel/local64.h b/arch/frv/kernel/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/h8300/include/asm/local64.h b/arch/h8300/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/m32r/include/asm/local64.h b/arch/m32r/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/m68k/include/asm/local64.h b/arch/m68k/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/mn10300/include/asm/local64.h b/arch/mn10300/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/parisc/include/asm/local64.h b/arch/parisc/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index e6d4ce69b126eef07a463fc144ef2020ed53ad05..5c16b891d501f8f3943a10311d1020da608843d1 100644 (file)
 #ifdef CONFIG_FSL_EMB_PERF_EVENT
 #include <asm/perf_event_fsl_emb.h>
 #endif
+
+#ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+
+#define perf_arch_fetch_caller_regs(regs, __ip)                        \
+       do {                                                    \
+               (regs)->nip = __ip;                             \
+               (regs)->gpr[1] = *(unsigned long *)__get_SP();  \
+               asm volatile("mfmsr %0" : "=r" ((regs)->msr));  \
+       } while (0)
+#endif
index 22e507c8a5566d1f4dfcfe9db67482e3c0e83e35..2d29752cbe169ea9398fbd1381cd9eff64ec28c8 100644 (file)
@@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
 _GLOBAL(__restore_cpu_power7)
        /* place holder */
        blr
-
-/*
- * Get a minimal set of registers for our caller's nth caller.
- * r3 = regs pointer, r5 = n.
- *
- * We only get R1 (stack pointer), NIP (next instruction pointer)
- * and LR (link register).  These are all we can get in the
- * general case without doing complicated stack unwinding, but
- * fortunately they are enough to do a stack backtrace, which
- * is all we need them for.
- */
-_GLOBAL(perf_arch_fetch_caller_regs)
-       mr      r6,r1
-       cmpwi   r5,0
-       mflr    r4
-       ble     2f
-       mtctr   r5
-1:     PPC_LL  r6,0(r6)
-       bdnz    1b
-       PPC_LL  r4,PPC_LR_STKOFF(r6)
-2:     PPC_LL  r7,0(r6)
-       PPC_LL  r7,PPC_LR_STKOFF(r7)
-       PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3)
-       PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3)
-       PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3)
-       blr
index 5c14ffe5125813d19e6793ebf69e132fa19dbffe..d301a30445e09a49cec4a3d4dcf2ea01529934b3 100644 (file)
@@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)
         * Therefore we treat them like NMIs.
         */
        do {
-               prev = atomic64_read(&event->hw.prev_count);
+               prev = local64_read(&event->hw.prev_count);
                barrier();
                val = read_pmc(event->hw.idx);
-       } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+       } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
        /* The counters are only 32 bits wide */
        delta = (val - prev) & 0xfffffffful;
-       atomic64_add(delta, &event->count);
-       atomic64_sub(delta, &event->hw.period_left);
+       local64_add(delta, &event->count);
+       local64_sub(delta, &event->hw.period_left);
 }
 
 /*
@@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
                if (!event->hw.idx)
                        continue;
                val = (event->hw.idx == 5) ? pmc5 : pmc6;
-               prev = atomic64_read(&event->hw.prev_count);
+               prev = local64_read(&event->hw.prev_count);
                event->hw.idx = 0;
                delta = (val - prev) & 0xfffffffful;
-               atomic64_add(delta, &event->count);
+               local64_add(delta, &event->count);
        }
 }
 
@@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
                event = cpuhw->limited_counter[i];
                event->hw.idx = cpuhw->limited_hwidx[i];
                val = (event->hw.idx == 5) ? pmc5 : pmc6;
-               atomic64_set(&event->hw.prev_count, val);
+               local64_set(&event->hw.prev_count, val);
                perf_event_update_userpage(event);
        }
 }
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
                }
                val = 0;
                if (event->hw.sample_period) {
-                       left = atomic64_read(&event->hw.period_left);
+                       left = local64_read(&event->hw.period_left);
                        if (left < 0x80000000L)
                                val = 0x80000000L - left;
                }
-               atomic64_set(&event->hw.prev_count, val);
+               local64_set(&event->hw.prev_count, val);
                event->hw.idx = idx;
                write_pmc(idx, val);
                perf_event_update_userpage(event);
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
         * skip the schedulability test here, it will be performed
         * at commit time(->commit_txn) as a whole
         */
-       if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED)
+       if (cpuhw->group_flag & PERF_EVENT_TXN)
                goto nocheck;
 
        if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -845,8 +845,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
        if (left < 0x80000000L)
                val = 0x80000000L - left;
        write_pmc(event->hw.idx, val);
-       atomic64_set(&event->hw.prev_count, val);
-       atomic64_set(&event->hw.period_left, left);
+       local64_set(&event->hw.prev_count, val);
+       local64_set(&event->hw.period_left, left);
        perf_event_update_userpage(event);
        perf_enable();
        local_irq_restore(flags);
@@ -861,7 +861,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-       cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+       cpuhw->group_flag |= PERF_EVENT_TXN;
        cpuhw->n_txn_start = cpuhw->n_events;
 }
 
@@ -874,7 +874,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-       cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+       cpuhw->group_flag &= ~PERF_EVENT_TXN;
 }
 
 /*
@@ -900,6 +900,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
        for (i = cpuhw->n_txn_start; i < n; ++i)
                cpuhw->event[i]->hw.config = cpuhw->events[i];
 
+       cpuhw->group_flag &= ~PERF_EVENT_TXN;
        return 0;
 }
 
@@ -1111,7 +1112,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
        event->hw.config = events[n];
        event->hw.event_base = cflags[n];
        event->hw.last_period = event->hw.sample_period;
-       atomic64_set(&event->hw.period_left, event->hw.last_period);
+       local64_set(&event->hw.period_left, event->hw.last_period);
 
        /*
         * See if we need to reserve the PMU.
@@ -1149,16 +1150,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
        int record = 0;
 
        /* we don't have to worry about interrupts here */
-       prev = atomic64_read(&event->hw.prev_count);
+       prev = local64_read(&event->hw.prev_count);
        delta = (val - prev) & 0xfffffffful;
-       atomic64_add(delta, &event->count);
+       local64_add(delta, &event->count);
 
        /*
         * See if the total period for this event has expired,
         * and update for the next period.
         */
        val = 0;
-       left = atomic64_read(&event->hw.period_left) - delta;
+       left = local64_read(&event->hw.period_left) - delta;
        if (period) {
                if (left <= 0) {
                        left += period;
@@ -1196,8 +1197,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
        }
 
        write_pmc(event->hw.idx, val);
-       atomic64_set(&event->hw.prev_count, val);
-       atomic64_set(&event->hw.period_left, left);
+       local64_set(&event->hw.prev_count, val);
+       local64_set(&event->hw.period_left, left);
        perf_event_update_userpage(event);
 }
 
index babcceecd2eab6335217d2434c6e91e5c7fe5884..1ba45471ae436617e1ecbf3654a5064ef15d1af7 100644 (file)
@@ -162,15 +162,15 @@ static void fsl_emb_pmu_read(struct perf_event *event)
         * Therefore we treat them like NMIs.
         */
        do {
-               prev = atomic64_read(&event->hw.prev_count);
+               prev = local64_read(&event->hw.prev_count);
                barrier();
                val = read_pmc(event->hw.idx);
-       } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+       } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
        /* The counters are only 32 bits wide */
        delta = (val - prev) & 0xfffffffful;
-       atomic64_add(delta, &event->count);
-       atomic64_sub(delta, &event->hw.period_left);
+       local64_add(delta, &event->count);
+       local64_sub(delta, &event->hw.period_left);
 }
 
 /*
@@ -296,11 +296,11 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
 
        val = 0;
        if (event->hw.sample_period) {
-               s64 left = atomic64_read(&event->hw.period_left);
+               s64 left = local64_read(&event->hw.period_left);
                if (left < 0x80000000L)
                        val = 0x80000000L - left;
        }
-       atomic64_set(&event->hw.prev_count, val);
+       local64_set(&event->hw.prev_count, val);
        write_pmc(i, val);
        perf_event_update_userpage(event);
 
@@ -371,8 +371,8 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event)
        if (left < 0x80000000L)
                val = 0x80000000L - left;
        write_pmc(event->hw.idx, val);
-       atomic64_set(&event->hw.prev_count, val);
-       atomic64_set(&event->hw.period_left, left);
+       local64_set(&event->hw.prev_count, val);
+       local64_set(&event->hw.period_left, left);
        perf_event_update_userpage(event);
        perf_enable();
        local_irq_restore(flags);
@@ -500,7 +500,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
                return ERR_PTR(-ENOTSUPP);
 
        event->hw.last_period = event->hw.sample_period;
-       atomic64_set(&event->hw.period_left, event->hw.last_period);
+       local64_set(&event->hw.period_left, event->hw.last_period);
 
        /*
         * See if we need to reserve the PMU.
@@ -541,16 +541,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
        int record = 0;
 
        /* we don't have to worry about interrupts here */
-       prev = atomic64_read(&event->hw.prev_count);
+       prev = local64_read(&event->hw.prev_count);
        delta = (val - prev) & 0xfffffffful;
-       atomic64_add(delta, &event->count);
+       local64_add(delta, &event->count);
 
        /*
         * See if the total period for this event has expired,
         * and update for the next period.
         */
        val = 0;
-       left = atomic64_read(&event->hw.period_left) - delta;
+       left = local64_read(&event->hw.period_left) - delta;
        if (period) {
                if (left <= 0) {
                        left += period;
@@ -569,6 +569,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
                struct perf_sample_data data;
 
                perf_sample_data_init(&data, 0);
+               data.period = event->hw.last_period;
 
                if (perf_event_overflow(event, nmi, &data, regs)) {
                        /*
@@ -584,8 +585,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
        }
 
        write_pmc(event->hw.idx, val);
-       atomic64_set(&event->hw.prev_count, val);
-       atomic64_set(&event->hw.period_left, left);
+       local64_set(&event->hw.prev_count, val);
+       local64_set(&event->hw.period_left, left);
        perf_event_update_userpage(event);
 }
 
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/score/include/asm/local64.h b/arch/score/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index 81b6de41ae5d1c3bfb87c340acee5291a499a036..7a3dc356725839f2cf8579491efd8d02ba11b483 100644 (file)
@@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event,
         * this is the simplest approach for maintaining consistency.
         */
 again:
-       prev_raw_count = atomic64_read(&hwc->prev_count);
+       prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = sh_pmu->read(idx);
 
-       if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;
 
@@ -203,7 +203,7 @@ again:
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;
 
-       atomic64_add(delta, &event->count);
+       local64_add(delta, &event->count);
 }
 
 static void sh_pmu_disable(struct perf_event *event)
diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index 7e2669894ce8346152ba1910e4122f954dd7a1a2..74c4e0cd889c700a0a901937bd716366fb882c10 100644 (file)
@@ -6,7 +6,15 @@ extern void set_perf_event_pending(void);
 #define        PERF_EVENT_INDEX_OFFSET 0
 
 #ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+
 extern void init_hw_perf_events(void);
+
+extern void
+__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+
+#define perf_arch_fetch_caller_regs(pt_regs, ip)       \
+       __perf_arch_fetch_caller_regs(pt_regs, ip, 1);
 #else
 static inline void init_hw_perf_events(void)   { }
 #endif
index 92090cc9e82937a2daf6730dca89668ce9e04869..682fee06a16b044e28275935f45fba73accee3a5 100644 (file)
@@ -47,9 +47,9 @@ stack_trace_flush:
        .size           stack_trace_flush,.-stack_trace_flush
 
 #ifdef CONFIG_PERF_EVENTS
-       .globl          perf_arch_fetch_caller_regs
-       .type           perf_arch_fetch_caller_regs,#function
-perf_arch_fetch_caller_regs:
+       .globl          __perf_arch_fetch_caller_regs
+       .type           __perf_arch_fetch_caller_regs,#function
+__perf_arch_fetch_caller_regs:
        /* We always read the %pstate into %o5 since we will use
         * that to construct a fake %tstate to store into the regs.
         */
index 44faabc3c02c920bba1b9f12272d1a907895161e..357ced3c33ffac87a992e01b6820a77084cfb8de 100644 (file)
@@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event,
        s64 delta;
 
 again:
-       prev_raw_count = atomic64_read(&hwc->prev_count);
+       prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = read_pmc(idx);
 
-       if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;
 
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;
 
-       atomic64_add(delta, &event->count);
-       atomic64_sub(delta, &hwc->period_left);
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
 
        return new_raw_count;
 }
@@ -591,27 +591,27 @@ again:
 static int sparc_perf_event_set_period(struct perf_event *event,
                                       struct hw_perf_event *hwc, int idx)
 {
-       s64 left = atomic64_read(&hwc->period_left);
+       s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;
 
        if (unlikely(left <= -period)) {
                left = period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
 
        if (unlikely(left <= 0)) {
                left += period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
        if (left > MAX_PERIOD)
                left = MAX_PERIOD;
 
-       atomic64_set(&hwc->prev_count, (u64)-left);
+       local64_set(&hwc->prev_count, (u64)-left);
 
        write_pmc(idx, (u64)(-left) & 0xffffffff);
 
@@ -1006,7 +1006,7 @@ static int sparc_pmu_enable(struct perf_event *event)
         * skip the schedulability test here, it will be performed
         * at commit time(->commit_txn) as a whole
         */
-       if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+       if (cpuc->group_flag & PERF_EVENT_TXN)
                goto nocheck;
 
        if (check_excludes(cpuc->event, n0, 1))
@@ -1088,7 +1088,7 @@ static int __hw_perf_event_init(struct perf_event *event)
        if (!hwc->sample_period) {
                hwc->sample_period = MAX_PERIOD;
                hwc->last_period = hwc->sample_period;
-               atomic64_set(&hwc->period_left, hwc->sample_period);
+               local64_set(&hwc->period_left, hwc->sample_period);
        }
 
        return 0;
@@ -1103,7 +1103,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-       cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+       cpuhw->group_flag |= PERF_EVENT_TXN;
 }
 
 /*
@@ -1115,7 +1115,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-       cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+       cpuhw->group_flag &= ~PERF_EVENT_TXN;
 }
 
 /*
@@ -1138,6 +1138,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
        if (sparc_check_constraints(cpuc->event, cpuc->events, n))
                return -EAGAIN;
 
+       cpuc->group_flag &= ~PERF_EVENT_TXN;
        return 0;
 }
 
index dcb0593b4a66348a204a44fee3366c5da0848ad1..6f77afa6bca92203a338badf7e25a3fe01ab4e57 100644 (file)
@@ -55,6 +55,7 @@ config X86
        select HAVE_HW_BREAKPOINT
        select HAVE_MIXED_BREAKPOINTS_REGS
        select PERF_EVENTS
+       select HAVE_PERF_EVENTS_NMI
        select ANON_INODES
        select HAVE_ARCH_KMEMCHECK
        select HAVE_USER_RETURN_NOTIFIER
index 942255310e6a16391b3e0a5bcbcd59ccafece3d6..528a11e8d3e35f64fea90202d6f196d77d48e708 100644 (file)
@@ -20,10 +20,10 @@ struct arch_hw_breakpoint {
 #include <linux/list.h>
 
 /* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_X           0x00
 #define X86_BREAKPOINT_LEN_1           0x40
 #define X86_BREAKPOINT_LEN_2           0x44
 #define X86_BREAKPOINT_LEN_4           0x4c
-#define X86_BREAKPOINT_LEN_EXECUTE     0x40
 
 #ifdef CONFIG_X86_64
 #define X86_BREAKPOINT_LEN_8           0x48
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index 93da9c3f334120b64165a8124b79b09e71365bf4..932f0f86b4b76252e6e6434ab9c15d81c3b17004 100644 (file)
@@ -17,7 +17,9 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int check_nmi_watchdog(void);
+#if !defined(CONFIG_LOCKUP_DETECTOR)
 extern int nmi_watchdog_enabled;
+#endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
index 254883d0c7e088424ed983154613b4ad889435e1..6e742cc4251b49b2474830107da0378951c874be 100644 (file)
@@ -68,8 +68,9 @@ union cpuid10_eax {
 
 union cpuid10_edx {
        struct {
-               unsigned int num_counters_fixed:4;
-               unsigned int reserved:28;
+               unsigned int num_counters_fixed:5;
+               unsigned int bit_width_fixed:8;
+               unsigned int reserved:19;
        } split;
        unsigned int full;
 };
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)  perf_misc_flags(regs)
 
+#include <asm/stacktrace.h>
+
+/*
+ * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
+ * and the comment with PERF_EFLAGS_EXACT.
+ */
+#define perf_arch_fetch_caller_regs(regs, __ip)                {       \
+       (regs)->ip = (__ip);                                    \
+       (regs)->bp = caller_frame_pointer();                    \
+       (regs)->cs = __KERNEL_CS;                               \
+       regs->flags = 0;                                        \
+}
+
 #else
 static inline void init_hw_perf_events(void)           { }
 static inline void perf_events_lapic_init(void)        { }
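
The widened cpuid10_edx layout earlier in this file mirrors CPUID leaf 0xA, whose EDX word also
reports the bit width of the fixed-function counters.  A minimal user-space sketch of decoding it
with GCC's cpuid.h, reusing the union as declared above:

	#include <stdio.h>
	#include <cpuid.h>

	union cpuid10_edx {
		struct {
			unsigned int num_counters_fixed:5;
			unsigned int bit_width_fixed:8;
			unsigned int reserved:19;
		} split;
		unsigned int full;
	};

	int main(void)
	{
		unsigned int eax, ebx, ecx;
		union cpuid10_edx edx;

		if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx.full))
			return 1;	/* leaf 0xA not available */

		printf("fixed counters: %u, %u bits wide\n",
		       edx.split.num_counters_fixed,
		       edx.split.bit_width_fixed);
		return 0;
	}
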
index 64a8ebff06fcef47dc36301e038ea2d63acad27b..def500776b16a3b63d34da569021722e4d82f18a 100644 (file)
@@ -19,7 +19,6 @@
 #define ARCH_P4_RESERVED_ESCR  (2) /* IQ_ESCR(0,1) not always present */
 #define ARCH_P4_MAX_ESCR       (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
 #define ARCH_P4_MAX_CCCR       (18)
-#define ARCH_P4_MAX_COUNTER    (ARCH_P4_MAX_CCCR / 2)
 
 #define P4_ESCR_EVENT_MASK     0x7e000000U
 #define P4_ESCR_EVENT_SHIFT    25
 #define P4_CCCR_THRESHOLD(v)           ((v) << P4_CCCR_THRESHOLD_SHIFT)
 #define P4_CCCR_ESEL(v)                        ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
 
-/* Custom bits in reerved CCCR area */
-#define P4_CCCR_CACHE_OPS_MASK         0x0000003fU
-
-
 /* Non HT mask */
 #define P4_CCCR_MASK                           \
        (P4_CCCR_OVF                    |       \
  * ESCR and CCCR but rather an only packed value should
  * be unpacked and written to a proper addresses
  *
- * the base idea is to pack as much info as
- * possible
+ * the base idea is to pack as much info as possible
  */
 #define p4_config_pack_escr(v)         (((u64)(v)) << 32)
 #define p4_config_pack_cccr(v)         (((u64)(v)) & 0xffffffffULL)
                t;                                      \
        })
 
-#define p4_config_unpack_cache_event(v)        (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
-
 #define P4_CONFIG_HT_SHIFT             63
 #define P4_CONFIG_HT                   (1ULL << P4_CONFIG_HT_SHIFT)
 
@@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
        return escr;
 }
 
+/*
+ * These are the events which should be used in the "Event Select"
+ * field of the ESCR register; they are like unique keys which allow
+ * the kernel to determine which CCCR and COUNTER should be
+ * used to track an event
+ */
 enum P4_EVENTS {
        P4_EVENT_TC_DELIVER_MODE,
        P4_EVENT_BPU_FETCH_REQUEST,
@@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES {
  * a caller should use P4_ESCR_EMASK_NAME helper to
  * pick the EventMask needed, for example
  *
- *     P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD)
+ *     P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
  */
 enum P4_ESCR_EMASKS {
        P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
@@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS {
        P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
 };
 
-/* P4 PEBS: stale for a while */
-#define P4_PEBS_METRIC_MASK    0x00001fffU
-#define P4_PEBS_UOB_TAG                0x01000000U
-#define P4_PEBS_ENABLE         0x02000000U
-
-/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */
-#define P4_PEBS__1stl_cache_load_miss_retired  0x3000001
-#define P4_PEBS__2ndl_cache_load_miss_retired  0x3000002
-#define P4_PEBS__dtlb_load_miss_retired                0x3000004
-#define P4_PEBS__dtlb_store_miss_retired       0x3000004
-#define P4_PEBS__dtlb_all_miss_retired         0x3000004
-#define P4_PEBS__tagged_mispred_branch         0x3018000
-#define P4_PEBS__mob_load_replay_retired       0x3000200
-#define P4_PEBS__split_load_retired            0x3000400
-#define P4_PEBS__split_store_retired           0x3000400
-
-#define P4_VERT__1stl_cache_load_miss_retired  0x0000001
-#define P4_VERT__2ndl_cache_load_miss_retired  0x0000001
-#define P4_VERT__dtlb_load_miss_retired                0x0000001
-#define P4_VERT__dtlb_store_miss_retired       0x0000002
-#define P4_VERT__dtlb_all_miss_retired         0x0000003
-#define P4_VERT__tagged_mispred_branch         0x0000010
-#define P4_VERT__mob_load_replay_retired       0x0000001
-#define P4_VERT__split_load_retired            0x0000001
-#define P4_VERT__split_store_retired           0x0000002
-
-enum P4_CACHE_EVENTS {
-       P4_CACHE__NONE,
-
-       P4_CACHE__1stl_cache_load_miss_retired,
-       P4_CACHE__2ndl_cache_load_miss_retired,
-       P4_CACHE__dtlb_load_miss_retired,
-       P4_CACHE__dtlb_store_miss_retired,
-       P4_CACHE__itlb_reference_hit,
-       P4_CACHE__itlb_reference_miss,
-
-       P4_CACHE__MAX
+/*
+ * P4 PEBS specifics (Replay Event only)
+ *
+ * Format (bits):
+ *   0-6: metric from P4_PEBS_METRIC enum
+ *    7 : reserved
+ *    8 : reserved
+ * 9-11 : reserved
+ *
+ * Note we have the UOP and PEBS bits reserved for now
+ * in case we need them later
+ */
+#define P4_PEBS_CONFIG_ENABLE          (1 << 7)
+#define P4_PEBS_CONFIG_UOP_TAG         (1 << 8)
+#define P4_PEBS_CONFIG_METRIC_MASK     0x3f
+#define P4_PEBS_CONFIG_MASK            0xff
+
+/*
+ * mem: Only counters MSR_IQ_COUNTER4 (16) and
+ * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
+ */
+#define P4_PEBS_ENABLE                 0x02000000U
+#define P4_PEBS_ENABLE_UOP_TAG         0x01000000U
+
+#define p4_config_unpack_metric(v)     (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
+#define p4_config_unpack_pebs(v)       (((u64)(v)) & P4_PEBS_CONFIG_MASK)
+
+#define p4_config_pebs_has(v, mask)    (p4_config_unpack_pebs(v) & (mask))
+
+enum P4_PEBS_METRIC {
+       P4_PEBS_METRIC__none,
+
+       P4_PEBS_METRIC__1stl_cache_load_miss_retired,
+       P4_PEBS_METRIC__2ndl_cache_load_miss_retired,
+       P4_PEBS_METRIC__dtlb_load_miss_retired,
+       P4_PEBS_METRIC__dtlb_store_miss_retired,
+       P4_PEBS_METRIC__dtlb_all_miss_retired,
+       P4_PEBS_METRIC__tagged_mispred_branch,
+       P4_PEBS_METRIC__mob_load_replay_retired,
+       P4_PEBS_METRIC__split_load_retired,
+       P4_PEBS_METRIC__split_store_retired,
+
+       P4_PEBS_METRIC__max
 };
 
 #endif /* PERF_EVENT_P4_H */
+
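For reference, a raw P4 config built with these helpers carries the ESCR selection in the upper 32 bits and the CCCR bits, now including the PEBS metric index, in the lower 32. A minimal sketch mirroring the P4_GEN_CACHE_EVENT() macro used by the driver below; the local variable names are illustrative only:

        u64 config;
        unsigned int metric;

        /* REPLAY_EVENT carrying the 1st-level cache load miss metric */
        config = p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_REPLAY_EVENT) |
                                     P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS)) |
                 p4_config_pack_cccr(P4_PEBS_METRIC__1stl_cache_load_miss_retired |
                                     P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(P4_EVENT_REPLAY_EVENT))));

        /* the driver later recovers the metric index from the low config bits */
        metric = p4_config_unpack_metric(config);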
index 4dab78edbad9ff7315a8c30aebeb734f4e87da1e..2b16a2ad23dc6b9647028c0808f8f45b094e74ac 100644 (file)
@@ -1,6 +1,13 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+
 #ifndef _ASM_X86_STACKTRACE_H
 #define _ASM_X86_STACKTRACE_H
 
+#include <linux/uaccess.h>
+
 extern int kstack_depth_to_print;
 
 struct thread_info;
@@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
                unsigned long *stack, unsigned long bp,
                const struct stacktrace_ops *ops, void *data);
 
+#ifdef CONFIG_X86_32
+#define STACKSLOTS_PER_LINE 8
+#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
+#else
+#define STACKSLOTS_PER_LINE 4
+#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
+#endif
+
+extern void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+               unsigned long *stack, unsigned long bp, char *log_lvl);
+
+extern void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+               unsigned long *sp, unsigned long bp, char *log_lvl);
+
+extern unsigned int code_bytes;
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+       struct stack_frame *next_frame;
+       unsigned long return_address;
+};
+
+struct stack_frame_ia32 {
+       u32 next_frame;
+       u32 return_address;
+};
+
+static inline unsigned long caller_frame_pointer(void)
+{
+       struct stack_frame *frame;
+
+       get_bp(frame);
+
+#ifdef CONFIG_FRAME_POINTER
+       frame = frame->next_frame;
+#endif
+
+       return (unsigned long)frame;
+}
+
 #endif /* _ASM_X86_STACKTRACE_H */
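Together with the perf_arch_fetch_caller_regs() macro added to <asm/perf_event.h> above, this helper lets a caller snapshot its own ip and its caller's frame pointer. A minimal sketch assuming the usual includes; the function name is hypothetical:

        #include <linux/kernel.h>       /* _THIS_IP_ */
        #include <linux/string.h>       /* memset()  */
        #include <asm/ptrace.h>
        #include <asm/perf_event.h>

        static void snapshot_caller_regs(struct pt_regs *regs)
        {
                memset(regs, 0, sizeof(*regs));
                /* ip = this call site, bp = the caller's frame (via caller_frame_pointer()) */
                perf_arch_fetch_caller_regs(regs, _THIS_IP_);
        }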
index 565c1bfc507d41b8387eea7cc7e6ffd22f3f114a..910f20b457c464d34f1e9874269d652ea0da325e 100644 (file)
@@ -2,7 +2,12 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 #
 
-obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o apic_noop.o probe_$(BITS).o ipi.o
+ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
+obj-$(CONFIG_X86_LOCAL_APIC)   += nmi.o
+endif
+obj-$(CONFIG_HARDLOCKUP_DETECTOR)      += hw_nmi.o
+
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o
 obj-$(CONFIG_SMP)              += ipi.o
 
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
new file mode 100644 (file)
index 0000000..cefd694
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ *  HW NMI watchdog support
+ *
+ *  started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ *  Arch specific calls to support NMI watchdog
+ *
+ *  Bits copied from original nmi.c file
+ *
+ */
+#include <asm/apic.h>
+
+#include <linux/cpumask.h>
+#include <linux/kdebug.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+
+u64 hw_nmi_get_sample_period(void)
+{
+       return (u64)(cpu_khz) * 1000 * 60;
+}
+
+#ifdef ARCH_HAS_NMI_WATCHDOG
+void arch_trigger_all_cpu_backtrace(void)
+{
+       int i;
+
+       cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
+
+       printk(KERN_INFO "sending NMI to all CPUs:\n");
+       apic->send_IPI_all(NMI_VECTOR);
+
+       /* Wait for up to 10 seconds for all CPUs to do the backtrace */
+       for (i = 0; i < 10 * 1000; i++) {
+               if (cpumask_empty(to_cpumask(backtrace_mask)))
+                       break;
+               mdelay(1);
+       }
+}
+
+static int __kprobes
+arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
+                        unsigned long cmd, void *__args)
+{
+       struct die_args *args = __args;
+       struct pt_regs *regs;
+       int cpu = smp_processor_id();
+
+       switch (cmd) {
+       case DIE_NMI:
+       case DIE_NMI_IPI:
+               break;
+
+       default:
+               return NOTIFY_DONE;
+       }
+
+       regs = args->regs;
+
+       if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
+               static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+               arch_spin_lock(&lock);
+               printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
+               show_regs(regs);
+               dump_stack();
+               arch_spin_unlock(&lock);
+               cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
+               return NOTIFY_STOP;
+       }
+
+       return NOTIFY_DONE;
+}
+
+static __read_mostly struct notifier_block backtrace_notifier = {
+       .notifier_call          = arch_trigger_all_cpu_backtrace_handler,
+       .next                   = NULL,
+       .priority               = 1
+};
+
+static int __init register_trigger_all_cpu_backtrace(void)
+{
+       register_die_notifier(&backtrace_notifier);
+       return 0;
+}
+early_initcall(register_trigger_all_cpu_backtrace);
+#endif
+
+/* STUB calls to mimic old nmi_watchdog behaviour */
+#if defined(CONFIG_X86_LOCAL_APIC)
+unsigned int nmi_watchdog = NMI_NONE;
+EXPORT_SYMBOL(nmi_watchdog);
+void acpi_nmi_enable(void) { return; }
+void acpi_nmi_disable(void) { return; }
+#endif
+atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
+EXPORT_SYMBOL(nmi_active);
+int unknown_nmi_panic;
+void cpu_nmi_set_wd_enabled(void) { return; }
+void stop_apic_nmi_watchdog(void *unused) { return; }
+void setup_apic_nmi_watchdog(void *unused) { return; }
+int __init check_nmi_watchdog(void) { return 0; }
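The stubs above keep the old nmi_watchdog symbols alive while hard lockup detection moves to the perf-based watchdog; the backtrace path is normally reached through the generic wrapper in <linux/nmi.h>. A small sketch of a caller; the function name is hypothetical:

        #include <linux/kernel.h>
        #include <linux/nmi.h>

        static void dump_all_cpus(void)
        {
                /* returns false when the arch provides no NMI backtrace hook */
                if (!trigger_all_cpu_backtrace())
                        pr_info("NMI backtrace not available on this arch\n");
        }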
index 1edaf15c0b8eef05b36653a0ed9087a9b3717f9e..a43f71cb30f8709595fe77c80be19d36c24aae29 100644 (file)
@@ -401,13 +401,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
        int cpu = smp_processor_id();
        int rc = 0;
 
-       /* check for other users first */
-       if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-                       == NOTIFY_STOP) {
-               rc = 1;
-               touched = 1;
-       }
-
        sum = get_timer_irqs(cpu);
 
        if (__get_cpu_var(nmi_touch)) {
index 5db5b7d65a180f6a7f0c2cb970d63e04129add2a..f2da20fda02ddf6fcd449a88ba399fe4ed44af2a 100644 (file)
@@ -220,6 +220,7 @@ struct x86_pmu {
                                                 struct perf_event *event);
        struct event_constraint *event_constraints;
        void            (*quirks)(void);
+       int             perfctr_second_write;
 
        int             (*cpu_prepare)(int cpu);
        void            (*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
         * count to the generic event atomically:
         */
 again:
-       prev_raw_count = atomic64_read(&hwc->prev_count);
+       prev_raw_count = local64_read(&hwc->prev_count);
        rdmsrl(hwc->event_base + idx, new_raw_count);
 
-       if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                        new_raw_count) != prev_raw_count)
                goto again;
 
@@ -313,8 +314,8 @@ again:
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;
 
-       atomic64_add(delta, &event->count);
-       atomic64_sub(delta, &hwc->period_left);
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
 
        return new_raw_count;
 }
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
        if (!hwc->sample_period) {
                hwc->sample_period = x86_pmu.max_period;
                hwc->last_period = hwc->sample_period;
-               atomic64_set(&hwc->period_left, hwc->sample_period);
+               local64_set(&hwc->period_left, hwc->sample_period);
        } else {
                /*
                 * If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
 x86_perf_event_set_period(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
-       s64 left = atomic64_read(&hwc->period_left);
+       s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0, idx = hwc->idx;
 
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
         */
        if (unlikely(left <= -period)) {
                left = period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
 
        if (unlikely(left <= 0)) {
                left += period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
         * The hw event starts counting from this event offset,
         * mark it to be able to extra future deltas:
         */
-       atomic64_set(&hwc->prev_count, (u64)-left);
+       local64_set(&hwc->prev_count, (u64)-left);
 
-       wrmsrl(hwc->event_base + idx,
+       wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+
+       /*
+        * Due to an erratum on certain CPUs we need
+        * a second write to be sure the register
+        * is updated properly
+        */
+       if (x86_pmu.perfctr_second_write) {
+               wrmsrl(hwc->event_base + idx,
                        (u64)(-left) & x86_pmu.cntval_mask);
+       }
 
        perf_event_update_userpage(event);
 
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
         * skip the schedulability test here, it will be peformed
         * at commit time(->commit_txn) as a whole
         */
-       if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+       if (cpuc->group_flag & PERF_EVENT_TXN)
                goto out;
 
        ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
         * The events never got scheduled and ->cancel_txn will truncate
         * the event_list.
         */
-       if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+       if (cpuc->group_flag & PERF_EVENT_TXN)
                return;
 
        x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-       cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+       cpuc->group_flag |= PERF_EVENT_TXN;
        cpuc->n_txn = 0;
 }
 
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-       cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+       cpuc->group_flag &= ~PERF_EVENT_TXN;
        /*
         * Truncate the collected events.
         */
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
         */
        memcpy(cpuc->assign, assign, n*sizeof(int));
 
-       /*
-        * Clear out the txn count so that ->cancel_txn() which gets
-        * run after ->commit_txn() doesn't undo things.
-        */
-       cpuc->n_txn = 0;
+       cpuc->group_flag &= ~PERF_EVENT_TXN;
 
        return 0;
 }
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
        .walk_stack             = print_context_stack_bp,
 };
 
-#include "../dumpstack.h"
-
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
        return entry;
 }
 
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
-       regs->ip = ip;
-       /*
-        * perf_arch_fetch_caller_regs adds another call, we need to increment
-        * the skip level
-        */
-       regs->bp = rewind_frame_pointer(skip + 1);
-       regs->cs = __KERNEL_CS;
-       /*
-        * We abuse bit 3 to pass exact information, see perf_misc_flags
-        * and the comment with PERF_EFLAGS_EXACT.
-        */
-       regs->flags = 0;
-}
-
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
        unsigned long ip;
index ae85d69644d182f31c9711eba6c5ddb21862bcec..107711bf0ee8f9ff05e886b2143f1f69123d0d41 100644 (file)
@@ -21,22 +21,36 @@ struct p4_event_bind {
        char cntr[2][P4_CNTR_LIMIT];            /* counter index (offset), -1 on abscence */
 };
 
-struct p4_cache_event_bind {
+struct p4_pebs_bind {
        unsigned int metric_pebs;
        unsigned int metric_vert;
 };
 
-#define P4_GEN_CACHE_EVENT_BIND(name)          \
-       [P4_CACHE__##name] = {                  \
-               .metric_pebs = P4_PEBS__##name, \
-               .metric_vert = P4_VERT__##name, \
+/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
+#define P4_GEN_PEBS_BIND(name, pebs, vert)                     \
+       [P4_PEBS_METRIC__##name] = {                            \
+               .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,   \
+               .metric_vert = vert,                            \
        }
 
-static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
-       P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
-       P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
-       P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
-       P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
+/*
+ * note we have P4_PEBS_ENABLE_UOP_TAG always set here
+ *
+ * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
+ * event configuration to find out which values are to be
+ * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
+ * registers
+ */
+static struct p4_pebs_bind p4_pebs_bind_map[] = {
+       P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,  0x0000001, 0x0000001),
+       P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,  0x0000002, 0x0000001),
+       P4_GEN_PEBS_BIND(dtlb_load_miss_retired,        0x0000004, 0x0000001),
+       P4_GEN_PEBS_BIND(dtlb_store_miss_retired,       0x0000004, 0x0000002),
+       P4_GEN_PEBS_BIND(dtlb_all_miss_retired,         0x0000004, 0x0000003),
+       P4_GEN_PEBS_BIND(tagged_mispred_branch,         0x0018000, 0x0000010),
+       P4_GEN_PEBS_BIND(mob_load_replay_retired,       0x0000200, 0x0000001),
+       P4_GEN_PEBS_BIND(split_load_retired,            0x0000400, 0x0000001),
+       P4_GEN_PEBS_BIND(split_store_retired,           0x0000400, 0x0000002),
 };
 
 /*
@@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = {
        },
 };
 
-#define P4_GEN_CACHE_EVENT(event, bit, cache_event)                      \
+#define P4_GEN_CACHE_EVENT(event, bit, metric)                           \
        p4_config_pack_escr(P4_ESCR_EVENT(event)                        | \
                            P4_ESCR_EMASK_BIT(event, bit))              | \
-       p4_config_pack_cccr(cache_event                                 | \
+       p4_config_pack_cccr(metric                                      | \
                            P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
 
 static __initconst const u64 p4_hw_cache_event_ids
@@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__1stl_cache_load_miss_retired),
+                                               P4_PEBS_METRIC__1stl_cache_load_miss_retired),
        },
  },
  [ C(LL  ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__2ndl_cache_load_miss_retired),
+                                               P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
        },
 },
  [ C(DTLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__dtlb_load_miss_retired),
+                                               P4_PEBS_METRIC__dtlb_load_miss_retired),
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__dtlb_store_miss_retired),
+                                               P4_PEBS_METRIC__dtlb_store_miss_retired),
        },
  },
  [ C(ITLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
-                                               P4_CACHE__itlb_reference_hit),
+                                               P4_PEBS_METRIC__none),
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
-                                               P4_CACHE__itlb_reference_miss),
+                                               P4_PEBS_METRIC__none),
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
@@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event)
        return config;
 }
 
+static int p4_validate_raw_event(struct perf_event *event)
+{
+       unsigned int v;
+
+       /* user data may have out-of-bound event index */
+       v = p4_config_unpack_event(event->attr.config);
+       if (v >= ARRAY_SIZE(p4_event_bind_map)) {
+               pr_warning("P4 PMU: Unknown event code: %d\n", v);
+               return -EINVAL;
+       }
+
+       /*
+        * it may have some screwed PEBS bits
+        */
+       if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
+               pr_warning("P4 PMU: PEBS are not supported yet\n");
+               return -EINVAL;
+       }
+       v = p4_config_unpack_metric(event->attr.config);
+       if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
+               pr_warning("P4 PMU: Unknown metric code: %d\n", v);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int p4_hw_config(struct perf_event *event)
 {
        int cpu = get_cpu();
        int rc = 0;
-       unsigned int evnt;
        u32 escr, cccr;
 
        /*
@@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event)
 
        if (event->attr.type == PERF_TYPE_RAW) {
 
-               /* user data may have out-of-bound event index */
-               evnt = p4_config_unpack_event(event->attr.config);
-               if (evnt >= ARRAY_SIZE(p4_event_bind_map)) {
-                       rc = -EINVAL;
+               rc = p4_validate_raw_event(event);
+               if (rc)
                        goto out;
-               }
 
                /*
                 * We don't control raw events so it's up to the caller
@@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event)
                 * on HT machine but allow HT-compatible specifics to be
                 * passed on)
                 *
+                * Note that for RAW events we allow the user to use P4_CCCR_RESERVED
+                * bits since we keep additional info here (for cache events, etc.)
+                *
                 * XXX: HT wide things should check perf_paranoid_cpu() &&
                 *      CAP_SYS_ADMIN
                 */
                event->hw.config |= event->attr.config &
                        (p4_config_pack_escr(P4_ESCR_MASK_HT) |
-                        p4_config_pack_cccr(P4_CCCR_MASK_HT));
+                        p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
        }
 
        rc = x86_setup_perfctr(event);
@@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
        return overflow;
 }
 
+static void p4_pmu_disable_pebs(void)
+{
+       /*
+        * FIXME
+        *
+        * Two threads are still allowed to set up the same cache
+        * events, so we can't simply clear the metrics until we know
+        * no one is depending on us; we would need some kind of
+        * reference counter for "ReplayEvent" users.
+        *
+        * RAW events are even trickier: if the user (for some
+        * reason) passes a cache event metric with an improper
+        * event opcode, it's fine from the hardware point of view
+        * but complete nonsense as far as the meaning of such an
+        * action goes.
+        *
+        * So for the moment leave the metrics turned on forever; it's
+        * OK for now but needs to be revisited!
+        *
+        * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
+        * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
+        */
+}
+
 static inline void p4_pmu_disable_event(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
@@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void)
                        continue;
                p4_pmu_disable_event(event);
        }
+
+       p4_pmu_disable_pebs();
+}
+
+/* configuration must be valid */
+static void p4_pmu_enable_pebs(u64 config)
+{
+       struct p4_pebs_bind *bind;
+       unsigned int idx;
+
+       BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
+
+       idx = p4_config_unpack_metric(config);
+       if (idx == P4_PEBS_METRIC__none)
+               return;
+
+       bind = &p4_pebs_bind_map[idx];
+
+       (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE,     (u64)bind->metric_pebs);
+       (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT,  (u64)bind->metric_vert);
 }
 
 static void p4_pmu_enable_event(struct perf_event *event)
@@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
        int thread = p4_ht_config_thread(hwc->config);
        u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
        unsigned int idx = p4_config_unpack_event(hwc->config);
-       unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
        struct p4_event_bind *bind;
-       struct p4_cache_event_bind *bind_cache;
        u64 escr_addr, cccr;
 
        bind = &p4_event_bind_map[idx];
@@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event)
        cccr = p4_config_unpack_cccr(hwc->config);
 
        /*
-        * it could be Cache event so that we need to
-        * set metrics into additional MSRs
+        * it could be Cache event so we need to write metrics
+        * into additional MSRs
         */
-       BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
-       if (idx_cache > P4_CACHE__NONE &&
-               idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
-               bind_cache = &p4_cache_event_bind_map[idx_cache];
-               (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
-               (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
-       }
+       p4_pmu_enable_pebs(hwc->config);
 
        (void)checking_wrmsrl(escr_addr, escr_conf);
        (void)checking_wrmsrl(hwc->config_base + hwc->idx,
@@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = {
        .max_period             = (1ULL << 39) - 1,
        .hw_config              = p4_hw_config,
        .schedule_events        = p4_pmu_schedule_events,
+       /*
+        * This handles erratum N15 in Intel doc 249199-029:
+        * the counter may not be updated correctly on write,
+        * so we need a second write operation to do the trick
+        * (the official workaround didn't work)
+        *
+        * the idea is taken from the OProfile code
+        */
+       .perfctr_second_write   = 1,
 };
 
 static __init int p4_pmu_init(void)
index c89a386930b7f4d9bc9c74dc0fa0151056860493..6e8752c1bd5241fc9e7e63ee088f06c84d0526fb 100644 (file)
@@ -18,7 +18,6 @@
 
 #include <asm/stacktrace.h>
 
-#include "dumpstack.h"
 
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
deleted file mode 100644 (file)
index e1a93be..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
- */
-
-#ifndef DUMPSTACK_H
-#define DUMPSTACK_H
-
-#ifdef CONFIG_X86_32
-#define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
-#else
-#define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
-#endif
-
-#include <linux/uaccess.h>
-
-extern void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp, char *log_lvl);
-
-extern void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *sp, unsigned long bp, char *log_lvl);
-
-extern unsigned int code_bytes;
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
-       struct stack_frame *next_frame;
-       unsigned long return_address;
-};
-
-struct stack_frame_ia32 {
-    u32 next_frame;
-    u32 return_address;
-};
-
-static inline unsigned long rewind_frame_pointer(int n)
-{
-       struct stack_frame *frame;
-
-       get_bp(frame);
-
-#ifdef CONFIG_FRAME_POINTER
-       while (n--) {
-               if (probe_kernel_address(&frame->next_frame, frame))
-                       break;
-       }
-#endif
-
-       return (unsigned long)frame;
-}
-
-#endif /* DUMPSTACK_H */
index 11540a189d9311e6a0fb4c44e685fc82799fdd0e..0f6376ffa2d9b6da338a6145c38f8b3307fa6e36 100644 (file)
@@ -16,8 +16,6 @@
 
 #include <asm/stacktrace.h>
 
-#include "dumpstack.h"
-
 
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
                unsigned long *stack, unsigned long bp,
index 272c9f1f05f31bf20492dfb6be35bed716be26b1..57a21f11c791b38a2b88559349cd041935e262da 100644 (file)
@@ -16,7 +16,6 @@
 
 #include <asm/stacktrace.h>
 
-#include "dumpstack.h"
 
 #define N_EXCEPTION_STACKS_END \
                (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
index a8f1b803d2fd916c7aacf4952ff33b620f790852..a474ec37c32f84df372d39eac5730532d60d0228 100644 (file)
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
 {
        /* Len */
        switch (x86_len) {
+       case X86_BREAKPOINT_LEN_X:
+               *gen_len = sizeof(long);
+               break;
        case X86_BREAKPOINT_LEN_1:
                *gen_len = HW_BREAKPOINT_LEN_1;
                break;
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)
 
        info->address = bp->attr.bp_addr;
 
+       /* Type */
+       switch (bp->attr.bp_type) {
+       case HW_BREAKPOINT_W:
+               info->type = X86_BREAKPOINT_WRITE;
+               break;
+       case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+               info->type = X86_BREAKPOINT_RW;
+               break;
+       case HW_BREAKPOINT_X:
+               info->type = X86_BREAKPOINT_EXECUTE;
+               /*
+                * x86 inst breakpoints need to have a specific undefined len.
+                * But we still need to check that userspace is not trying to set up
+                * an unsupported length (to get a range breakpoint, for example).
+                */
+               if (bp->attr.bp_len == sizeof(long)) {
+                       info->len = X86_BREAKPOINT_LEN_X;
+                       return 0;
+               }
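+               /* any other bp_len is unsupported: fall through to -EINVAL */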
+       default:
+               return -EINVAL;
+       }
+
        /* Len */
        switch (bp->attr.bp_len) {
        case HW_BREAKPOINT_LEN_1:
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)
                return -EINVAL;
        }
 
-       /* Type */
-       switch (bp->attr.bp_type) {
-       case HW_BREAKPOINT_W:
-               info->type = X86_BREAKPOINT_WRITE;
-               break;
-       case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
-               info->type = X86_BREAKPOINT_RW;
-               break;
-       case HW_BREAKPOINT_X:
-               info->type = X86_BREAKPOINT_EXECUTE;
-               break;
-       default:
-               return -EINVAL;
-       }
-
        return 0;
 }
 /*
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
        ret = -EINVAL;
 
        switch (info->len) {
+       case X86_BREAKPOINT_LEN_X:
+               align = sizeof(long) - 1;
+               break;
        case X86_BREAKPOINT_LEN_1:
                align = 0;
                break;
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 
                perf_bp_event(bp, args->regs);
 
+               /*
+                * Set the resume flag to avoid breakpoint recursion when
+                * returning to the origin.
+                */
+               if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
+                       args->regs->flags |= X86_EFLAGS_RF;
+
                rcu_read_unlock();
        }
        /*
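In practical terms the length convention above means an instruction breakpoint is requested with bp_len set to sizeof(long) instead of one of the HW_BREAKPOINT_LEN_* values. A hedged sketch of such an attribute setup; the probed symbol and the wrapper function are hypothetical:

        #include <linux/perf_event.h>
        #include <linux/hw_breakpoint.h>

        extern void some_probe_target(void);    /* hypothetical symbol to trap on */

        static void setup_exec_bp_attr(struct perf_event_attr *attr)
        {
                hw_breakpoint_init(attr);
                attr->bp_type = HW_BREAKPOINT_X;
                attr->bp_addr = (unsigned long)some_probe_target;
                attr->bp_len  = sizeof(long);   /* the "undefined" length required for execute */
        }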
index 675879b65ce666c91b868c96972ea35f107810f4..1bfb6cf4dd55d67aeeebbbf89d0bb60283a94c94 100644 (file)
@@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to)
 }
 
 /*
- * Check for the REX prefix which can only exist on X86_64
- * X86_32 always returns 0
+ * Skip the prefixes of the instruction.
  */
-static int __kprobes is_REX_prefix(kprobe_opcode_t *insn)
+static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
 {
+       insn_attr_t attr;
+
+       attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+       while (inat_is_legacy_prefix(attr)) {
+               insn++;
+               attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+       }
 #ifdef CONFIG_X86_64
-       if ((*insn & 0xf0) == 0x40)
-               return 1;
+       if (inat_is_rex_prefix(attr))
+               insn++;
 #endif
-       return 0;
+       return insn;
 }
 
 /*
@@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr)
  */
 static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
 {
+       /* Skip prefixes */
+       insn = skip_prefixes(insn);
+
        switch (*insn) {
        case 0xfa:              /* cli */
        case 0xfb:              /* sti */
@@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
                return 1;
        }
 
-       /*
-        * on X86_64, 0x40-0x4f are REX prefixes so we need to look
-        * at the next byte instead.. but of course not recurse infinitely
-        */
-       if (is_REX_prefix(insn))
-               return is_IF_modifier(++insn);
-
        return 0;
 }
 
@@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p,
        unsigned long orig_ip = (unsigned long)p->addr;
        kprobe_opcode_t *insn = p->ainsn.insn;
 
-       /*skip the REX prefix*/
-       if (is_REX_prefix(insn))
-               insn++;
+       /* Skip prefixes */
+       insn = skip_prefixes(insn);
 
        regs->flags &= ~X86_EFLAGS_TF;
        switch (*insn) {
index 8d128783af47374e56412d01d0488aa12af1647b..96586c3cbbbf88dd6479ed250b24ea1112154a22 100644 (file)
@@ -57,6 +57,8 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 
+#include <trace/events/power.h>
+
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 /*
@@ -111,6 +113,8 @@ void cpu_idle(void)
                        stop_critical_timings();
                        pm_idle();
                        start_critical_timings();
+
+                       trace_power_end(smp_processor_id());
                }
                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
index 3c2422a99f1f8293480ad436551cfac8c600d326..3d9ea531ddd1bfa8cc9e2e28fbfa8a22f461bc20 100644 (file)
@@ -51,6 +51,8 @@
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 
+#include <trace/events/power.h>
+
 asmlinkage extern void ret_from_fork(void);
 
 DEFINE_PER_CPU(unsigned long, old_rsp);
@@ -138,6 +140,9 @@ void cpu_idle(void)
                        stop_critical_timings();
                        pm_idle();
                        start_critical_timings();
+
+                       trace_power_end(smp_processor_id());
+
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
index 922eefbb3f6c72b7b511791b083b916040b9f95e..b53c525368a75cf07489b0327de138bfab5b16d5 100644 (file)
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)
        return 0;
 }
 
-static void save_stack_address(void *data, unsigned long addr, int reliable)
+static void
+__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
 {
        struct stack_trace *trace = data;
+#ifdef CONFIG_FRAME_POINTER
        if (!reliable)
                return;
+#endif
+       if (nosched && in_sched_functions(addr))
+               return;
        if (trace->skip > 0) {
                trace->skip--;
                return;
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
                trace->entries[trace->nr_entries++] = addr;
 }
 
+static void save_stack_address(void *data, unsigned long addr, int reliable)
+{
+       return __save_stack_address(data, addr, reliable, false);
+}
+
 static void
 save_stack_address_nosched(void *data, unsigned long addr, int reliable)
 {
-       struct stack_trace *trace = (struct stack_trace *)data;
-       if (!reliable)
-               return;
-       if (in_sched_functions(addr))
-               return;
-       if (trace->skip > 0) {
-               trace->skip--;
-               return;
-       }
-       if (trace->nr_entries < trace->max_entries)
-               trace->entries[trace->nr_entries++] = addr;
+       return __save_stack_address(data, addr, reliable, true);
 }
 
 static const struct stacktrace_ops save_stack_ops = {
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
 /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
 
-struct stack_frame {
+struct stack_frame_user {
        const void __user       *next_fp;
        unsigned long           ret_addr;
 };
 
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+static int
+copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
 {
        int ret;
 
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
                trace->entries[trace->nr_entries++] = regs->ip;
 
        while (trace->nr_entries < trace->max_entries) {
-               struct stack_frame frame;
+               struct stack_frame_user frame;
 
                frame.next_fp = NULL;
                frame.ret_addr = 0;
index 725ef4d17cd5922289b24e515c8b030e86cf6c18..60788dee0f8a74f53d547c462d5694201b6d609d 100644 (file)
@@ -392,7 +392,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
                if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
                                                                == NOTIFY_STOP)
                        return;
+
 #ifdef CONFIG_X86_LOCAL_APIC
+               if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+                                                       == NOTIFY_STOP)
+                       return;
+
+#ifndef CONFIG_LOCKUP_DETECTOR
                /*
                 * Ok, so this is none of the documented NMI sources,
                 * so it must be the NMI watchdog.
@@ -400,6 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
                if (nmi_watchdog_tick(regs, reason))
                        return;
                if (!do_nmi_callback(regs, cpu))
+#endif /* !CONFIG_LOCKUP_DETECTOR */
                        unknown_nmi_error(reason, regs);
 #else
                unknown_nmi_error(reason, regs);
index 308e32570d846f3eeb339bc662de5b2c50c3cb0e..38e6d174c497ec7127a70cb778f7034e83605737 100644 (file)
@@ -40,16 +40,16 @@ static unsigned char prefix_codes[] = {
 static unsigned int reg_rop[] = {
        0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
 };
-static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
 static unsigned int imm_wop[] = { 0xC6, 0xC7 };
 /* IA32 Manual 3, 3-432*/
-static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
+static unsigned int rw8[] = { 0x88, 0x8A, 0xC6, 0xAA };
 static unsigned int rw32[] = {
-       0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+       0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
 };
-static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
+static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F, 0xAA };
 static unsigned int mw16[] = { 0xB70F, 0xBF0F };
-static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
+static unsigned int mw32[] = { 0x89, 0x8B, 0xC7, 0xAB };
 static unsigned int mw64[] = {};
 #else /* not __i386__ */
 static unsigned char prefix_codes[] = {
@@ -63,20 +63,20 @@ static unsigned char prefix_codes[] = {
 static unsigned int reg_rop[] = {
        0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
 };
-static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
 static unsigned int imm_wop[] = { 0xC6, 0xC7 };
-static unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
+static unsigned int rw8[] = { 0xC6, 0x88, 0x8A, 0xAA };
 static unsigned int rw32[] = {
-       0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+       0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
 };
 /* 8 bit only */
-static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
+static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F, 0xAA };
 /* 16 bit only */
 static unsigned int mw16[] = { 0xB70F, 0xBF0F };
 /* 16 or 32 bit */
 static unsigned int mw32[] = { 0xC7 };
 /* 16, 32 or 64 bit */
-static unsigned int mw64[] = { 0x89, 0x8B };
+static unsigned int mw64[] = { 0x89, 0x8B, 0xAB };
 #endif /* not __i386__ */
 
 struct prefix_bits {
@@ -410,7 +410,6 @@ static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
 unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
 {
        unsigned int opcode;
-       unsigned char mod_rm;
        int reg;
        unsigned char *p;
        struct prefix_bits prf;
@@ -437,8 +436,13 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
        goto err;
 
 do_work:
-       mod_rm = *p;
-       reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
+       /* for STOS, source register is fixed */
+       if (opcode == 0xAA || opcode == 0xAB) {
+               reg = arg_AX;
+       } else {
+               unsigned char mod_rm = *p;
+               reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
+       }
        switch (get_ins_reg_width(ins_addr)) {
        case 1:
                return *get_reg_w8(reg, prf.rex, regs);
index b28d2f1253bbc927731afa08022fdef21a598a24..1ba67dc8006ab700170afb6964954c33c5705591 100644 (file)
@@ -634,6 +634,18 @@ static int __init ppro_init(char **cpu_type)
        if (force_arch_perfmon && cpu_has_arch_perfmon)
                return 0;
 
+       /*
+        * Documentation on identifying Intel processors by CPU family
+        * and model can be found in the Intel Software Developer's
+        * Manuals (SDM):
+        *
+        *  http://www.intel.com/products/processor/manuals/
+        *
+        * As of May 2010 the documentation for this was in the:
+        * "Intel 64 and IA-32 Architectures Software Developer's
+        * Manual Volume 3B: System Programming Guide", "Table B-1
+        * CPUID Signature Values of DisplayFamily_DisplayModel".
+        */
        switch (cpu_model) {
        case 0 ... 2:
                *cpu_type = "i386/ppro";
@@ -655,12 +667,12 @@ static int __init ppro_init(char **cpu_type)
        case 15: case 23:
                *cpu_type = "i386/core_2";
                break;
+       case 0x1a:
        case 0x2e:
-       case 26:
                spec = &op_arch_perfmon_spec;
                *cpu_type = "i386/core_i7";
                break;
-       case 28:
+       case 0x1c:
                *cpu_type = "i386/atom";
                break;
        default:
diff --git a/arch/xtensa/include/asm/local64.h b/arch/xtensa/include/asm/local64.h
new file mode 100644 (file)
index 0000000..36c93b5
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
index 5df60a6b67766adb402a30fee44d7b7782bc5e08..dd87e86048be7913e9cc0090c52a957712d0067c 100644 (file)
@@ -135,7 +135,7 @@ static int event_buffer_open(struct inode *inode, struct file *file)
         * echo 1 >/dev/oprofile/enable
         */
 
-       return 0;
+       return nonseekable_open(inode, file);
 
 fail:
        dcookie_unregister(file->private_data);
@@ -205,4 +205,5 @@ const struct file_operations event_buffer_fops = {
        .open           = event_buffer_open,
        .release        = event_buffer_release,
        .read           = event_buffer_read,
+       .llseek         = no_llseek,
 };
index e19de6a80339b3ceeae267283962f271ac6c5e55..97d91a03fb1339c03f2b78c42a0010d0266b1fbd 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -653,6 +653,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
        else
                stack_base = vma->vm_start - stack_expand;
 #endif
+       current->mm->start_stack = bprm->p;
        ret = expand_stack(vma, stack_base);
        if (ret)
                ret = -EFAULT;
diff --git a/include/asm-generic/local64.h b/include/asm-generic/local64.h
new file mode 100644 (file)
index 0000000..02ac760
--- /dev/null
@@ -0,0 +1,96 @@
+#ifndef _ASM_GENERIC_LOCAL64_H
+#define _ASM_GENERIC_LOCAL64_H
+
+#include <linux/percpu.h>
+#include <asm/types.h>
+
+/*
+ * A signed long type for operations which are atomic for a single CPU.
+ * Usually used in combination with per-cpu variables.
+ *
+ * This is the default implementation, which uses atomic64_t.  Which is
+ * rather pointless.  The whole point behind local64_t is that some processors
+ * can perform atomic adds and subtracts in a manner which is atomic wrt IRQs
+ * running on this CPU.  local64_t allows exploitation of such capabilities.
+ */
+
+/* Implement in terms of atomics. */
+
+#if BITS_PER_LONG == 64
+
+#include <asm/local.h>
+
+typedef struct {
+       local_t a;
+} local64_t;
+
+#define LOCAL64_INIT(i)        { LOCAL_INIT(i) }
+
+#define local64_read(l)                local_read(&(l)->a)
+#define local64_set(l,i)       local_set((&(l)->a),(i))
+#define local64_inc(l)         local_inc(&(l)->a)
+#define local64_dec(l)         local_dec(&(l)->a)
+#define local64_add(i,l)       local_add((i),(&(l)->a))
+#define local64_sub(i,l)       local_sub((i),(&(l)->a))
+
+#define local64_sub_and_test(i, l) local_sub_and_test((i), (&(l)->a))
+#define local64_dec_and_test(l) local_dec_and_test(&(l)->a)
+#define local64_inc_and_test(l) local_inc_and_test(&(l)->a)
+#define local64_add_negative(i, l) local_add_negative((i), (&(l)->a))
+#define local64_add_return(i, l) local_add_return((i), (&(l)->a))
+#define local64_sub_return(i, l) local_sub_return((i), (&(l)->a))
+#define local64_inc_return(l)  local_inc_return(&(l)->a)
+
+#define local64_cmpxchg(l, o, n) local_cmpxchg((&(l)->a), (o), (n))
+#define local64_xchg(l, n)     local_xchg((&(l)->a), (n))
+#define local64_add_unless(l, _a, u) local_add_unless((&(l)->a), (_a), (u))
+#define local64_inc_not_zero(l)        local_inc_not_zero(&(l)->a)
+
+/* Non-atomic variants, i.e. preemption disabled and won't be touched
+ * in interrupt, etc.  Some archs can optimize this case well. */
+#define __local64_inc(l)       local64_set((l), local64_read(l) + 1)
+#define __local64_dec(l)       local64_set((l), local64_read(l) - 1)
+#define __local64_add(i,l)     local64_set((l), local64_read(l) + (i))
+#define __local64_sub(i,l)     local64_set((l), local64_read(l) - (i))
+
+#else /* BITS_PER_LONG != 64 */
+
+#include <asm/atomic.h>
+
+/* Don't use typedef: don't want them to be mixed with atomic_t's. */
+typedef struct {
+       atomic64_t a;
+} local64_t;
+
+#define LOCAL64_INIT(i)        { ATOMIC_LONG_INIT(i) }
+
+#define local64_read(l)                atomic64_read(&(l)->a)
+#define local64_set(l,i)       atomic64_set((&(l)->a),(i))
+#define local64_inc(l)         atomic64_inc(&(l)->a)
+#define local64_dec(l)         atomic64_dec(&(l)->a)
+#define local64_add(i,l)       atomic64_add((i),(&(l)->a))
+#define local64_sub(i,l)       atomic64_sub((i),(&(l)->a))
+
+#define local64_sub_and_test(i, l) atomic64_sub_and_test((i), (&(l)->a))
+#define local64_dec_and_test(l) atomic64_dec_and_test(&(l)->a)
+#define local64_inc_and_test(l) atomic64_inc_and_test(&(l)->a)
+#define local64_add_negative(i, l) atomic64_add_negative((i), (&(l)->a))
+#define local64_add_return(i, l) atomic64_add_return((i), (&(l)->a))
+#define local64_sub_return(i, l) atomic64_sub_return((i), (&(l)->a))
+#define local64_inc_return(l)  atomic64_inc_return(&(l)->a)
+
+#define local64_cmpxchg(l, o, n) atomic64_cmpxchg((&(l)->a), (o), (n))
+#define local64_xchg(l, n)     atomic64_xchg((&(l)->a), (n))
+#define local64_add_unless(l, _a, u) atomic64_add_unless((&(l)->a), (_a), (u))
+#define local64_inc_not_zero(l)        atomic64_inc_not_zero(&(l)->a)
+
+/* Non-atomic variants, i.e. preemption disabled and won't be touched
+ * in interrupt, etc.  Some archs can optimize this case well. */
+#define __local64_inc(l)       local64_set((l), local64_read(l) + 1)
+#define __local64_dec(l)       local64_set((l), local64_read(l) - 1)
+#define __local64_add(i,l)     local64_set((l), local64_read(l) + (i))
+#define __local64_sub(i,l)     local64_set((l), local64_read(l) - (i))
+
+#endif /* BITS_PER_LONG != 64 */
+
+#endif /* _ASM_GENERIC_LOCAL64_H */
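A minimal usage sketch (type and function names are illustrative): the intended pattern is a counter that only its owning CPU updates, possibly from IRQ or NMI context, while readers tolerate a racy snapshot:

        #include <linux/types.h>
        #include <asm/local64.h>

        struct cpu_stat {
                local64_t bytes;                /* updated only by the owning CPU */
        };

        static void cpu_stat_add(struct cpu_stat *s, s64 delta)
        {
                local64_add(delta, &s->bytes);  /* IRQ/NMI-safe on the local CPU */
        }

        static u64 cpu_stat_read(struct cpu_stat *s)
        {
                return (u64)local64_read(&s->bytes);
        }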
index 4e7ae6002056b0913c4d98e060bb0f7d129dfbf4..8a92a170fb7dfd87710bb26b3c7f384dd6801363 100644 (file)
        CPU_KEEP(exit.data)                                             \
        MEM_KEEP(init.data)                                             \
        MEM_KEEP(exit.data)                                             \
-       . = ALIGN(8);                                                   \
-       VMLINUX_SYMBOL(__start___markers) = .;                          \
-       *(__markers)                                                    \
-       VMLINUX_SYMBOL(__stop___markers) = .;                           \
        . = ALIGN(32);                                                  \
        VMLINUX_SYMBOL(__start___tracepoints) = .;                      \
        *(__tracepoints)                                                \
index 41e46330d9bedfd16d46a920cbd840dad54afffe..dcd6a7c3a4358b310b430a16dc52f43b547e02f2 100644 (file)
@@ -1,3 +1,8 @@
+/*
+ * Ftrace header.  For implementation details beyond the random comments
+ * scattered below, see: Documentation/trace/ftrace-design.txt
+ */
+
 #ifndef _LINUX_FTRACE_H
 #define _LINUX_FTRACE_H
 
index 3167f2df4126c12e195be536c687c577212da651..02b8b24f8f51f0e37156731ba94da19ba2d19131 100644 (file)
@@ -11,8 +11,6 @@ struct trace_array;
 struct tracer;
 struct dentry;
 
-DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
-
 struct trace_print_flags {
        unsigned long           mask;
        const char              *name;
@@ -58,6 +56,9 @@ struct trace_iterator {
        struct ring_buffer_iter *buffer_iter[NR_CPUS];
        unsigned long           iter_flags;
 
+       /* trace_seq for __print_flags() and __print_symbolic() etc. */
+       struct trace_seq        tmp_seq;
+
        /* The below is zeroed out in pipe_read */
        struct trace_seq        seq;
        struct trace_entry      *ent;
@@ -146,14 +147,19 @@ struct ftrace_event_class {
        int                     (*raw_init)(struct ftrace_event_call *);
 };
 
+extern int ftrace_event_reg(struct ftrace_event_call *event,
+                           enum trace_reg type);
+
 enum {
        TRACE_EVENT_FL_ENABLED_BIT,
        TRACE_EVENT_FL_FILTERED_BIT,
+       TRACE_EVENT_FL_RECORDED_CMD_BIT,
 };
 
 enum {
-       TRACE_EVENT_FL_ENABLED  = (1 << TRACE_EVENT_FL_ENABLED_BIT),
-       TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
+       TRACE_EVENT_FL_ENABLED          = (1 << TRACE_EVENT_FL_ENABLED_BIT),
+       TRACE_EVENT_FL_FILTERED         = (1 << TRACE_EVENT_FL_FILTERED_BIT),
+       TRACE_EVENT_FL_RECORDED_CMD     = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
 };
 
 struct ftrace_event_call {
@@ -171,6 +177,7 @@ struct ftrace_event_call {
         * 32 bit flags:
         *   bit 1:             enabled
         *   bit 2:             filter_active
+        *   bit 3:             enabled cmd record
         *
         * Changes to flags must hold the event_mutex.
         *
@@ -257,8 +264,7 @@ static inline void
 perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
                       u64 count, struct pt_regs *regs, void *head)
 {
-       perf_tp_event(addr, count, raw_data, size, regs, head);
-       perf_swevent_put_recursion_context(rctx);
+       perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
 }
 #endif
 
index 5de838b0fc1a62afb20fb75eda44089937f87eea..38e462e00594f43dc5094b63bb52706cb7abb95f 100644 (file)
@@ -513,9 +513,6 @@ extern void tracing_start(void);
 extern void tracing_stop(void);
 extern void ftrace_off_permanent(void);
 
-extern void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
-
 static inline void __attribute__ ((format (printf, 1, 2)))
 ____trace_printk_check_format(const char *fmt, ...)
 {
@@ -591,8 +588,6 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
 
 extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
 #else
-static inline void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 static inline int
 trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
deleted file mode 100644 (file)
index b616d39..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- *
- * This file is released under GPL version 2.
- */
-
-#ifndef _LINUX_KMEMTRACE_H
-#define _LINUX_KMEMTRACE_H
-
-#ifdef __KERNEL__
-
-#include <trace/events/kmem.h>
-
-#ifdef CONFIG_KMEMTRACE
-extern void kmemtrace_init(void);
-#else
-static inline void kmemtrace_init(void)
-{
-}
-#endif
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_KMEMTRACE_H */
-
index b752e807addece22ec9210f0d109d2818505d34d..06aab5eee134cd56c4bade9005912fa3a785a327 100644 (file)
@@ -20,10 +20,14 @@ extern void touch_nmi_watchdog(void);
 extern void acpi_nmi_disable(void);
 extern void acpi_nmi_enable(void);
 #else
+#ifndef CONFIG_HARDLOCKUP_DETECTOR
 static inline void touch_nmi_watchdog(void)
 {
        touch_softlockup_watchdog();
 }
+#else
+extern void touch_nmi_watchdog(void);
+#endif
 static inline void acpi_nmi_disable(void) { }
 static inline void acpi_nmi_enable(void) { }
 #endif
@@ -47,4 +51,13 @@ static inline bool trigger_all_cpu_backtrace(void)
 }
 #endif
 
+#ifdef CONFIG_LOCKUP_DETECTOR
+int hw_nmi_is_cpu_stuck(struct pt_regs *);
+u64 hw_nmi_get_sample_period(void);
+extern int watchdog_enabled;
+struct ctl_table;
+extern int proc_dowatchdog_enabled(struct ctl_table *, int ,
+                       void __user *, size_t *, loff_t *);
+#endif
+
 #endif
index 5d0266d94985c65acbd8b13a41961964cdde4a72..937495c250733418f10797d6d7098c5ae1641fa4 100644 (file)
@@ -214,8 +214,9 @@ struct perf_event_attr {
                                 *  See also PERF_RECORD_MISC_EXACT_IP
                                 */
                                precise_ip     :  2, /* skid constraint       */
+                               mmap_data      :  1, /* non-exec mmap data    */
 
-                               __reserved_1   : 47;
+                               __reserved_1   : 46;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -461,6 +462,7 @@ enum perf_callchain_context {
 
 #ifdef CONFIG_PERF_EVENTS
 # include <asm/perf_event.h>
+# include <asm/local64.h>
 #endif
 
 struct perf_guest_info_callbacks {
@@ -531,14 +533,16 @@ struct hw_perf_event {
                        struct hrtimer  hrtimer;
                };
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-               /* breakpoint */
-               struct arch_hw_breakpoint       info;
+               struct { /* breakpoint */
+                       struct arch_hw_breakpoint       info;
+                       struct list_head                bp_list;
+               };
 #endif
        };
-       atomic64_t                      prev_count;
+       local64_t                       prev_count;
        u64                             sample_period;
        u64                             last_period;
-       atomic64_t                      period_left;
+       local64_t                       period_left;
        u64                             interrupts;
 
        u64                             freq_time_stamp;
@@ -548,7 +552,10 @@ struct hw_perf_event {
 
 struct perf_event;
 
-#define PERF_EVENT_TXN_STARTED 1
+/*
+ * Common implementation detail of pmu::{start,commit,cancel}_txn
+ */
+#define PERF_EVENT_TXN 0x1
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -562,14 +569,28 @@ struct pmu {
        void (*unthrottle)              (struct perf_event *event);
 
        /*
-        * group events scheduling is treated as a transaction,
-        * add group events as a whole and perform one schedulability test.
-        * If test fails, roll back the whole group
+        * Group event scheduling is treated as a transaction: add the group's
+        * events as a whole and perform one schedulability test. If the test
+        * fails, roll back the whole group.
         */
 
+       /*
+        * Start the transaction, after this ->enable() doesn't need
+        * to do schedulability tests.
+        */
        void (*start_txn)       (const struct pmu *pmu);
-       void (*cancel_txn)      (const struct pmu *pmu);
+       /*
+        * If ->start_txn() disabled the ->enable() schedulability test
+        * then ->commit_txn() is required to perform one. On success
+        * the transaction is closed. On error the transaction is kept
+        * open until ->cancel_txn() is called.
+        */
        int  (*commit_txn)      (const struct pmu *pmu);
+       /*
+        * Will cancel the transaction, assumes ->disable() is called for
+        * each successful ->enable() during the transaction.
+        */
+       void (*cancel_txn)      (const struct pmu *pmu);
 };
 
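Roughly, a caller is expected to drive these hooks as in the toy model below. It is simplified from group_sched_in() later in this diff; all names are invented and it only sketches the protocol, it is not kernel code:

/* Model of the ->start_txn/->commit_txn/->cancel_txn protocol used when
 * scheduling an event group (hypothetical minimal types). */
struct model_pmu {
        void (*start_txn)(const struct model_pmu *pmu);
        int  (*commit_txn)(const struct model_pmu *pmu);  /* 0 on success */
        void (*cancel_txn)(const struct model_pmu *pmu);
        int  (*enable)(void *event);                      /* 0 on success */
        void (*disable)(void *event);
};

/* Schedule 'n' events of a group as one transaction. */
static int model_group_sched_in(const struct model_pmu *pmu, void **events, int n)
{
        int i;

        pmu->start_txn(pmu);       /* ->enable() may skip schedulability tests */

        for (i = 0; i < n; i++) {
                if (pmu->enable(events[i]))
                        goto rollback;
        }

        if (!pmu->commit_txn(pmu)) /* one schedulability test for the group */
                return 0;

rollback:
        while (--i >= 0)           /* undo every successful ->enable() */
                pmu->disable(events[i]);
        pmu->cancel_txn(pmu);
        return -1;
}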
 /**
@@ -584,7 +605,9 @@ enum perf_event_active_state {
 
 struct file;
 
-struct perf_mmap_data {
+#define PERF_BUFFER_WRITABLE           0x01
+
+struct perf_buffer {
        atomic_t                        refcount;
        struct rcu_head                 rcu_head;
 #ifdef CONFIG_PERF_USE_VMALLOC
@@ -650,7 +673,8 @@ struct perf_event {
 
        enum perf_event_active_state    state;
        unsigned int                    attach_state;
-       atomic64_t                      count;
+       local64_t                       count;
+       atomic64_t                      child_count;
 
        /*
         * These are the total time in nanoseconds that the event
@@ -709,7 +733,7 @@ struct perf_event {
        atomic_t                        mmap_count;
        int                             mmap_locked;
        struct user_struct              *mmap_user;
-       struct perf_mmap_data           *data;
+       struct perf_buffer              *buffer;
 
        /* poll related */
        wait_queue_head_t               waitq;
@@ -807,7 +831,7 @@ struct perf_cpu_context {
 
 struct perf_output_handle {
        struct perf_event               *event;
-       struct perf_mmap_data           *data;
+       struct perf_buffer              *buffer;
        unsigned long                   wakeup;
        unsigned long                   size;
        void                            *addr;
@@ -910,8 +934,10 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
 
-extern void
-perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+#ifndef perf_arch_fetch_caller_regs
+static inline void
+perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
+#endif
 
 /*
  * Take a snapshot of the regs. Skip ip and frame pointer to
@@ -921,31 +947,11 @@ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
  * - bp for callchains
  * - eflags, for future purposes, just in case
  */
-static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
+static inline void perf_fetch_caller_regs(struct pt_regs *regs)
 {
-       unsigned long ip;
-
        memset(regs, 0, sizeof(*regs));
 
-       switch (skip) {
-       case 1 :
-               ip = CALLER_ADDR0;
-               break;
-       case 2 :
-               ip = CALLER_ADDR1;
-               break;
-       case 3 :
-               ip = CALLER_ADDR2;
-               break;
-       case 4:
-               ip = CALLER_ADDR3;
-               break;
-       /* No need to support further for now */
-       default:
-               ip = 0;
-       }
-
-       return perf_arch_fetch_caller_regs(regs, ip, skip);
+       perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
 }
 
 static inline void
@@ -955,21 +961,14 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
                struct pt_regs hot_regs;
 
                if (!regs) {
-                       perf_fetch_caller_regs(&hot_regs, 1);
+                       perf_fetch_caller_regs(&hot_regs);
                        regs = &hot_regs;
                }
                __perf_sw_event(event_id, nr, nmi, regs, addr);
        }
 }
 
-extern void __perf_event_mmap(struct vm_area_struct *vma);
-
-static inline void perf_event_mmap(struct vm_area_struct *vma)
-{
-       if (vma->vm_flags & VM_EXEC)
-               __perf_event_mmap(vma);
-}
-
+extern void perf_event_mmap(struct vm_area_struct *vma);
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1001,7 +1000,7 @@ static inline bool perf_paranoid_kernel(void)
 extern void perf_event_init(void);
 extern void perf_tp_event(u64 addr, u64 count, void *record,
                          int entry_size, struct pt_regs *regs,
-                         struct hlist_head *head);
+                         struct hlist_head *head, int rctx);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
index 0478888c6899d3c59a1649d38dfdd58030f14918..3992f50de6145927a39f72c46dcf8279a16f57ab 100644 (file)
@@ -316,20 +316,16 @@ extern void scheduler_tick(void);
 
 extern void sched_show_task(struct task_struct *p);
 
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-extern void softlockup_tick(void);
+#ifdef CONFIG_LOCKUP_DETECTOR
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
-extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-                                   void __user *buffer,
-                                   size_t *lenp, loff_t *ppos);
+extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
+                                 void __user *buffer,
+                                 size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern int softlockup_thresh;
 #else
-static inline void softlockup_tick(void)
-{
-}
 static inline void touch_softlockup_watchdog(void)
 {
 }
@@ -2435,18 +2431,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #endif /* CONFIG_SMP */
 
-#ifdef CONFIG_TRACING
-extern void
-__trace_special(void *__tr, void *__data,
-               unsigned long arg1, unsigned long arg2, unsigned long arg3);
-#else
-static inline void
-__trace_special(void *__tr, void *__data,
-               unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-}
-#endif
-
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
index 1812dac8c496b8694d18b45c372b86c411cf0400..1acfa73ce2ac4597559c729301c2c6479ea49c7e 100644 (file)
@@ -14,7 +14,8 @@
 #include <asm/page.h>          /* kmalloc_sizes.h needs PAGE_SIZE */
 #include <asm/cache.h>         /* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
-#include <linux/kmemtrace.h>
+
+#include <trace/events/kmem.h>
 
 #ifndef ARCH_KMALLOC_MINALIGN
 /*
index 4ba59cfc1f7562c0bb8cb900c4f8d444554d25d5..6447a723ecb170b9c3fd4476e8a1c1016b2b69de 100644 (file)
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
-#include <linux/kmemtrace.h>
 #include <linux/kmemleak.h>
 
+#include <trace/events/kmem.h>
+
 enum stat_item {
        ALLOC_FASTPATH,         /* Allocation from cpu slab */
        ALLOC_SLOWPATH,         /* Allocation by getting a new cpu slab */
index 13ebb5413a7982c5fdd1153432794f39a708f362..a6bfd1367d2a8493cbe7534cddbc1e2841e12fa6 100644 (file)
@@ -167,7 +167,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
                .enter_event    = &event_enter_##sname,         \
                .exit_event     = &event_exit_##sname,          \
                .enter_fields   = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \
-               .exit_fields    = LIST_HEAD_INIT(__syscall_meta_##sname.exit_fields), \
        };
 
 #define SYSCALL_DEFINE0(sname)                                 \
@@ -182,7 +181,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
                .enter_event    = &event_enter__##sname,        \
                .exit_event     = &event_exit__##sname,         \
                .enter_fields   = LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \
-               .exit_fields    = LIST_HEAD_INIT(__syscall_meta__##sname.exit_fields), \
        };                                                      \
        asmlinkage long sys_##sname(void)
 #else
diff --git a/include/trace/boot.h b/include/trace/boot.h
deleted file mode 100644 (file)
index 088ea08..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _LINUX_TRACE_BOOT_H
-#define _LINUX_TRACE_BOOT_H
-
-#include <linux/module.h>
-#include <linux/kallsyms.h>
-#include <linux/init.h>
-
-/*
- * Structure which defines the trace of an initcall
- * while it is called.
- * You don't have to fill the func field since it is
- * only used internally by the tracer.
- */
-struct boot_trace_call {
-       pid_t                   caller;
-       char                    func[KSYM_SYMBOL_LEN];
-};
-
-/*
- * Structure which defines the trace of an initcall
- * while it returns.
- */
-struct boot_trace_ret {
-       char                    func[KSYM_SYMBOL_LEN];
-       int                             result;
-       unsigned long long      duration;               /* nsecs */
-};
-
-#ifdef CONFIG_BOOT_TRACER
-/* Append the traces on the ring-buffer */
-extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
-extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
-
-/* Tells the tracer that smp_pre_initcall is finished.
- * So we can start the tracing
- */
-extern void start_boot_trace(void);
-
-/* Resume the tracing of other necessary events
- * such as sched switches
- */
-extern void enable_boot_trace(void);
-
-/* Suspend this tracing. Actually, only sched_switches tracing have
- * to be suspended. Initcalls doesn't need it.)
- */
-extern void disable_boot_trace(void);
-#else
-static inline
-void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
-
-static inline
-void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
-
-static inline void start_boot_trace(void) { }
-static inline void enable_boot_trace(void) { }
-static inline void disable_boot_trace(void) { }
-#endif /* CONFIG_BOOT_TRACER */
-
-#endif /* __LINUX_TRACE_BOOT_H */
index b9e1dd6c6208b62b9201b7b9bff947fcfb0f2c51..9208c92aeab5eee575b21f3000ea8034f6c00788 100644 (file)
@@ -49,31 +49,6 @@ TRACE_EVENT(sched_kthread_stop_ret,
        TP_printk("ret=%d", __entry->ret)
 );
 
-/*
- * Tracepoint for waiting on task to unschedule:
- */
-TRACE_EVENT(sched_wait_task,
-
-       TP_PROTO(struct task_struct *p),
-
-       TP_ARGS(p),
-
-       TP_STRUCT__entry(
-               __array(        char,   comm,   TASK_COMM_LEN   )
-               __field(        pid_t,  pid                     )
-               __field(        int,    prio                    )
-       ),
-
-       TP_fast_assign(
-               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-               __entry->pid    = p->pid;
-               __entry->prio   = p->prio;
-       ),
-
-       TP_printk("comm=%s pid=%d prio=%d",
-                 __entry->comm, __entry->pid, __entry->prio)
-);
-
 /*
  * Tracepoint for waking up a task:
  */
@@ -239,6 +214,13 @@ DEFINE_EVENT(sched_process_template, sched_process_exit,
             TP_PROTO(struct task_struct *p),
             TP_ARGS(p));
 
+/*
+ * Tracepoint for waiting on task to unschedule:
+ */
+DEFINE_EVENT(sched_process_template, sched_wait_task,
+       TP_PROTO(struct task_struct *p),
+       TP_ARGS(p));
+
 /*
  * Tracepoint for a waiting task:
  */
index 9496b965d62ad9dd39929ccc7e471c7fcf2d374a..c624126a9c8a6fb889fe596ea261f649236d51a7 100644 (file)
@@ -8,11 +8,7 @@
 #include <linux/hrtimer.h>
 #include <linux/timer.h>
 
-/**
- * timer_init - called when the timer is initialized
- * @timer:     pointer to struct timer_list
- */
-TRACE_EVENT(timer_init,
+DECLARE_EVENT_CLASS(timer_class,
 
        TP_PROTO(struct timer_list *timer),
 
@@ -29,6 +25,17 @@ TRACE_EVENT(timer_init,
        TP_printk("timer=%p", __entry->timer)
 );
 
+/**
+ * timer_init - called when the timer is initialized
+ * @timer:     pointer to struct timer_list
+ */
+DEFINE_EVENT(timer_class, timer_init,
+
+       TP_PROTO(struct timer_list *timer),
+
+       TP_ARGS(timer)
+);
+
 /**
  * timer_start - called when the timer is started
  * @timer:     pointer to struct timer_list
@@ -94,42 +101,22 @@ TRACE_EVENT(timer_expire_entry,
  * NOTE: Do NOT dereference timer in TP_fast_assign. The pointer might
  * be invalid. We solely track the pointer.
  */
-TRACE_EVENT(timer_expire_exit,
+DEFINE_EVENT(timer_class, timer_expire_exit,
 
        TP_PROTO(struct timer_list *timer),
 
-       TP_ARGS(timer),
-
-       TP_STRUCT__entry(
-               __field(void *, timer   )
-       ),
-
-       TP_fast_assign(
-               __entry->timer  = timer;
-       ),
-
-       TP_printk("timer=%p", __entry->timer)
+       TP_ARGS(timer)
 );
 
 /**
  * timer_cancel - called when the timer is canceled
  * @timer:     pointer to struct timer_list
  */
-TRACE_EVENT(timer_cancel,
+DEFINE_EVENT(timer_class, timer_cancel,
 
        TP_PROTO(struct timer_list *timer),
 
-       TP_ARGS(timer),
-
-       TP_STRUCT__entry(
-               __field( void *,        timer   )
-       ),
-
-       TP_fast_assign(
-               __entry->timer  = timer;
-       ),
-
-       TP_printk("timer=%p", __entry->timer)
+       TP_ARGS(timer)
 );
 
 /**
@@ -224,14 +211,7 @@ TRACE_EVENT(hrtimer_expire_entry,
                  (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
  );
 
-/**
- * hrtimer_expire_exit - called immediately after the hrtimer callback returns
- * @timer:     pointer to struct hrtimer
- *
- * When used in combination with the hrtimer_expire_entry tracepoint we can
- * determine the runtime of the callback function.
- */
-TRACE_EVENT(hrtimer_expire_exit,
+DECLARE_EVENT_CLASS(hrtimer_class,
 
        TP_PROTO(struct hrtimer *hrtimer),
 
@@ -249,24 +229,28 @@ TRACE_EVENT(hrtimer_expire_exit,
 );
 
 /**
- * hrtimer_cancel - called when the hrtimer is canceled
- * @hrtimer:   pointer to struct hrtimer
+ * hrtimer_expire_exit - called immediately after the hrtimer callback returns
+ * @hrtimer:   pointer to struct hrtimer
+ *
+ * When used in combination with the hrtimer_expire_entry tracepoint we can
+ * determine the runtime of the callback function.
  */
-TRACE_EVENT(hrtimer_cancel,
+DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit,
 
        TP_PROTO(struct hrtimer *hrtimer),
 
-       TP_ARGS(hrtimer),
+       TP_ARGS(hrtimer)
+);
 
-       TP_STRUCT__entry(
-               __field( void *,        hrtimer )
-       ),
+/**
+ * hrtimer_cancel - called when the hrtimer is canceled
+ * @hrtimer:   pointer to struct hrtimer
+ */
+DEFINE_EVENT(hrtimer_class, hrtimer_cancel,
 
-       TP_fast_assign(
-               __entry->hrtimer        = hrtimer;
-       ),
+       TP_PROTO(struct hrtimer *hrtimer),
 
-       TP_printk("hrtimer=%p", __entry->hrtimer)
+       TP_ARGS(hrtimer)
 );
 
 /**
index 5a64905d7278a47fb683a0aceb63cef029dd467b..a9377c0083ad3ed612547f783647132a8268ef09 100644 (file)
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
-#undef __cpparg
-#define __cpparg(arg...) arg
-
 /* Callbacks are meaningless to ftrace. */
 #undef TRACE_EVENT_FN
 #define TRACE_EVENT_FN(name, proto, args, tstruct,                     \
                assign, print, reg, unreg)                              \
-       TRACE_EVENT(name, __cpparg(proto), __cpparg(args),              \
-               __cpparg(tstruct), __cpparg(assign), __cpparg(print))   \
+       TRACE_EVENT(name, PARAMS(proto), PARAMS(args),                  \
+               PARAMS(tstruct), PARAMS(assign), PARAMS(print))         \
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
  *     struct trace_seq *s = &iter->seq;
  *     struct ftrace_raw_<call> *field; <-- defined in stage 1
  *     struct trace_entry *entry;
- *     struct trace_seq *p;
+ *     struct trace_seq *p = &iter->tmp_seq;
  *     int ret;
  *
  *     entry = iter->ent;
  *
  *     field = (typeof(field))entry;
  *
- *     p = &get_cpu_var(ftrace_event_seq);
  *     trace_seq_init(p);
  *     ret = trace_seq_printf(s, "%s: ", <call>);
  *     if (ret)
  *             ret = trace_seq_printf(s, <TP_printk> "\n");
- *     put_cpu();
  *     if (!ret)
  *             return TRACE_TYPE_PARTIAL_LINE;
  *
@@ -216,7 +211,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,    \
        struct trace_seq *s = &iter->seq;                               \
        struct ftrace_raw_##call *field;                                \
        struct trace_entry *entry;                                      \
-       struct trace_seq *p;                                            \
+       struct trace_seq *p = &iter->tmp_seq;                           \
        int ret;                                                        \
                                                                        \
        event = container_of(trace_event, struct ftrace_event_call,     \
@@ -231,12 +226,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,  \
                                                                        \
        field = (typeof(field))entry;                                   \
                                                                        \
-       p = &get_cpu_var(ftrace_event_seq);                             \
        trace_seq_init(p);                                              \
        ret = trace_seq_printf(s, "%s: ", event->name);                 \
        if (ret)                                                        \
                ret = trace_seq_printf(s, print);                       \
-       put_cpu();                                                      \
        if (!ret)                                                       \
                return TRACE_TYPE_PARTIAL_LINE;                         \
                                                                        \
@@ -255,7 +248,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,    \
        struct trace_seq *s = &iter->seq;                               \
        struct ftrace_raw_##template *field;                            \
        struct trace_entry *entry;                                      \
-       struct trace_seq *p;                                            \
+       struct trace_seq *p = &iter->tmp_seq;                           \
        int ret;                                                        \
                                                                        \
        entry = iter->ent;                                              \
@@ -267,12 +260,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags,  \
                                                                        \
        field = (typeof(field))entry;                                   \
                                                                        \
-       p = &get_cpu_var(ftrace_event_seq);                             \
        trace_seq_init(p);                                              \
        ret = trace_seq_printf(s, "%s: ", #call);                       \
        if (ret)                                                        \
                ret = trace_seq_printf(s, print);                       \
-       put_cpu();                                                      \
        if (!ret)                                                       \
                return TRACE_TYPE_PARTIAL_LINE;                         \
                                                                        \
@@ -439,6 +430,7 @@ static inline notrace int ftrace_get_offsets_##call(                        \
  *     .fields                 = LIST_HEAD_INIT(event_class_##call.fields),
  *     .raw_init               = trace_event_raw_init,
  *     .probe                  = ftrace_raw_event_##call,
+ *     .reg                    = ftrace_event_reg,
  * };
  *
  * static struct ftrace_event_call __used
@@ -567,6 +559,7 @@ static struct ftrace_event_class __used event_class_##call = {              \
        .fields                 = LIST_HEAD_INIT(event_class_##call.fields),\
        .raw_init               = trace_event_raw_init,                 \
        .probe                  = ftrace_raw_event_##call,              \
+       .reg                    = ftrace_event_reg,                     \
        _TRACE_PERF_INIT(call)                                          \
 };
 
@@ -705,7 +698,7 @@ perf_trace_##call(void *__data, proto)                                      \
        int __data_size;                                                \
        int rctx;                                                       \
                                                                        \
-       perf_fetch_caller_regs(&__regs, 1);                             \
+       perf_fetch_caller_regs(&__regs);                                \
                                                                        \
        __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
        __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
index 257e08960d7b7f1c232ca6cef97f1cac3f0737e5..31966a4fb8ccab298a6e64f81400a1aec2c39ea9 100644 (file)
@@ -26,7 +26,6 @@ struct syscall_metadata {
        const char      **types;
        const char      **args;
        struct list_head enter_fields;
-       struct list_head exit_fields;
 
        struct ftrace_event_call *enter_event;
        struct ftrace_event_call *exit_event;
index 4ddb53f04f2acb0aefb84df89efc105e8f1ba90e..b03a4c1f69fa4f163a208783c7d9b4079ca541a0 100644 (file)
 #include <linux/ftrace.h>
 #include <linux/async.h>
 #include <linux/kmemcheck.h>
-#include <linux/kmemtrace.h>
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
-#include <trace/boot.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -664,7 +662,6 @@ asmlinkage void __init start_kernel(void)
 #endif
        page_cgroup_init();
        enable_debug_pagealloc();
-       kmemtrace_init();
        kmemleak_init();
        debug_objects_mem_init();
        idr_init_cache();
@@ -726,38 +723,33 @@ int initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
 
 static char msgbuf[64];
-static struct boot_trace_call call;
-static struct boot_trace_ret ret;
 
 int do_one_initcall(initcall_t fn)
 {
        int count = preempt_count();
        ktime_t calltime, delta, rettime;
+       unsigned long long duration;
+       int ret;
 
        if (initcall_debug) {
-               call.caller = task_pid_nr(current);
-               printk("calling  %pF @ %i\n", fn, call.caller);
+               printk("calling  %pF @ %i\n", fn, task_pid_nr(current));
                calltime = ktime_get();
-               trace_boot_call(&call, fn);
-               enable_boot_trace();
        }
 
-       ret.result = fn();
+       ret = fn();
 
        if (initcall_debug) {
-               disable_boot_trace();
                rettime = ktime_get();
                delta = ktime_sub(rettime, calltime);
-               ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-               trace_boot_ret(&ret, fn);
-               printk("initcall %pF returned %d after %Ld usecs\n", fn,
-                       ret.result, ret.duration);
+               duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+               printk("initcall %pF returned %d after %lld usecs\n", fn,
+                       ret, duration);
        }
 
        msgbuf[0] = 0;
 
-       if (ret.result && ret.result != -ENODEV && initcall_debug)
-               sprintf(msgbuf, "error code %d ", ret.result);
+       if (ret && ret != -ENODEV && initcall_debug)
+               sprintf(msgbuf, "error code %d ", ret);
 
        if (preempt_count() != count) {
                strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -771,7 +763,7 @@ int do_one_initcall(initcall_t fn)
                printk("initcall %pF returned with %s\n", fn, msgbuf);
        }
 
-       return ret.result;
+       return ret;
 }
 
 
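With the boot tracer gone, do_one_initcall() just times the call itself; the >> 10 converts nanoseconds to approximate microseconds (divide by 1024 rather than 1000). A hedged user-space analogue of the same timing pattern, with an invented stand-in for fn():

/* ns >> 10 is a cheap approximation of ns / 1000 (off by ~2.4%), which is
 * fine for a debug printout. */
#include <stdio.h>
#include <time.h>

static unsigned long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

static int some_initcall(void) { return 0; }    /* stand-in for fn() */

int main(void)
{
        unsigned long long t0 = now_ns();
        int ret = some_initcall();
        unsigned long long duration = (now_ns() - t0) >> 10;    /* ~usecs */

        printf("initcall returned %d after %llu usecs\n", ret, duration);
        return 0;
}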
@@ -895,7 +887,6 @@ static int __init kernel_init(void * unused)
        smp_prepare_cpus(setup_max_cpus);
 
        do_pre_smp_initcalls();
-       start_boot_trace();
 
        smp_init();
        sched_init_smp();
index 057472fbc272eed8b64f23c16f55b49217aa9043..ce53fb2bd1d90669caab50084cfac710c620c946 100644 (file)
@@ -76,8 +76,8 @@ obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += debug/
-obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
+obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
index 71ed3ce29e12e7d2dbe25d9e4b92e8d46a71fecd..d71a987fd2bf2ba5e698f9b69fccb59db4a1bb30 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/list.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
@@ -62,6 +63,9 @@ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
 
 static int nr_slots[TYPE_MAX];
 
+/* Keep track of the breakpoints attached to tasks */
+static LIST_HEAD(bp_task_head);
+
 static int constraints_initialized;
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
@@ -103,33 +107,21 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
        return 0;
 }
 
-static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
+/*
+ * Count the number of breakpoints of the same type and same task.
+ * The given event must be not on the list.
+ */
+static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
 {
-       struct perf_event_context *ctx = tsk->perf_event_ctxp;
-       struct list_head *list;
-       struct perf_event *bp;
-       unsigned long flags;
+       struct perf_event_context *ctx = bp->ctx;
+       struct perf_event *iter;
        int count = 0;
 
-       if (WARN_ONCE(!ctx, "No perf context for this task"))
-               return 0;
-
-       list = &ctx->event_list;
-
-       raw_spin_lock_irqsave(&ctx->lock, flags);
-
-       /*
-        * The current breakpoint counter is not included in the list
-        * at the open() callback time
-        */
-       list_for_each_entry(bp, list, event_entry) {
-               if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-                       if (find_slot_idx(bp) == type)
-                               count += hw_breakpoint_weight(bp);
+       list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
+               if (iter->ctx == ctx && find_slot_idx(iter) == type)
+                       count += hw_breakpoint_weight(iter);
        }
 
-       raw_spin_unlock_irqrestore(&ctx->lock, flags);
-
        return count;
 }
 
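Instead of walking the task's perf context under its lock, breakpoints now sit on the global bp_task_head list and are matched by their ctx pointer. A toy model of that counting step, with types invented purely for illustration:

/* Walk one global list and count entries that share the target context and
 * slot type; the "current" breakpoint is assumed not to be on the list. */
struct model_bp {
        struct model_bp *next;      /* stands in for hw.bp_list linkage  */
        const void      *ctx;       /* stands in for perf_event_context  */
        int              type;
        int              weight;
};

static int model_task_bp_pinned(const struct model_bp *head,
                                const struct model_bp *bp, int type)
{
        int count = 0;

        for (const struct model_bp *iter = head; iter; iter = iter->next) {
                if (iter->ctx == bp->ctx && iter->type == type)
                        count += iter->weight;
        }
        return count;
}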
@@ -149,7 +141,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
                if (!tsk)
                        slots->pinned += max_task_bp_pinned(cpu, type);
                else
-                       slots->pinned += task_bp_pinned(tsk, type);
+                       slots->pinned += task_bp_pinned(bp, type);
                slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
                return;
@@ -162,7 +154,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
                if (!tsk)
                        nr += max_task_bp_pinned(cpu, type);
                else
-                       nr += task_bp_pinned(tsk, type);
+                       nr += task_bp_pinned(bp, type);
 
                if (nr > slots->pinned)
                        slots->pinned = nr;
@@ -188,7 +180,7 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight)
 /*
  * Add a pinned breakpoint for the given task in our constraint table
  */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
                                enum bp_type_idx type, int weight)
 {
        unsigned int *tsk_pinned;
@@ -196,10 +188,11 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
        int old_idx = 0;
        int idx = 0;
 
-       old_count = task_bp_pinned(tsk, type);
+       old_count = task_bp_pinned(bp, type);
        old_idx = old_count - 1;
        idx = old_idx + weight;
 
+       /* tsk_pinned[n] is the number of tasks having n breakpoints */
        tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
        if (enable) {
                tsk_pinned[idx]++;
@@ -222,23 +215,30 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
        int cpu = bp->cpu;
        struct task_struct *tsk = bp->ctx->task;
 
+       /* Pinned counter cpu profiling */
+       if (!tsk) {
+
+               if (enable)
+                       per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
+               else
+                       per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+               return;
+       }
+
        /* Pinned counter task profiling */
-       if (tsk) {
-               if (cpu >= 0) {
-                       toggle_bp_task_slot(tsk, cpu, enable, type, weight);
-                       return;
-               }
 
+       if (!enable)
+               list_del(&bp->hw.bp_list);
+
+       if (cpu >= 0) {
+               toggle_bp_task_slot(bp, cpu, enable, type, weight);
+       } else {
                for_each_online_cpu(cpu)
-                       toggle_bp_task_slot(tsk, cpu, enable, type, weight);
-               return;
+                       toggle_bp_task_slot(bp, cpu, enable, type, weight);
        }
 
-       /* Pinned counter cpu profiling */
        if (enable)
-               per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
-       else
-               per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+               list_add_tail(&bp->hw.bp_list, &bp_task_head);
 }
 
 /*
@@ -312,6 +312,10 @@ static int __reserve_bp_slot(struct perf_event *bp)
        weight = hw_breakpoint_weight(bp);
 
        fetch_bp_busy_slots(&slots, bp, type);
+       /*
+        * Simulate the addition of this breakpoint to the constraints
+        * and see the result.
+        */
        fetch_this_slot(&slots, weight);
 
        /* Flexible counters need to keep at least one slot */
index ff86c558af4c28dd4c9a7ea92cc592b70bee6d7d..c772a3d4000d85a1fc0691626d3ff26c90bb1c12 100644 (file)
@@ -675,7 +675,6 @@ group_sched_in(struct perf_event *group_event,
        struct perf_event *event, *partial_group = NULL;
        const struct pmu *pmu = group_event->pmu;
        bool txn = false;
-       int ret;
 
        if (group_event->state == PERF_EVENT_STATE_OFF)
                return 0;
@@ -703,14 +702,8 @@ group_sched_in(struct perf_event *group_event,
                }
        }
 
-       if (!txn)
-               return 0;
-
-       ret = pmu->commit_txn(pmu);
-       if (!ret) {
-               pmu->cancel_txn(pmu);
+       if (!txn || !pmu->commit_txn(pmu))
                return 0;
-       }
 
 group_error:
        /*
@@ -1155,9 +1148,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
         * In order to keep per-task stats reliable we need to flip the event
         * values when we flip the contexts.
         */
-       value = atomic64_read(&next_event->count);
-       value = atomic64_xchg(&event->count, value);
-       atomic64_set(&next_event->count, value);
+       value = local64_read(&next_event->count);
+       value = local64_xchg(&event->count, value);
+       local64_set(&next_event->count, value);
 
        swap(event->total_time_enabled, next_event->total_time_enabled);
        swap(event->total_time_running, next_event->total_time_running);
@@ -1547,10 +1540,10 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 
        hwc->sample_period = sample_period;
 
-       if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+       if (local64_read(&hwc->period_left) > 8*sample_period) {
                perf_disable();
                perf_event_stop(event);
-               atomic64_set(&hwc->period_left, 0);
+               local64_set(&hwc->period_left, 0);
                perf_event_start(event);
                perf_enable();
        }
@@ -1591,7 +1584,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
 
                perf_disable();
                event->pmu->read(event);
-               now = atomic64_read(&event->count);
+               now = local64_read(&event->count);
                delta = now - hwc->freq_count_stamp;
                hwc->freq_count_stamp = now;
 
@@ -1743,6 +1736,11 @@ static void __perf_event_read(void *info)
        event->pmu->read(event);
 }
 
+static inline u64 perf_event_count(struct perf_event *event)
+{
+       return local64_read(&event->count) + atomic64_read(&event->child_count);
+}
+
 static u64 perf_event_read(struct perf_event *event)
 {
        /*
@@ -1762,7 +1760,7 @@ static u64 perf_event_read(struct perf_event *event)
                raw_spin_unlock_irqrestore(&ctx->lock, flags);
        }
 
-       return atomic64_read(&event->count);
+       return perf_event_count(event);
 }
 
 /*
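event->count becomes a local64_t touched only from the event's own context, while child events fold their totals into the shared atomic64_t child_count; perf_event_count() reports the sum. A minimal stand-alone model of that split (plain C stand-ins, not kernel code):

#include <stdint.h>

struct model_event {
        int64_t count;        /* local64_t in the kernel: owner-context only */
        int64_t child_count;  /* atomic64_t in the kernel: shared updates    */
};

/* Readers see the owner's own count plus everything inherited children
 * have folded in so far. */
static int64_t model_perf_event_count(const struct model_event *e)
{
        return e->count + e->child_count;
}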
@@ -1883,7 +1881,7 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void perf_pending_sync(struct perf_event *event);
-static void perf_mmap_data_put(struct perf_mmap_data *data);
+static void perf_buffer_put(struct perf_buffer *buffer);
 
 static void free_event(struct perf_event *event)
 {
@@ -1891,7 +1889,7 @@ static void free_event(struct perf_event *event)
 
        if (!event->parent) {
                atomic_dec(&nr_events);
-               if (event->attr.mmap)
+               if (event->attr.mmap || event->attr.mmap_data)
                        atomic_dec(&nr_mmap_events);
                if (event->attr.comm)
                        atomic_dec(&nr_comm_events);
@@ -1899,9 +1897,9 @@ static void free_event(struct perf_event *event)
                        atomic_dec(&nr_task_events);
        }
 
-       if (event->data) {
-               perf_mmap_data_put(event->data);
-               event->data = NULL;
+       if (event->buffer) {
+               perf_buffer_put(event->buffer);
+               event->buffer = NULL;
        }
 
        if (event->destroy)
@@ -2126,13 +2124,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
        struct perf_event *event = file->private_data;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned int events = POLL_HUP;
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (data)
-               events = atomic_xchg(&data->poll, 0);
+       buffer = rcu_dereference(event->buffer);
+       if (buffer)
+               events = atomic_xchg(&buffer->poll, 0);
        rcu_read_unlock();
 
        poll_wait(file, &event->waitq, wait);
@@ -2143,7 +2141,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 static void perf_event_reset(struct perf_event *event)
 {
        (void)perf_event_read(event);
-       atomic64_set(&event->count, 0);
+       local64_set(&event->count, 0);
        perf_event_update_userpage(event);
 }
 
@@ -2342,14 +2340,14 @@ static int perf_event_index(struct perf_event *event)
 void perf_event_update_userpage(struct perf_event *event)
 {
        struct perf_event_mmap_page *userpg;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (!data)
+       buffer = rcu_dereference(event->buffer);
+       if (!buffer)
                goto unlock;
 
-       userpg = data->user_page;
+       userpg = buffer->user_page;
 
        /*
         * Disable preemption so as to not let the corresponding user-space
@@ -2359,9 +2357,9 @@ void perf_event_update_userpage(struct perf_event *event)
        ++userpg->lock;
        barrier();
        userpg->index = perf_event_index(event);
-       userpg->offset = atomic64_read(&event->count);
+       userpg->offset = perf_event_count(event);
        if (event->state == PERF_EVENT_STATE_ACTIVE)
-               userpg->offset -= atomic64_read(&event->hw.prev_count);
+               userpg->offset -= local64_read(&event->hw.prev_count);
 
        userpg->time_enabled = event->total_time_enabled +
                        atomic64_read(&event->child_total_time_enabled);
@@ -2376,6 +2374,25 @@ unlock:
        rcu_read_unlock();
 }
 
+static unsigned long perf_data_size(struct perf_buffer *buffer);
+
+static void
+perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
+{
+       long max_size = perf_data_size(buffer);
+
+       if (watermark)
+               buffer->watermark = min(max_size, watermark);
+
+       if (!buffer->watermark)
+               buffer->watermark = max_size / 2;
+
+       if (flags & PERF_BUFFER_WRITABLE)
+               buffer->writable = 1;
+
+       atomic_set(&buffer->refcount, 1);
+}
+
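perf_buffer_init() centralises the watermark choice: honour a caller-supplied wakeup watermark capped at the buffer size, otherwise default to half the buffer. The same decision as a tiny stand-alone helper, for illustration only:

/* Pick the wakeup watermark for a buffer of 'buffer_size' bytes. */
static long model_watermark(long buffer_size, long requested)
{
        long wm = 0;

        if (requested)
                wm = requested < buffer_size ? requested : buffer_size;

        return wm ? wm : buffer_size / 2;
}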
 #ifndef CONFIG_PERF_USE_VMALLOC
 
 /*
@@ -2383,15 +2400,15 @@ unlock:
  */
 
 static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
 {
-       if (pgoff > data->nr_pages)
+       if (pgoff > buffer->nr_pages)
                return NULL;
 
        if (pgoff == 0)
-               return virt_to_page(data->user_page);
+               return virt_to_page(buffer->user_page);
 
-       return virt_to_page(data->data_pages[pgoff - 1]);
+       return virt_to_page(buffer->data_pages[pgoff - 1]);
 }
 
 static void *perf_mmap_alloc_page(int cpu)
@@ -2407,42 +2424,44 @@ static void *perf_mmap_alloc_page(int cpu)
        return page_address(page);
 }
 
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long size;
        int i;
 
-       size = sizeof(struct perf_mmap_data);
+       size = sizeof(struct perf_buffer);
        size += nr_pages * sizeof(void *);
 
-       data = kzalloc(size, GFP_KERNEL);
-       if (!data)
+       buffer = kzalloc(size, GFP_KERNEL);
+       if (!buffer)
                goto fail;
 
-       data->user_page = perf_mmap_alloc_page(event->cpu);
-       if (!data->user_page)
+       buffer->user_page = perf_mmap_alloc_page(cpu);
+       if (!buffer->user_page)
                goto fail_user_page;
 
        for (i = 0; i < nr_pages; i++) {
-               data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
-               if (!data->data_pages[i])
+               buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
+               if (!buffer->data_pages[i])
                        goto fail_data_pages;
        }
 
-       data->nr_pages = nr_pages;
+       buffer->nr_pages = nr_pages;
+
+       perf_buffer_init(buffer, watermark, flags);
 
-       return data;
+       return buffer;
 
 fail_data_pages:
        for (i--; i >= 0; i--)
-               free_page((unsigned long)data->data_pages[i]);
+               free_page((unsigned long)buffer->data_pages[i]);
 
-       free_page((unsigned long)data->user_page);
+       free_page((unsigned long)buffer->user_page);
 
 fail_user_page:
-       kfree(data);
+       kfree(buffer);
 
 fail:
        return NULL;
@@ -2456,17 +2475,17 @@ static void perf_mmap_free_page(unsigned long addr)
        __free_page(page);
 }
 
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
 {
        int i;
 
-       perf_mmap_free_page((unsigned long)data->user_page);
-       for (i = 0; i < data->nr_pages; i++)
-               perf_mmap_free_page((unsigned long)data->data_pages[i]);
-       kfree(data);
+       perf_mmap_free_page((unsigned long)buffer->user_page);
+       for (i = 0; i < buffer->nr_pages; i++)
+               perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
+       kfree(buffer);
 }
 
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
 {
        return 0;
 }
@@ -2479,18 +2498,18 @@ static inline int page_order(struct perf_mmap_data *data)
  * Required for architectures that have d-cache aliasing issues.
  */
 
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
 {
-       return data->page_order;
+       return buffer->page_order;
 }
 
 static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
 {
-       if (pgoff > (1UL << page_order(data)))
+       if (pgoff > (1UL << page_order(buffer)))
                return NULL;
 
-       return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
+       return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
 }
 
 static void perf_mmap_unmark_page(void *addr)
@@ -2500,57 +2519,59 @@ static void perf_mmap_unmark_page(void *addr)
        page->mapping = NULL;
 }
 
-static void perf_mmap_data_free_work(struct work_struct *work)
+static void perf_buffer_free_work(struct work_struct *work)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        void *base;
        int i, nr;
 
-       data = container_of(work, struct perf_mmap_data, work);
-       nr = 1 << page_order(data);
+       buffer = container_of(work, struct perf_buffer, work);
+       nr = 1 << page_order(buffer);
 
-       base = data->user_page;
+       base = buffer->user_page;
        for (i = 0; i < nr + 1; i++)
                perf_mmap_unmark_page(base + (i * PAGE_SIZE));
 
        vfree(base);
-       kfree(data);
+       kfree(buffer);
 }
 
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
 {
-       schedule_work(&data->work);
+       schedule_work(&buffer->work);
 }
 
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long size;
        void *all_buf;
 
-       size = sizeof(struct perf_mmap_data);
+       size = sizeof(struct perf_buffer);
        size += sizeof(void *);
 
-       data = kzalloc(size, GFP_KERNEL);
-       if (!data)
+       buffer = kzalloc(size, GFP_KERNEL);
+       if (!buffer)
                goto fail;
 
-       INIT_WORK(&data->work, perf_mmap_data_free_work);
+       INIT_WORK(&buffer->work, perf_buffer_free_work);
 
        all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
        if (!all_buf)
                goto fail_all_buf;
 
-       data->user_page = all_buf;
-       data->data_pages[0] = all_buf + PAGE_SIZE;
-       data->page_order = ilog2(nr_pages);
-       data->nr_pages = 1;
+       buffer->user_page = all_buf;
+       buffer->data_pages[0] = all_buf + PAGE_SIZE;
+       buffer->page_order = ilog2(nr_pages);
+       buffer->nr_pages = 1;
+
+       perf_buffer_init(buffer, watermark, flags);
 
-       return data;
+       return buffer;
 
 fail_all_buf:
-       kfree(data);
+       kfree(buffer);
 
 fail:
        return NULL;
@@ -2558,15 +2579,15 @@ fail:
 
 #endif
 
-static unsigned long perf_data_size(struct perf_mmap_data *data)
+static unsigned long perf_data_size(struct perf_buffer *buffer)
 {
-       return data->nr_pages << (PAGE_SHIFT + page_order(data));
+       return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
 }
 
 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        struct perf_event *event = vma->vm_file->private_data;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        int ret = VM_FAULT_SIGBUS;
 
        if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -2576,14 +2597,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        }
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (!data)
+       buffer = rcu_dereference(event->buffer);
+       if (!buffer)
                goto unlock;
 
        if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
                goto unlock;
 
-       vmf->page = perf_mmap_to_page(data, vmf->pgoff);
+       vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
        if (!vmf->page)
                goto unlock;
 
@@ -2598,52 +2619,35 @@ unlock:
        return ret;
 }
 
-static void
-perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
-{
-       long max_size = perf_data_size(data);
-
-       if (event->attr.watermark) {
-               data->watermark = min_t(long, max_size,
-                                       event->attr.wakeup_watermark);
-       }
-
-       if (!data->watermark)
-               data->watermark = max_size / 2;
-
-       atomic_set(&data->refcount, 1);
-       rcu_assign_pointer(event->data, data);
-}
-
-static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
+static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
 
-       data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
-       perf_mmap_data_free(data);
+       buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
+       perf_buffer_free(buffer);
 }
 
-static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
+static struct perf_buffer *perf_buffer_get(struct perf_event *event)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
 
        rcu_read_lock();
-       data = rcu_dereference(event->data);
-       if (data) {
-               if (!atomic_inc_not_zero(&data->refcount))
-                       data = NULL;
+       buffer = rcu_dereference(event->buffer);
+       if (buffer) {
+               if (!atomic_inc_not_zero(&buffer->refcount))
+                       buffer = NULL;
        }
        rcu_read_unlock();
 
-       return data;
+       return buffer;
 }
 
-static void perf_mmap_data_put(struct perf_mmap_data *data)
+static void perf_buffer_put(struct perf_buffer *buffer)
 {
-       if (!atomic_dec_and_test(&data->refcount))
+       if (!atomic_dec_and_test(&buffer->refcount))
                return;
 
-       call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
+       call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
 }
 
 static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2658,16 +2662,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
        struct perf_event *event = vma->vm_file->private_data;
 
        if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
-               unsigned long size = perf_data_size(event->data);
+               unsigned long size = perf_data_size(event->buffer);
                struct user_struct *user = event->mmap_user;
-               struct perf_mmap_data *data = event->data;
+               struct perf_buffer *buffer = event->buffer;
 
                atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
                vma->vm_mm->locked_vm -= event->mmap_locked;
-               rcu_assign_pointer(event->data, NULL);
+               rcu_assign_pointer(event->buffer, NULL);
                mutex_unlock(&event->mmap_mutex);
 
-               perf_mmap_data_put(data);
+               perf_buffer_put(buffer);
                free_uid(user);
        }
 }
@@ -2685,11 +2689,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        unsigned long user_locked, user_lock_limit;
        struct user_struct *user = current_user();
        unsigned long locked, lock_limit;
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long vma_size;
        unsigned long nr_pages;
        long user_extra, extra;
-       int ret = 0;
+       int ret = 0, flags = 0;
 
        /*
         * Don't allow mmap() of inherited per-task counters. This would
@@ -2706,7 +2710,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        nr_pages = (vma_size / PAGE_SIZE) - 1;
 
        /*
-        * If we have data pages ensure they're a power-of-two number, so we
+        * If we have buffer pages ensure they're a power-of-two number, so we
         * can do bitmasks instead of modulo.
         */
        if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -2720,9 +2724,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
        WARN_ON_ONCE(event->ctx->parent_ctx);
        mutex_lock(&event->mmap_mutex);
-       if (event->data) {
-               if (event->data->nr_pages == nr_pages)
-                       atomic_inc(&event->data->refcount);
+       if (event->buffer) {
+               if (event->buffer->nr_pages == nr_pages)
+                       atomic_inc(&event->buffer->refcount);
                else
                        ret = -EINVAL;
                goto unlock;
@@ -2752,17 +2756,18 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                goto unlock;
        }
 
-       WARN_ON(event->data);
+       WARN_ON(event->buffer);
+
+       if (vma->vm_flags & VM_WRITE)
+               flags |= PERF_BUFFER_WRITABLE;
 
-       data = perf_mmap_data_alloc(event, nr_pages);
-       if (!data) {
+       buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
+                                  event->cpu, flags);
+       if (!buffer) {
                ret = -ENOMEM;
                goto unlock;
        }
-
-       perf_mmap_data_init(event, data);
-       if (vma->vm_flags & VM_WRITE)
-               event->data->writable = 1;
+       rcu_assign_pointer(event->buffer, buffer);
 
        atomic_long_add(user_extra, &user->locked_vm);
        event->mmap_locked = extra;
@@ -2941,11 +2946,6 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
        return NULL;
 }
 
-__weak
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
-}
-
 
 /*
  * We assume there is only KVM supporting the callbacks.
@@ -2971,15 +2971,15 @@ EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
 /*
  * Output
  */
-static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
+static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
                              unsigned long offset, unsigned long head)
 {
        unsigned long mask;
 
-       if (!data->writable)
+       if (!buffer->writable)
                return true;
 
-       mask = perf_data_size(data) - 1;
+       mask = perf_data_size(buffer) - 1;
 
        offset = (offset - tail) & mask;
        head   = (head   - tail) & mask;
@@ -2992,7 +2992,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
 
 static void perf_output_wakeup(struct perf_output_handle *handle)
 {
-       atomic_set(&handle->data->poll, POLL_IN);
+       atomic_set(&handle->buffer->poll, POLL_IN);
 
        if (handle->nmi) {
                handle->event->pending_wakeup = 1;
@@ -3012,45 +3012,45 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
  */
 static void perf_output_get_handle(struct perf_output_handle *handle)
 {
-       struct perf_mmap_data *data = handle->data;
+       struct perf_buffer *buffer = handle->buffer;
 
        preempt_disable();
-       local_inc(&data->nest);
-       handle->wakeup = local_read(&data->wakeup);
+       local_inc(&buffer->nest);
+       handle->wakeup = local_read(&buffer->wakeup);
 }
 
 static void perf_output_put_handle(struct perf_output_handle *handle)
 {
-       struct perf_mmap_data *data = handle->data;
+       struct perf_buffer *buffer = handle->buffer;
        unsigned long head;
 
 again:
-       head = local_read(&data->head);
+       head = local_read(&buffer->head);
 
        /*
         * IRQ/NMI can happen here, which means we can miss a head update.
         */
 
-       if (!local_dec_and_test(&data->nest))
+       if (!local_dec_and_test(&buffer->nest))
                goto out;
 
        /*
         * Publish the known good head. Rely on the full barrier implied
-        * by atomic_dec_and_test() order the data->head read and this
+        * by atomic_dec_and_test() to order the buffer->head read and this
         * write.
         */
-       data->user_page->data_head = head;
+       buffer->user_page->data_head = head;
 
        /*
         * Now check if we missed an update, rely on the (compiler)
-        * barrier in atomic_dec_and_test() to re-read data->head.
+        * barrier in atomic_dec_and_test() to re-read buffer->head.
         */
-       if (unlikely(head != local_read(&data->head))) {
-               local_inc(&data->nest);
+       if (unlikely(head != local_read(&buffer->head))) {
+               local_inc(&buffer->nest);
                goto again;
        }
 
-       if (handle->wakeup != local_read(&data->wakeup))
+       if (handle->wakeup != local_read(&buffer->wakeup))
                perf_output_wakeup(handle);
 
  out:
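The renaming keeps the lockless publication scheme intact: writers bump buffer->nest, only the outermost writer publishes data_head to user space, and it re-reads head afterwards so an update made by a nested (IRQ/NMI) writer is not lost. A user-space model of that loop using C11 atomics; the types are stand-ins, not the kernel's local_t:

#include <stdatomic.h>

struct model_buffer {
        atomic_long head;       /* write cursor                        */
        atomic_long nest;       /* nesting depth of in-flight writers  */
        long        data_head;  /* what the consumer gets to see       */
};

static void model_put_handle(struct model_buffer *b)
{
        long head;

again:
        head = atomic_load(&b->head);

        if (atomic_fetch_sub(&b->nest, 1) != 1)
                return;                         /* not the outermost writer */

        b->data_head = head;                    /* publish known-good head  */

        if (head != atomic_load(&b->head)) {    /* missed a nested update?  */
                atomic_fetch_add(&b->nest, 1);
                goto again;
        }
}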
@@ -3070,12 +3070,12 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
                buf += size;
                handle->size -= size;
                if (!handle->size) {
-                       struct perf_mmap_data *data = handle->data;
+                       struct perf_buffer *buffer = handle->buffer;
 
                        handle->page++;
-                       handle->page &= data->nr_pages - 1;
-                       handle->addr = data->data_pages[handle->page];
-                       handle->size = PAGE_SIZE << page_order(data);
+                       handle->page &= buffer->nr_pages - 1;
+                       handle->addr = buffer->data_pages[handle->page];
+                       handle->size = PAGE_SIZE << page_order(buffer);
                }
        } while (len);
 }
@@ -3084,7 +3084,7 @@ int perf_output_begin(struct perf_output_handle *handle,
                      struct perf_event *event, unsigned int size,
                      int nmi, int sample)
 {
-       struct perf_mmap_data *data;
+       struct perf_buffer *buffer;
        unsigned long tail, offset, head;
        int have_lost;
        struct {
@@ -3100,19 +3100,19 @@ int perf_output_begin(struct perf_output_handle *handle,
        if (event->parent)
                event = event->parent;
 
-       data = rcu_dereference(event->data);
-       if (!data)
+       buffer = rcu_dereference(event->buffer);
+       if (!buffer)
                goto out;
 
-       handle->data    = data;
+       handle->buffer  = buffer;
        handle->event   = event;
        handle->nmi     = nmi;
        handle->sample  = sample;
 
-       if (!data->nr_pages)
+       if (!buffer->nr_pages)
                goto out;
 
-       have_lost = local_read(&data->lost);
+       have_lost = local_read(&buffer->lost);
        if (have_lost)
                size += sizeof(lost_event);
 
@@ -3124,30 +3124,30 @@ int perf_output_begin(struct perf_output_handle *handle,
                 * tail pointer. So that all reads will be completed before the
                 * write is issued.
                 */
-               tail = ACCESS_ONCE(data->user_page->data_tail);
+               tail = ACCESS_ONCE(buffer->user_page->data_tail);
                smp_rmb();
-               offset = head = local_read(&data->head);
+               offset = head = local_read(&buffer->head);
                head += size;
-               if (unlikely(!perf_output_space(data, tail, offset, head)))
+               if (unlikely(!perf_output_space(buffer, tail, offset, head)))
                        goto fail;
-       } while (local_cmpxchg(&data->head, offset, head) != offset);
+       } while (local_cmpxchg(&buffer->head, offset, head) != offset);
 
-       if (head - local_read(&data->wakeup) > data->watermark)
-               local_add(data->watermark, &data->wakeup);
+       if (head - local_read(&buffer->wakeup) > buffer->watermark)
+               local_add(buffer->watermark, &buffer->wakeup);
 
-       handle->page = offset >> (PAGE_SHIFT + page_order(data));
-       handle->page &= data->nr_pages - 1;
-       handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
-       handle->addr = data->data_pages[handle->page];
+       handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
+       handle->page &= buffer->nr_pages - 1;
+       handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
+       handle->addr = buffer->data_pages[handle->page];
        handle->addr += handle->size;
-       handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
+       handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
 
        if (have_lost) {
                lost_event.header.type = PERF_RECORD_LOST;
                lost_event.header.misc = 0;
                lost_event.header.size = sizeof(lost_event);
                lost_event.id          = event->id;
-               lost_event.lost        = local_xchg(&data->lost, 0);
+               lost_event.lost        = local_xchg(&buffer->lost, 0);
 
                perf_output_put(handle, lost_event);
        }
@@ -3155,7 +3155,7 @@ int perf_output_begin(struct perf_output_handle *handle,
        return 0;
 
 fail:
-       local_inc(&data->lost);
+       local_inc(&buffer->lost);
        perf_output_put_handle(handle);
 out:
        rcu_read_unlock();
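
perf_output_begin() claims space with a lock-free loop: read the user-visible
tail, read the current head, bump it by the record size, and retry the
compare-and-swap if another writer got there first.  A simplified user-space
analogue with C11 atomics (rb/rb_reserve are made-up names; the watermark,
wakeup and lost-record handling are left out):

#include <stdatomic.h>

struct rb {
        _Atomic unsigned long head;     /* next byte to write, monotonically increasing */
        _Atomic unsigned long tail;     /* last byte consumed, monotonically increasing */
        unsigned long size;             /* capacity in bytes, power of two */
};

/* Reserve size bytes; returns the starting offset, or -1 if the buffer is full. */
static long rb_reserve(struct rb *rb, unsigned long size)
{
        unsigned long offset, head, tail;

        do {
                tail = atomic_load_explicit(&rb->tail, memory_order_acquire);
                offset = head = atomic_load(&rb->head);
                head += size;
                if (head - tail > rb->size)     /* would overwrite unread data */
                        return -1;
        } while (!atomic_compare_exchange_weak(&rb->head, &offset, head));

        return (long)offset;
}

The caller then writes its record at offset & (rb->size - 1); in the kernel the
failing path instead bumps buffer->lost so a PERF_RECORD_LOST record can be
emitted later.
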
@@ -3166,15 +3166,15 @@ out:
 void perf_output_end(struct perf_output_handle *handle)
 {
        struct perf_event *event = handle->event;
-       struct perf_mmap_data *data = handle->data;
+       struct perf_buffer *buffer = handle->buffer;
 
        int wakeup_events = event->attr.wakeup_events;
 
        if (handle->sample && wakeup_events) {
-               int events = local_inc_return(&data->events);
+               int events = local_inc_return(&buffer->events);
                if (events >= wakeup_events) {
-                       local_sub(wakeup_events, &data->events);
-                       local_inc(&data->wakeup);
+                       local_sub(wakeup_events, &buffer->events);
+                       local_inc(&buffer->wakeup);
                }
        }
 
@@ -3211,7 +3211,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
        u64 values[4];
        int n = 0;
 
-       values[n++] = atomic64_read(&event->count);
+       values[n++] = perf_event_count(event);
        if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
                values[n++] = event->total_time_enabled +
                        atomic64_read(&event->child_total_time_enabled);
@@ -3248,7 +3248,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
        if (leader != event)
                leader->pmu->read(leader);
 
-       values[n++] = atomic64_read(&leader->count);
+       values[n++] = perf_event_count(leader);
        if (read_format & PERF_FORMAT_ID)
                values[n++] = primary_event_id(leader);
 
@@ -3260,7 +3260,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
                if (sub != event)
                        sub->pmu->read(sub);
 
-               values[n++] = atomic64_read(&sub->count);
+               values[n++] = perf_event_count(sub);
                if (read_format & PERF_FORMAT_ID)
                        values[n++] = primary_event_id(sub);
 
@@ -3491,7 +3491,7 @@ perf_event_read_event(struct perf_event *event,
 /*
  * task tracking -- fork/exit
  *
- * enabled by: attr.comm | attr.mmap | attr.task
+ * enabled by: attr.comm | attr.mmap | attr.mmap_data | attr.task
  */
 
 struct perf_task_event {
@@ -3541,7 +3541,8 @@ static int perf_event_task_match(struct perf_event *event)
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
 
-       if (event->attr.comm || event->attr.mmap || event->attr.task)
+       if (event->attr.comm || event->attr.mmap ||
+           event->attr.mmap_data || event->attr.task)
                return 1;
 
        return 0;
@@ -3766,7 +3767,8 @@ static void perf_event_mmap_output(struct perf_event *event,
 }
 
 static int perf_event_mmap_match(struct perf_event *event,
-                                  struct perf_mmap_event *mmap_event)
+                                  struct perf_mmap_event *mmap_event,
+                                  int executable)
 {
        if (event->state < PERF_EVENT_STATE_INACTIVE)
                return 0;
@@ -3774,19 +3776,21 @@ static int perf_event_mmap_match(struct perf_event *event,
        if (event->cpu != -1 && event->cpu != smp_processor_id())
                return 0;
 
-       if (event->attr.mmap)
+       if ((!executable && event->attr.mmap_data) ||
+           (executable && event->attr.mmap))
                return 1;
 
        return 0;
 }
 
 static void perf_event_mmap_ctx(struct perf_event_context *ctx,
-                                 struct perf_mmap_event *mmap_event)
+                                 struct perf_mmap_event *mmap_event,
+                                 int executable)
 {
        struct perf_event *event;
 
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-               if (perf_event_mmap_match(event, mmap_event))
+               if (perf_event_mmap_match(event, mmap_event, executable))
                        perf_event_mmap_output(event, mmap_event);
        }
 }
@@ -3830,6 +3834,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
                if (!vma->vm_mm) {
                        name = strncpy(tmp, "[vdso]", sizeof(tmp));
                        goto got_name;
+               } else if (vma->vm_start <= vma->vm_mm->start_brk &&
+                               vma->vm_end >= vma->vm_mm->brk) {
+                       name = strncpy(tmp, "[heap]", sizeof(tmp));
+                       goto got_name;
+               } else if (vma->vm_start <= vma->vm_mm->start_stack &&
+                               vma->vm_end >= vma->vm_mm->start_stack) {
+                       name = strncpy(tmp, "[stack]", sizeof(tmp));
+                       goto got_name;
                }
 
                name = strncpy(tmp, "//anon", sizeof(tmp));
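
The new branches above name an anonymous mapping "[heap]" or "[stack]" by
comparing the vma range against the mm's brk and stack markers, so mmap_data
samples of those regions get a readable label.  The decision, condensed into a
standalone helper (mm_layout/anon_vma_name are hypothetical stand-ins for the
mm_struct fields used above):

/* Condensed restatement of the naming logic added above. */
struct mm_layout {
        unsigned long start_brk, brk;   /* heap bounds */
        unsigned long start_stack;      /* initial stack pointer */
};

static const char *anon_vma_name(const struct mm_layout *mm,
                                 unsigned long vm_start, unsigned long vm_end)
{
        if (!mm)
                return "[vdso]";        /* no mm: treated as the vdso mapping */
        if (vm_start <= mm->start_brk && vm_end >= mm->brk)
                return "[heap]";
        if (vm_start <= mm->start_stack && vm_end >= mm->start_stack)
                return "[stack]";
        return "//anon";
}
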
@@ -3846,17 +3858,17 @@ got_name:
 
        rcu_read_lock();
        cpuctx = &get_cpu_var(perf_cpu_context);
-       perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
+       perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC);
        ctx = rcu_dereference(current->perf_event_ctxp);
        if (ctx)
-               perf_event_mmap_ctx(ctx, mmap_event);
+               perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC);
        put_cpu_var(perf_cpu_context);
        rcu_read_unlock();
 
        kfree(buf);
 }
 
-void __perf_event_mmap(struct vm_area_struct *vma)
+void perf_event_mmap(struct vm_area_struct *vma)
 {
        struct perf_mmap_event mmap_event;
 
@@ -4018,14 +4030,14 @@ static u64 perf_swevent_set_period(struct perf_event *event)
        hwc->last_period = hwc->sample_period;
 
 again:
-       old = val = atomic64_read(&hwc->period_left);
+       old = val = local64_read(&hwc->period_left);
        if (val < 0)
                return 0;
 
        nr = div64_u64(period + val, period);
        offset = nr * period;
        val -= offset;
-       if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
+       if (local64_cmpxchg(&hwc->period_left, old, val) != old)
                goto again;
 
        return nr;
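
The arithmetic above converts whatever has accumulated in period_left into a
whole number of elapsed periods, nr = (period + val) / period, and leaves the
(negative) remainder behind for the next round.  A plain C restatement with one
worked value (fold_periods is an invented name):

#include <stdio.h>

/* Returns how many sample periods have elapsed given the accumulated count in
 * *left, and stores the negative remainder back for the next round. */
static unsigned long long fold_periods(long long *left, unsigned long long period)
{
        long long val = *left;
        unsigned long long nr;

        if (val < 0)
                return 0;
        nr = ((unsigned long long)val + period) / period;
        *left = val - (long long)(nr * period);
        return nr;
}

int main(void)
{
        long long left = 250;
        unsigned long long nr = fold_periods(&left, 100);

        /* 100-sample period: (250 + 100) / 100 = 3 overflows, 250 - 300 = -50 left over. */
        printf("nr=%llu left=%lld\n", nr, left);
        return 0;
}
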
@@ -4064,7 +4076,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
 {
        struct hw_perf_event *hwc = &event->hw;
 
-       atomic64_add(nr, &event->count);
+       local64_add(nr, &event->count);
 
        if (!regs)
                return;
@@ -4075,7 +4087,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
        if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
                return perf_swevent_overflow(event, 1, nmi, data, regs);
 
-       if (atomic64_add_negative(nr, &hwc->period_left))
+       if (local64_add_negative(nr, &hwc->period_left))
                return;
 
        perf_swevent_overflow(event, 0, nmi, data, regs);
@@ -4213,14 +4225,12 @@ int perf_swevent_get_recursion_context(void)
 }
 EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
 
-void perf_swevent_put_recursion_context(int rctx)
+void inline perf_swevent_put_recursion_context(int rctx)
 {
        struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
        barrier();
        cpuctx->recursion[rctx]--;
 }
-EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
-
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
                            struct pt_regs *regs, u64 addr)
@@ -4368,8 +4378,8 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
        u64 now;
 
        now = cpu_clock(cpu);
-       prev = atomic64_xchg(&event->hw.prev_count, now);
-       atomic64_add(now - prev, &event->count);
+       prev = local64_xchg(&event->hw.prev_count, now);
+       local64_add(now - prev, &event->count);
 }
 
 static int cpu_clock_perf_event_enable(struct perf_event *event)
@@ -4377,7 +4387,7 @@ static int cpu_clock_perf_event_enable(struct perf_event *event)
        struct hw_perf_event *hwc = &event->hw;
        int cpu = raw_smp_processor_id();
 
-       atomic64_set(&hwc->prev_count, cpu_clock(cpu));
+       local64_set(&hwc->prev_count, cpu_clock(cpu));
        perf_swevent_start_hrtimer(event);
 
        return 0;
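
Both clock events follow the same pattern: keep the timestamp of the previous
update in hw.prev_count, and on every update swap in the new timestamp and
credit the difference to event->count; the conversion only switches the
primitives from atomic64_* to the CPU-local local64_*.  A user-space sketch of
that exchange-and-accumulate step (clock_counter and friends are illustrative
names):

#include <stdatomic.h>
#include <stdint.h>
#include <time.h>

struct clock_counter {
        _Atomic int64_t prev;   /* timestamp at the last update */
        _Atomic int64_t count;  /* accumulated nanoseconds */
};

static int64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
}

/* Mirrors the local64_xchg()/local64_add() pair in the update functions. */
static void clock_counter_update(struct clock_counter *c)
{
        int64_t now = now_ns();
        int64_t prev = atomic_exchange(&c->prev, now);

        atomic_fetch_add(&c->count, now - prev);
}
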
@@ -4409,9 +4419,9 @@ static void task_clock_perf_event_update(struct perf_event *event, u64 now)
        u64 prev;
        s64 delta;
 
-       prev = atomic64_xchg(&event->hw.prev_count, now);
+       prev = local64_xchg(&event->hw.prev_count, now);
        delta = now - prev;
-       atomic64_add(delta, &event->count);
+       local64_add(delta, &event->count);
 }
 
 static int task_clock_perf_event_enable(struct perf_event *event)
@@ -4421,7 +4431,7 @@ static int task_clock_perf_event_enable(struct perf_event *event)
 
        now = event->ctx->time;
 
-       atomic64_set(&hwc->prev_count, now);
+       local64_set(&hwc->prev_count, now);
 
        perf_swevent_start_hrtimer(event);
 
@@ -4601,7 +4611,7 @@ static int perf_tp_event_match(struct perf_event *event,
 }
 
 void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
-                  struct pt_regs *regs, struct hlist_head *head)
+                  struct pt_regs *regs, struct hlist_head *head, int rctx)
 {
        struct perf_sample_data data;
        struct perf_event *event;
@@ -4615,12 +4625,12 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
        perf_sample_data_init(&data, addr);
        data.raw = &raw;
 
-       rcu_read_lock();
        hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
                if (perf_tp_event_match(event, &data, regs))
                        perf_swevent_add(event, count, 1, &data, regs);
        }
-       rcu_read_unlock();
+
+       perf_swevent_put_recursion_context(rctx);
 }
 EXPORT_SYMBOL_GPL(perf_tp_event);
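
perf_tp_event() now expects its caller to have taken a recursion context (and,
presumably, the RCU protection that this code used to take locally), and
releases that context itself via the new rctx argument.  The guard behind it is
just a per-context nesting counter; a toy model with invented names, where the
context level is passed in explicitly rather than derived from irq/NMI state:

/* One slot per context level, mirroring task/softirq/irq/NMI nesting. */
enum ctx_level { CTX_TASK, CTX_SOFTIRQ, CTX_IRQ, CTX_NMI, CTX_LEVELS };

static _Thread_local int recursion[CTX_LEVELS];

/* Returns the context index, or -1 if this level is already instrumenting
 * itself, in which case the event is silently dropped. */
static int get_recursion_context(enum ctx_level level)
{
        if (recursion[level])
                return -1;
        recursion[level]++;
        return level;
}

static void put_recursion_context(int rctx)
{
        recursion[rctx]--;
}

The producer brackets its work between the two calls; after this change the put
side for tracepoints happens inside perf_tp_event() itself.
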
 
@@ -4864,7 +4874,7 @@ perf_event_alloc(struct perf_event_attr *attr,
                hwc->sample_period = 1;
        hwc->last_period = hwc->sample_period;
 
-       atomic64_set(&hwc->period_left, hwc->sample_period);
+       local64_set(&hwc->period_left, hwc->sample_period);
 
        /*
         * we currently do not support PERF_FORMAT_GROUP on inherited events
@@ -4913,7 +4923,7 @@ done:
 
        if (!event->parent) {
                atomic_inc(&nr_events);
-               if (event->attr.mmap)
+               if (event->attr.mmap || event->attr.mmap_data)
                        atomic_inc(&nr_mmap_events);
                if (event->attr.comm)
                        atomic_inc(&nr_comm_events);
@@ -5007,7 +5017,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-       struct perf_mmap_data *data = NULL, *old_data = NULL;
+       struct perf_buffer *buffer = NULL, *old_buffer = NULL;
        int ret = -EINVAL;
 
        if (!output_event)
@@ -5037,19 +5047,19 @@ set:
 
        if (output_event) {
                /* get the buffer we want to redirect to */
-               data = perf_mmap_data_get(output_event);
-               if (!data)
+               buffer = perf_buffer_get(output_event);
+               if (!buffer)
                        goto unlock;
        }
 
-       old_data = event->data;
-       rcu_assign_pointer(event->data, data);
+       old_buffer = event->buffer;
+       rcu_assign_pointer(event->buffer, buffer);
        ret = 0;
 unlock:
        mutex_unlock(&event->mmap_mutex);
 
-       if (old_data)
-               perf_mmap_data_put(old_data);
+       if (old_buffer)
+               perf_buffer_put(old_buffer);
 out:
        return ret;
 }
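
Redirecting one event's output into another's buffer is the usual swap pattern:
take a reference on the new buffer, publish the pointer under the mmap mutex,
then drop the reference on the buffer that was replaced.  A simplified
user-space sketch with an atomic refcount (the kernel pairs this with RCU and
perf_buffer_get()/perf_buffer_put(); the names below are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct buffer {
        atomic_int refcount;
        /* ... data pages ... */
};

struct event {
        pthread_mutex_t mmap_mutex;
        _Atomic(struct buffer *) buffer;        /* published output buffer */
};

static struct buffer *buffer_get(struct buffer *b)
{
        if (b)
                atomic_fetch_add(&b->refcount, 1);
        return b;
}

static void buffer_put(struct buffer *b)
{
        if (b && atomic_fetch_sub(&b->refcount, 1) == 1)
                free(b);
}

/* Point ev's output at out's buffer, or detach when out is NULL. */
static int event_set_output(struct event *ev, struct event *out)
{
        struct buffer *nbuf = NULL, *obuf;

        if (out) {
                nbuf = buffer_get(atomic_load(&out->buffer));
                if (!nbuf)
                        return -1;      /* output event has no buffer yet */
        }

        pthread_mutex_lock(&ev->mmap_mutex);
        obuf = atomic_exchange(&ev->buffer, nbuf);
        pthread_mutex_unlock(&ev->mmap_mutex);

        buffer_put(obuf);       /* buffer_put(NULL) is a no-op */
        return 0;
}
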
@@ -5298,7 +5308,7 @@ inherit_event(struct perf_event *parent_event,
                hwc->sample_period = sample_period;
                hwc->last_period   = sample_period;
 
-               atomic64_set(&hwc->period_left, sample_period);
+               local64_set(&hwc->period_left, sample_period);
        }
 
        child_event->overflow_handler = parent_event->overflow_handler;
@@ -5359,12 +5369,12 @@ static void sync_child_event(struct perf_event *child_event,
        if (child_event->attr.inherit_stat)
                perf_event_read_event(child_event, child);
 
-       child_val = atomic64_read(&child_event->count);
+       child_val = perf_event_count(child_event);
 
        /*
         * Add back the child's count to the parent's count:
         */
-       atomic64_add(child_val, &parent_event->count);
+       atomic64_add(child_val, &parent_event->child_count);
        atomic64_add(child_event->total_time_enabled,
                     &parent_event->child_total_time_enabled);
        atomic64_add(child_event->total_time_running,
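
With event->count now updated through local64 operations by its owner, an
exiting child no longer adds into the parent's count directly; its total is
folded into a separate atomic child_count, and readers are assumed to go
through perf_event_count(), which sums the two.  A minimal sketch of that split
(the helper body is an assumption based on the hunks above):

#include <stdatomic.h>
#include <stdint.h>

struct counter {
        int64_t count;                  /* stands in for local64_t: only the owner writes it */
        _Atomic int64_t child_count;    /* folded in from exiting children, cross-CPU */
};

/* Presumed shape of perf_event_count(): own count plus inherited child counts. */
static int64_t counter_total(struct counter *c)
{
        return c->count + atomic_load(&c->child_count);
}

/* What sync_child_event() does now: fold the child's total into child_count. */
static void counter_sync_child(struct counter *parent, struct counter *child)
{
        atomic_fetch_add(&parent->child_count, counter_total(child));
}
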
index f52a8801b7a285fb252ecc6935b55f525813881b..265cf3a2b5d898e89d714535c04af312831d79c8 100644 (file)
@@ -3726,7 +3726,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
  * off of preempt_enable. Kernel preemptions off return from interrupt
  * occur there and call schedule directly.
  */
-asmlinkage void __sched preempt_schedule(void)
+asmlinkage void __sched notrace preempt_schedule(void)
 {
        struct thread_info *ti = current_thread_info();
 
@@ -3738,9 +3738,9 @@ asmlinkage void __sched preempt_schedule(void)
                return;
 
        do {
-               add_preempt_count(PREEMPT_ACTIVE);
+               add_preempt_count_notrace(PREEMPT_ACTIVE);
                schedule();
-               sub_preempt_count(PREEMPT_ACTIVE);
+               sub_preempt_count_notrace(PREEMPT_ACTIVE);
 
                /*
                 * Check again in case we missed a preemption opportunity
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
deleted file mode 100644 (file)
index 4b493f6..0000000
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Detect Soft Lockups
- *
- * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
- *
- * this code detects soft lockups: incidents in where on a CPU
- * the kernel does not reschedule for 10 seconds or more.
- */
-#include <linux/mm.h>
-#include <linux/cpu.h>
-#include <linux/nmi.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/lockdep.h>
-#include <linux/notifier.h>
-#include <linux/module.h>
-#include <linux/sysctl.h>
-
-#include <asm/irq_regs.h>
-
-static DEFINE_SPINLOCK(print_lock);
-
-static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
-static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
-static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
-static DEFINE_PER_CPU(bool, softlock_touch_sync);
-
-static int __read_mostly did_panic;
-int __read_mostly softlockup_thresh = 60;
-
-/*
- * Should we panic (and reboot, if panic_timeout= is set) when a
- * soft-lockup occurs:
- */
-unsigned int __read_mostly softlockup_panic =
-                               CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
-
-static int __init softlockup_panic_setup(char *str)
-{
-       softlockup_panic = simple_strtoul(str, NULL, 0);
-
-       return 1;
-}
-__setup("softlockup_panic=", softlockup_panic_setup);
-
-static int
-softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
-{
-       did_panic = 1;
-
-       return NOTIFY_DONE;
-}
-
-static struct notifier_block panic_block = {
-       .notifier_call = softlock_panic,
-};
-
-/*
- * Returns seconds, approximately.  We don't need nanosecond
- * resolution, and we don't need to waste time with a big divide when
- * 2^30ns == 1.074s.
- */
-static unsigned long get_timestamp(int this_cpu)
-{
-       return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
-}
-
-static void __touch_softlockup_watchdog(void)
-{
-       int this_cpu = raw_smp_processor_id();
-
-       __raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu);
-}
-
-void touch_softlockup_watchdog(void)
-{
-       __raw_get_cpu_var(softlockup_touch_ts) = 0;
-}
-EXPORT_SYMBOL(touch_softlockup_watchdog);
-
-void touch_softlockup_watchdog_sync(void)
-{
-       __raw_get_cpu_var(softlock_touch_sync) = true;
-       __raw_get_cpu_var(softlockup_touch_ts) = 0;
-}
-
-void touch_all_softlockup_watchdogs(void)
-{
-       int cpu;
-
-       /* Cause each CPU to re-update its timestamp rather than complain */
-       for_each_online_cpu(cpu)
-               per_cpu(softlockup_touch_ts, cpu) = 0;
-}
-EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
-
-int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-                            void __user *buffer,
-                            size_t *lenp, loff_t *ppos)
-{
-       touch_all_softlockup_watchdogs();
-       return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-}
-
-/*
- * This callback runs from the timer interrupt, and checks
- * whether the watchdog thread has hung or not:
- */
-void softlockup_tick(void)
-{
-       int this_cpu = smp_processor_id();
-       unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu);
-       unsigned long print_ts;
-       struct pt_regs *regs = get_irq_regs();
-       unsigned long now;
-
-       /* Is detection switched off? */
-       if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) {
-               /* Be sure we don't false trigger if switched back on */
-               if (touch_ts)
-                       per_cpu(softlockup_touch_ts, this_cpu) = 0;
-               return;
-       }
-
-       if (touch_ts == 0) {
-               if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
-                       /*
-                        * If the time stamp was touched atomically
-                        * make sure the scheduler tick is up to date.
-                        */
-                       per_cpu(softlock_touch_sync, this_cpu) = false;
-                       sched_clock_tick();
-               }
-               __touch_softlockup_watchdog();
-               return;
-       }
-
-       print_ts = per_cpu(softlockup_print_ts, this_cpu);
-
-       /* report at most once a second */
-       if (print_ts == touch_ts || did_panic)
-               return;
-
-       /* do not print during early bootup: */
-       if (unlikely(system_state != SYSTEM_RUNNING)) {
-               __touch_softlockup_watchdog();
-               return;
-       }
-
-       now = get_timestamp(this_cpu);
-
-       /*
-        * Wake up the high-prio watchdog task twice per
-        * threshold timespan.
-        */
-       if (time_after(now - softlockup_thresh/2, touch_ts))
-               wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
-
-       /* Warn about unreasonable delays: */
-       if (time_before_eq(now - softlockup_thresh, touch_ts))
-               return;
-
-       per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
-
-       spin_lock(&print_lock);
-       printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
-                       this_cpu, now - touch_ts,
-                       current->comm, task_pid_nr(current));
-       print_modules();
-       print_irqtrace_events(current);
-       if (regs)
-               show_regs(regs);
-       else
-               dump_stack();
-       spin_unlock(&print_lock);
-
-       if (softlockup_panic)
-               panic("softlockup: hung tasks");
-}
-
-/*
- * The watchdog thread - runs every second and touches the timestamp.
- */
-static int watchdog(void *__bind_cpu)
-{
-       struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-
-       sched_setscheduler(current, SCHED_FIFO, &param);
-
-       /* initialize timestamp */
-       __touch_softlockup_watchdog();
-
-       set_current_state(TASK_INTERRUPTIBLE);
-       /*
-        * Run briefly once per second to reset the softlockup timestamp.
-        * If this gets delayed for more than 60 seconds then the
-        * debug-printout triggers in softlockup_tick().
-        */
-       while (!kthread_should_stop()) {
-               __touch_softlockup_watchdog();
-               schedule();
-
-               if (kthread_should_stop())
-                       break;
-
-               set_current_state(TASK_INTERRUPTIBLE);
-       }
-       __set_current_state(TASK_RUNNING);
-
-       return 0;
-}
-
-/*
- * Create/destroy watchdog threads as CPUs come and go:
- */
-static int __cpuinit
-cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-       int hotcpu = (unsigned long)hcpu;
-       struct task_struct *p;
-
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               BUG_ON(per_cpu(softlockup_watchdog, hotcpu));
-               p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
-               if (IS_ERR(p)) {
-                       printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
-                       return NOTIFY_BAD;
-               }
-               per_cpu(softlockup_touch_ts, hotcpu) = 0;
-               per_cpu(softlockup_watchdog, hotcpu) = p;
-               kthread_bind(p, hotcpu);
-               break;
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               wake_up_process(per_cpu(softlockup_watchdog, hotcpu));
-               break;
-#ifdef CONFIG_HOTPLUG_CPU
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
-               if (!per_cpu(softlockup_watchdog, hotcpu))
-                       break;
-               /* Unbind so it can run.  Fall thru. */
-               kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
-                            cpumask_any(cpu_online_mask));
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               p = per_cpu(softlockup_watchdog, hotcpu);
-               per_cpu(softlockup_watchdog, hotcpu) = NULL;
-               kthread_stop(p);
-               break;
-#endif /* CONFIG_HOTPLUG_CPU */
-       }
-       return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cpu_nfb = {
-       .notifier_call = cpu_callback
-};
-
-static int __initdata nosoftlockup;
-
-static int __init nosoftlockup_setup(char *str)
-{
-       nosoftlockup = 1;
-       return 1;
-}
-__setup("nosoftlockup", nosoftlockup_setup);
-
-static int __init spawn_softlockup_task(void)
-{
-       void *cpu = (void *)(long)smp_processor_id();
-       int err;
-
-       if (nosoftlockup)
-               return 0;
-
-       err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
-       if (err == NOTIFY_BAD) {
-               BUG();
-               return 1;
-       }
-       cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
-       register_cpu_notifier(&cpu_nfb);
-
-       atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
-
-       return 0;
-}
-early_initcall(spawn_softlockup_task);
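
The deleted detector reduces to one comparison: timestamps are taken as
cpu_clock() >> 30 (2^30 ns is about 1.07 s, close enough to seconds), and a CPU
is reported once now - softlockup_thresh gets past the last touch timestamp;
the replacement CONFIG_LOCKUP_DETECTOR code keeps the same idea.  The old
predicate, restated as a small user-space sketch:

#include <stdbool.h>
#include <time.h>

/* Approximate seconds the way the old detector did: nanoseconds >> 30. */
static unsigned long coarse_seconds(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (unsigned long)(((unsigned long long)ts.tv_sec * 1000000000ULL +
                                ts.tv_nsec) >> 30);
}

/* True once the watchdog has not been touched for more than thresh "seconds";
 * mirrors the old time_after(now - softlockup_thresh, touch_ts) test, without
 * the wrap handling. */
static bool looks_locked_up(unsigned long touch_ts, unsigned long thresh)
{
        return coarse_seconds() - thresh > touch_ts;
}
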
index d24f761f48769d925692efcbb233a276dad01905..6f79c7f81c960a3da9d6b3832a92f4deaad9c7d0 100644 (file)
 #include <scsi/sg.h>
 #endif
 
+#ifdef CONFIG_LOCKUP_DETECTOR
+#include <linux/nmi.h>
+#endif
+
 
 #if defined(CONFIG_SYSCTL)
 
@@ -106,7 +110,7 @@ extern int blk_iopoll_enabled;
 #endif
 
 /* Constants used for minimum and  maximum */
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#ifdef CONFIG_LOCKUP_DETECTOR
 static int sixty = 60;
 static int neg_one = -1;
 #endif
@@ -710,7 +714,34 @@ static struct ctl_table kern_table[] = {
                .mode           = 0444,
                .proc_handler   = proc_dointvec,
        },
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+#if defined(CONFIG_LOCKUP_DETECTOR)
+       {
+               .procname       = "watchdog",
+               .data           = &watchdog_enabled,
+               .maxlen         = sizeof (int),
+               .mode           = 0644,
+               .proc_handler   = proc_dowatchdog_enabled,
+       },
+       {
+               .procname       = "watchdog_thresh",
+               .data           = &softlockup_thresh,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dowatchdog_thresh,
+               .extra1         = &neg_one,
+               .extra2         = &sixty,
+       },
+       {
+               .procname       = "softlockup_panic",
+               .data           = &softlockup_panic,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
        {
                .procname       = "unknown_nmi_panic",
                .data           = &unknown_nmi_panic,
@@ -813,26 +844,6 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
 #endif
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-       {
-               .procname       = "softlockup_panic",
-               .data           = &softlockup_panic,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &zero,
-               .extra2         = &one,
-       },
-       {
-               .procname       = "softlockup_thresh",
-               .data           = &softlockup_thresh,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dosoftlockup_thresh,
-               .extra1         = &neg_one,
-               .extra2         = &sixty,
-       },
-#endif
 #ifdef CONFIG_DETECT_HUNG_TASK
        {
                .procname       = "hung_task_panic",
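
The new entries land in kern_table, so the detector should be reachable as
/proc/sys/kernel/watchdog, /proc/sys/kernel/watchdog_thresh and
/proc/sys/kernel/softlockup_panic (paths inferred from the .procname fields
above).  A small helper that pokes one of them, assuming those paths:

#include <stdio.h>

/* Write an integer to a /proc/sys/kernel/ knob; returns 0 on success. */
static int write_kern_sysctl(const char *name, int value)
{
        char path[128];
        FILE *f;

        snprintf(path, sizeof(path), "/proc/sys/kernel/%s", name);
        f = fopen(path, "w");
        if (!f)
                return -1;
        fprintf(f, "%d\n", value);
        return fclose(f) ? -1 : 0;
}

int main(void)
{
        /* Lower the lockup threshold to 30 seconds; needs root. */
        return write_kern_sysctl("watchdog_thresh", 30) ? 1 : 0;
}
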
index efde11e197c4d40f2abd8badd13459e159fdb5cb..6aa6f7e69ad5d59b35d1e266092a709e75f98dee 100644 (file)
@@ -1302,7 +1302,6 @@ void run_local_timers(void)
 {
        hrtimer_run_queues();
        raise_softirq(TIMER_SOFTIRQ);
-       softlockup_tick();
 }
 
 /*
index 8b1797c4545b41c00cca7b3e5e268cf8b8f0e164..c7683fd8a03ac09cc61a4125a904379a305f233b 100644 (file)
@@ -194,15 +194,6 @@ config PREEMPT_TRACER
          enabled. This option and the irqs-off timing option can be
          used together or separately.)
 
-config SYSPROF_TRACER
-       bool "Sysprof Tracer"
-       depends on X86
-       select GENERIC_TRACER
-       select CONTEXT_SWITCH_TRACER
-       help
-         This tracer provides the trace needed by the 'Sysprof' userspace
-         tool.
-
 config SCHED_TRACER
        bool "Scheduling Latency Tracer"
        select GENERIC_TRACER
@@ -229,23 +220,6 @@ config FTRACE_SYSCALLS
        help
          Basic tracer to catch the syscall entry and exit events.
 
-config BOOT_TRACER
-       bool "Trace boot initcalls"
-       select GENERIC_TRACER
-       select CONTEXT_SWITCH_TRACER
-       help
-         This tracer helps developers to optimize boot times: it records
-         the timings of the initcalls and traces key events and the identity
-         of tasks that can cause boot delays, such as context-switches.
-
-         Its aim is to be parsed by the scripts/bootgraph.pl tool to
-         produce pretty graphics about boot inefficiencies, giving a visual
-         representation of the delays during initcalls - but the raw
-         /debug/tracing/trace text output is readable too.
-
-         You must pass in initcall_debug and ftrace=initcall to the kernel
-         command line to enable this on bootup.
-
 config TRACE_BRANCH_PROFILING
        bool
        select GENERIC_TRACER
@@ -325,28 +299,6 @@ config BRANCH_TRACER
 
          Say N if unsure.
 
-config KSYM_TRACER
-       bool "Trace read and write access on kernel memory locations"
-       depends on HAVE_HW_BREAKPOINT
-       select TRACING
-       help
-         This tracer helps find read and write operations on any given kernel
-         symbol i.e. /proc/kallsyms.
-
-config PROFILE_KSYM_TRACER
-       bool "Profile all kernel memory accesses on 'watched' variables"
-       depends on KSYM_TRACER
-       help
-         This tracer profiles kernel accesses on variables watched through the
-         ksym tracer ftrace plugin. Depending upon the hardware, all read
-         and write operations on kernel variables can be monitored for
-         accesses.
-
-         The results will be displayed in:
-         /debugfs/tracing/profile_ksym
-
-         Say N if unsure.
-
 config STACK_TRACER
        bool "Trace max stack"
        depends on HAVE_FUNCTION_TRACER
@@ -371,26 +323,6 @@ config STACK_TRACER
 
          Say N if unsure.
 
-config KMEMTRACE
-       bool "Trace SLAB allocations"
-       select GENERIC_TRACER
-       help
-         kmemtrace provides tracing for slab allocator functions, such as
-         kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
-         data is then fed to the userspace application in order to analyse
-         allocation hotspots, internal fragmentation and so on, making it
-         possible to see how well an allocator performs, as well as debug
-         and profile kernel code.
-
-         This requires an userspace application to use. See
-         Documentation/trace/kmemtrace.txt for more information.
-
-         Saying Y will make the kernel somewhat larger and slower. However,
-         if you disable kmemtrace at run-time or boot-time, the performance
-         impact is minimal (depending on the arch the kernel is built for).
-
-         If unsure, say N.
-
 config WORKQUEUE_TRACER
        bool "Trace workqueues"
        select GENERIC_TRACER
index 4215530b490b8659e2169ddb5e06bc91b5a7046d..53f338190b260df929d3ef6020d2bad817fcc73f 100644 (file)
@@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_TRACING) += trace_printk.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
-obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
@@ -38,10 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
 obj-$(CONFIG_NOP_TRACER) += trace_nop.o
 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
-obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
-obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
 ifeq ($(CONFIG_BLOCK),y)
@@ -55,7 +52,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
-obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
 obj-$(CONFIG_EVENT_TRACING) += power-traces.o
 ifeq ($(CONFIG_TRACING),y)
 obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
index 6d2cb14f9449083c9a2e78f507b9c1255c8e7ca2..0d88ce9b9fb8828c9a81fdffcd47763ae5cc2543 100644 (file)
@@ -1883,7 +1883,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
        struct hlist_head *hhd;
        struct hlist_node *n;
        unsigned long key;
-       int resched;
 
        key = hash_long(ip, FTRACE_HASH_BITS);
 
@@ -1897,12 +1896,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
         * period. This syncs the hash iteration and freeing of items
         * on the hash. rcu_read_lock is too dangerous here.
         */
-       resched = ftrace_preempt_disable();
+       preempt_disable_notrace();
        hlist_for_each_entry_rcu(entry, n, hhd, node) {
                if (entry->ip == ip)
                        entry->ops->func(ip, parent_ip, &entry->data);
        }
-       ftrace_preempt_enable(resched);
+       preempt_enable_notrace();
 }
 
 static struct ftrace_ops trace_probe_ops __read_mostly =
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
deleted file mode 100644 (file)
index bbfc1bb..0000000
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Memory allocator tracing
- *
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- */
-
-#include <linux/tracepoint.h>
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/dcache.h>
-#include <linux/fs.h>
-
-#include <linux/kmemtrace.h>
-
-#include "trace_output.h"
-#include "trace.h"
-
-/* Select an alternative, minimalistic output than the original one */
-#define TRACE_KMEM_OPT_MINIMAL 0x1
-
-static struct tracer_opt kmem_opts[] = {
-       /* Default disable the minimalistic output */
-       { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
-       { }
-};
-
-static struct tracer_flags kmem_tracer_flags = {
-       .val                    = 0,
-       .opts                   = kmem_opts
-};
-
-static struct trace_array *kmemtrace_array;
-
-/* Trace allocations */
-static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
-                                  unsigned long call_site,
-                                  const void *ptr,
-                                  size_t bytes_req,
-                                  size_t bytes_alloc,
-                                  gfp_t gfp_flags,
-                                  int node)
-{
-       struct ftrace_event_call *call = &event_kmem_alloc;
-       struct trace_array *tr = kmemtrace_array;
-       struct kmemtrace_alloc_entry *entry;
-       struct ring_buffer_event *event;
-
-       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
-       if (!event)
-               return;
-
-       entry = ring_buffer_event_data(event);
-       tracing_generic_entry_update(&entry->ent, 0, 0);
-
-       entry->ent.type         = TRACE_KMEM_ALLOC;
-       entry->type_id          = type_id;
-       entry->call_site        = call_site;
-       entry->ptr              = ptr;
-       entry->bytes_req        = bytes_req;
-       entry->bytes_alloc      = bytes_alloc;
-       entry->gfp_flags        = gfp_flags;
-       entry->node             = node;
-
-       if (!filter_check_discard(call, entry, tr->buffer, event))
-               ring_buffer_unlock_commit(tr->buffer, event);
-
-       trace_wake_up();
-}
-
-static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
-                                 unsigned long call_site,
-                                 const void *ptr)
-{
-       struct ftrace_event_call *call = &event_kmem_free;
-       struct trace_array *tr = kmemtrace_array;
-       struct kmemtrace_free_entry *entry;
-       struct ring_buffer_event *event;
-
-       event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
-       if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       tracing_generic_entry_update(&entry->ent, 0, 0);
-
-       entry->ent.type         = TRACE_KMEM_FREE;
-       entry->type_id          = type_id;
-       entry->call_site        = call_site;
-       entry->ptr              = ptr;
-
-       if (!filter_check_discard(call, entry, tr->buffer, event))
-               ring_buffer_unlock_commit(tr->buffer, event);
-
-       trace_wake_up();
-}
-
-static void kmemtrace_kmalloc(void *ignore,
-                             unsigned long call_site,
-                             const void *ptr,
-                             size_t bytes_req,
-                             size_t bytes_alloc,
-                             gfp_t gfp_flags)
-{
-       kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
-                       bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmem_cache_alloc(void *ignore,
-                                      unsigned long call_site,
-                                      const void *ptr,
-                                      size_t bytes_req,
-                                      size_t bytes_alloc,
-                                      gfp_t gfp_flags)
-{
-       kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
-                       bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmalloc_node(void *ignore,
-                                  unsigned long call_site,
-                                  const void *ptr,
-                                  size_t bytes_req,
-                                  size_t bytes_alloc,
-                                  gfp_t gfp_flags,
-                                  int node)
-{
-       kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
-                       bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void kmemtrace_kmem_cache_alloc_node(void *ignore,
-                                           unsigned long call_site,
-                                           const void *ptr,
-                                           size_t bytes_req,
-                                           size_t bytes_alloc,
-                                           gfp_t gfp_flags,
-                                           int node)
-{
-       kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
-                       bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void
-kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
-{
-       kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
-}
-
-static void kmemtrace_kmem_cache_free(void *ignore,
-                                     unsigned long call_site, const void *ptr)
-{
-       kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
-}
-
-static int kmemtrace_start_probes(void)
-{
-       int err;
-
-       err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
-       if (err)
-               return err;
-       err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
-       if (err)
-               return err;
-       err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
-       if (err)
-               return err;
-       err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
-       if (err)
-               return err;
-       err = register_trace_kfree(kmemtrace_kfree, NULL);
-       if (err)
-               return err;
-       err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-
-       return err;
-}
-
-static void kmemtrace_stop_probes(void)
-{
-       unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
-       unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
-       unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
-       unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
-       unregister_trace_kfree(kmemtrace_kfree, NULL);
-       unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-}
-
-static int kmem_trace_init(struct trace_array *tr)
-{
-       kmemtrace_array = tr;
-
-       tracing_reset_online_cpus(tr);
-
-       kmemtrace_start_probes();
-
-       return 0;
-}
-
-static void kmem_trace_reset(struct trace_array *tr)
-{
-       kmemtrace_stop_probes();
-}
-
-static void kmemtrace_headers(struct seq_file *s)
-{
-       /* Don't need headers for the original kmemtrace output */
-       if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
-               return;
-
-       seq_printf(s, "#\n");
-       seq_printf(s, "# ALLOC  TYPE  REQ   GIVEN  FLAGS     "
-                       "      POINTER         NODE    CALLER\n");
-       seq_printf(s, "# FREE   |      |     |       |       "
-                       "       |   |            |        |\n");
-       seq_printf(s, "# |\n\n");
-}
-
-/*
- * The following functions give the original output from kmemtrace,
- * plus the origin CPU, since reordering occurs in-kernel now.
- */
-
-#define KMEMTRACE_USER_ALLOC   0
-#define KMEMTRACE_USER_FREE    1
-
-struct kmemtrace_user_event {
-       u8                      event_id;
-       u8                      type_id;
-       u16                     event_size;
-       u32                     cpu;
-       u64                     timestamp;
-       unsigned long           call_site;
-       unsigned long           ptr;
-};
-
-struct kmemtrace_user_event_alloc {
-       size_t                  bytes_req;
-       size_t                  bytes_alloc;
-       unsigned                gfp_flags;
-       int                     node;
-};
-
-static enum print_line_t
-kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
-                     struct trace_event *event)
-{
-       struct trace_seq *s = &iter->seq;
-       struct kmemtrace_alloc_entry *entry;
-       int ret;
-
-       trace_assign_type(entry, iter->ent);
-
-       ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
-           "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
-           entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
-           (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
-           (unsigned long)entry->gfp_flags, entry->node);
-
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-       return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free(struct trace_iterator *iter, int flags,
-                    struct trace_event *event)
-{
-       struct trace_seq *s = &iter->seq;
-       struct kmemtrace_free_entry *entry;
-       int ret;
-
-       trace_assign_type(entry, iter->ent);
-
-       ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
-                              entry->type_id, (void *)entry->call_site,
-                              (unsigned long)entry->ptr);
-
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-       return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
-                          struct trace_event *event)
-{
-       struct trace_seq *s = &iter->seq;
-       struct kmemtrace_alloc_entry *entry;
-       struct kmemtrace_user_event *ev;
-       struct kmemtrace_user_event_alloc *ev_alloc;
-
-       trace_assign_type(entry, iter->ent);
-
-       ev = trace_seq_reserve(s, sizeof(*ev));
-       if (!ev)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       ev->event_id            = KMEMTRACE_USER_ALLOC;
-       ev->type_id             = entry->type_id;
-       ev->event_size          = sizeof(*ev) + sizeof(*ev_alloc);
-       ev->cpu                 = iter->cpu;
-       ev->timestamp           = iter->ts;
-       ev->call_site           = entry->call_site;
-       ev->ptr                 = (unsigned long)entry->ptr;
-
-       ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
-       if (!ev_alloc)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       ev_alloc->bytes_req     = entry->bytes_req;
-       ev_alloc->bytes_alloc   = entry->bytes_alloc;
-       ev_alloc->gfp_flags     = entry->gfp_flags;
-       ev_alloc->node          = entry->node;
-
-       return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
-                         struct trace_event *event)
-{
-       struct trace_seq *s = &iter->seq;
-       struct kmemtrace_free_entry *entry;
-       struct kmemtrace_user_event *ev;
-
-       trace_assign_type(entry, iter->ent);
-
-       ev = trace_seq_reserve(s, sizeof(*ev));
-       if (!ev)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       ev->event_id            = KMEMTRACE_USER_FREE;
-       ev->type_id             = entry->type_id;
-       ev->event_size          = sizeof(*ev);
-       ev->cpu                 = iter->cpu;
-       ev->timestamp           = iter->ts;
-       ev->call_site           = entry->call_site;
-       ev->ptr                 = (unsigned long)entry->ptr;
-
-       return TRACE_TYPE_HANDLED;
-}
-
-/* The two other following provide a more minimalistic output */
-static enum print_line_t
-kmemtrace_print_alloc_compress(struct trace_iterator *iter)
-{
-       struct kmemtrace_alloc_entry *entry;
-       struct trace_seq *s = &iter->seq;
-       int ret;
-
-       trace_assign_type(entry, iter->ent);
-
-       /* Alloc entry */
-       ret = trace_seq_printf(s, "  +      ");
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Type */
-       switch (entry->type_id) {
-       case KMEMTRACE_TYPE_KMALLOC:
-               ret = trace_seq_printf(s, "K   ");
-               break;
-       case KMEMTRACE_TYPE_CACHE:
-               ret = trace_seq_printf(s, "C   ");
-               break;
-       case KMEMTRACE_TYPE_PAGES:
-               ret = trace_seq_printf(s, "P   ");
-               break;
-       default:
-               ret = trace_seq_printf(s, "?   ");
-       }
-
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Requested */
-       ret = trace_seq_printf(s, "%4zu   ", entry->bytes_req);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Allocated */
-       ret = trace_seq_printf(s, "%4zu   ", entry->bytes_alloc);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Flags
-        * TODO: would be better to see the name of the GFP flag names
-        */
-       ret = trace_seq_printf(s, "%08x   ", entry->gfp_flags);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Pointer to allocated */
-       ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Node and call site*/
-       ret = trace_seq_printf(s, "%4d   %pf\n", entry->node,
-                                                (void *)entry->call_site);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_compress(struct trace_iterator *iter)
-{
-       struct kmemtrace_free_entry *entry;
-       struct trace_seq *s = &iter->seq;
-       int ret;
-
-       trace_assign_type(entry, iter->ent);
-
-       /* Free entry */
-       ret = trace_seq_printf(s, "  -      ");
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Type */
-       switch (entry->type_id) {
-       case KMEMTRACE_TYPE_KMALLOC:
-               ret = trace_seq_printf(s, "K     ");
-               break;
-       case KMEMTRACE_TYPE_CACHE:
-               ret = trace_seq_printf(s, "C     ");
-               break;
-       case KMEMTRACE_TYPE_PAGES:
-               ret = trace_seq_printf(s, "P     ");
-               break;
-       default:
-               ret = trace_seq_printf(s, "?     ");
-       }
-
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Skip requested/allocated/flags */
-       ret = trace_seq_printf(s, "                       ");
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Pointer to allocated */
-       ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       /* Skip node and print call site*/
-       ret = trace_seq_printf(s, "       %pf\n", (void *)entry->call_site);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
-       return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
-{
-       struct trace_entry *entry = iter->ent;
-
-       if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
-               return TRACE_TYPE_UNHANDLED;
-
-       switch (entry->type) {
-       case TRACE_KMEM_ALLOC:
-               return kmemtrace_print_alloc_compress(iter);
-       case TRACE_KMEM_FREE:
-               return kmemtrace_print_free_compress(iter);
-       default:
-               return TRACE_TYPE_UNHANDLED;
-       }
-}
-
-static struct trace_event_functions kmem_trace_alloc_funcs = {
-       .trace                  = kmemtrace_print_alloc,
-       .binary                 = kmemtrace_print_alloc_user,
-};
-
-static struct trace_event kmem_trace_alloc = {
-       .type                   = TRACE_KMEM_ALLOC,
-       .funcs                  = &kmem_trace_alloc_funcs,
-};
-
-static struct trace_event_functions kmem_trace_free_funcs = {
-       .trace                  = kmemtrace_print_free,
-       .binary                 = kmemtrace_print_free_user,
-};
-
-static struct trace_event kmem_trace_free = {
-       .type                   = TRACE_KMEM_FREE,
-       .funcs                  = &kmem_trace_free_funcs,
-};
-
-static struct tracer kmem_tracer __read_mostly = {
-       .name                   = "kmemtrace",
-       .init                   = kmem_trace_init,
-       .reset                  = kmem_trace_reset,
-       .print_line             = kmemtrace_print_line,
-       .print_header           = kmemtrace_headers,
-       .flags                  = &kmem_tracer_flags
-};
-
-void kmemtrace_init(void)
-{
-       /* earliest opportunity to start kmem tracing */
-}
-
-static int __init init_kmem_tracer(void)
-{
-       if (!register_ftrace_event(&kmem_trace_alloc)) {
-               pr_warning("Warning: could not register kmem events\n");
-               return 1;
-       }
-
-       if (!register_ftrace_event(&kmem_trace_free)) {
-               pr_warning("Warning: could not register kmem events\n");
-               return 1;
-       }
-
-       if (register_tracer(&kmem_tracer) != 0) {
-               pr_warning("Warning: could not register the kmem tracer\n");
-               return 1;
-       }
-
-       return 0;
-}
-device_initcall(init_kmem_tracer);
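
kmemtrace goes away in favour of the generic kmem tracepoints and the perf
event infrastructure.  A rough illustration of consuming one of those
tracepoints from user space via perf_event_open() (the debugfs path and the
kmem:kmalloc event are assumptions about the running kernel's configuration):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static long tracepoint_id(const char *path)
{
        FILE *f = fopen(path, "r");
        long id = -1;

        if (f) {
                if (fscanf(f, "%ld", &id) != 1)
                        id = -1;
                fclose(f);
        }
        return id;
}

int main(void)
{
        struct perf_event_attr attr = { 0 };
        long id;
        int fd;

        id = tracepoint_id("/sys/kernel/debug/tracing/events/kmem/kmalloc/id");
        if (id < 0) {
                fprintf(stderr, "kmem:kmalloc tracepoint id not found\n");
                return 1;
        }

        attr.type = PERF_TYPE_TRACEPOINT;
        attr.size = sizeof(attr);
        attr.config = id;
        attr.sample_period = 1;
        attr.sample_type = PERF_SAMPLE_RAW;

        /* Count kmalloc events for the current task on any CPU. */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        printf("counting kmem:kmalloc on fd %d\n", fd);
        close(fd);
        return 0;
}
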
index 1da7b6ea8b85d70dde15b50369202cb68acab0bf..3632ce87674f88dfd6c4ce5c8ed09eb184ace1a3 100644 (file)
@@ -443,6 +443,7 @@ int ring_buffer_print_page_header(struct trace_seq *s)
  */
 struct ring_buffer_per_cpu {
        int                             cpu;
+       atomic_t                        record_disabled;
        struct ring_buffer              *buffer;
        spinlock_t                      reader_lock;    /* serialize readers */
        arch_spinlock_t                 lock;
@@ -462,7 +463,6 @@ struct ring_buffer_per_cpu {
        unsigned long                   read;
        u64                             write_stamp;
        u64                             read_stamp;
-       atomic_t                        record_disabled;
 };
 
 struct ring_buffer {
@@ -2242,8 +2242,6 @@ static void trace_recursive_unlock(void)
 
 #endif
 
-static DEFINE_PER_CPU(int, rb_need_resched);
-
 /**
  * ring_buffer_lock_reserve - reserve a part of the buffer
  * @buffer: the ring buffer to reserve from
@@ -2264,13 +2262,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event;
-       int cpu, resched;
+       int cpu;
 
        if (ring_buffer_flags != RB_BUFFERS_ON)
                return NULL;
 
        /* If we are tracing schedule, we don't want to recurse */
-       resched = ftrace_preempt_disable();
+       preempt_disable_notrace();
 
        if (atomic_read(&buffer->record_disabled))
                goto out_nocheck;
@@ -2295,21 +2293,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
        if (!event)
                goto out;
 
-       /*
-        * Need to store resched state on this cpu.
-        * Only the first needs to.
-        */
-
-       if (preempt_count() == 1)
-               per_cpu(rb_need_resched, cpu) = resched;
-
        return event;
 
  out:
        trace_recursive_unlock();
 
  out_nocheck:
-       ftrace_preempt_enable(resched);
+       preempt_enable_notrace();
        return NULL;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2355,13 +2345,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
        trace_recursive_unlock();
 
-       /*
-        * Only the last preempt count needs to restore preemption.
-        */
-       if (preempt_count() == 1)
-               ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
-       else
-               preempt_enable_no_resched_notrace();
+       preempt_enable_notrace();
 
        return 0;
 }
@@ -2469,13 +2453,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 
        trace_recursive_unlock();
 
-       /*
-        * Only the last preempt count needs to restore preemption.
-        */
-       if (preempt_count() == 1)
-               ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
-       else
-               preempt_enable_no_resched_notrace();
+       preempt_enable_notrace();
 
 }
 EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2501,12 +2479,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
        struct ring_buffer_event *event;
        void *body;
        int ret = -EBUSY;
-       int cpu, resched;
+       int cpu;
 
        if (ring_buffer_flags != RB_BUFFERS_ON)
                return -EBUSY;
 
-       resched = ftrace_preempt_disable();
+       preempt_disable_notrace();
 
        if (atomic_read(&buffer->record_disabled))
                goto out;
@@ -2536,7 +2514,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
 
        ret = 0;
  out:
-       ftrace_preempt_enable(resched);
+       preempt_enable_notrace();
 
        return ret;
 }
index d6736b93dc2aed0a1934165cf945012306a2f516..ed1032d6f81de5afd35f4c017a849394d4d9615f 100644 (file)
@@ -341,7 +341,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
        TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
-       TRACE_ITER_GRAPH_TIME;
+       TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
 
 static int trace_stop_count;
 static DEFINE_SPINLOCK(tracing_start_lock);
@@ -425,6 +425,7 @@ static const char *trace_options[] = {
        "latency-format",
        "sleep-time",
        "graph-time",
+       "record-cmd",
        NULL
 };
 
@@ -656,6 +657,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
                return;
 
        WARN_ON_ONCE(!irqs_disabled());
+       if (!current_trace->use_max_tr) {
+               WARN_ON_ONCE(1);
+               return;
+       }
        arch_spin_lock(&ftrace_max_lock);
 
        tr->buffer = max_tr.buffer;
@@ -682,6 +687,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
                return;
 
        WARN_ON_ONCE(!irqs_disabled());
+       if (!current_trace->use_max_tr) {
+               WARN_ON_ONCE(1);
+               return;
+       }
+
        arch_spin_lock(&ftrace_max_lock);
 
        ftrace_disable_cpu();
@@ -726,7 +736,7 @@ __acquires(kernel_lock)