Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 22 Jul 2011 23:44:39 +0000 (16:44 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 22 Jul 2011 23:44:39 +0000 (16:44 -0700)
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (123 commits)
  perf: Remove the nmi parameter from the oprofile_perf backend
  x86, perf: Make copy_from_user_nmi() a library function
  perf: Remove perf_event_attr::type check
  x86, perf: P4 PMU - Fix typos in comments and style cleanup
  perf tools: Make test use the preset debugfs path
  perf tools: Add automated tests for events parsing
  perf tools: De-opt the parse_events function
  perf script: Fix display of IP address for non-callchain path
  perf tools: Fix endian conversion reading event attr from file header
  perf tools: Add missing 'node' alias to the hw_cache[] array
  perf probe: Support adding probes on offline kernel modules
  perf probe: Add probed module in front of function
  perf probe: Introduce debuginfo to encapsulate dwarf information
  perf-probe: Move dwarf library routines to dwarf-aux.{c, h}
  perf probe: Remove redundant dwarf functions
  perf probe: Move strtailcmp to string.c
  perf probe: Rename DIE_FIND_CB_FOUND to DIE_FIND_CB_END
  tracing/kprobe: Update symbol reference when loading module
  tracing/kprobes: Support module init function probing
  kprobes: Return -ENOENT if probe point doesn't exist
  ...
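
A recurring change in this series is the removal of the "nmi" argument from
the perf overflow paths; the hunks below drop it mechanically at every call
site.  A hedged sketch of the before/after signatures, inferred from those
call sites rather than quoted from the kernel headers:

	/* before: callers passed an explicit in-NMI flag */
	int perf_event_overflow(struct perf_event *event, int nmi,
				struct perf_sample_data *data,
				struct pt_regs *regs);
	void perf_sw_event(u32 event_id, u64 nr, int nmi,
			   struct pt_regs *regs, u64 addr);

	/* after: the flag is gone; NMI context is detected internally */
	int perf_event_overflow(struct perf_event *event,
				struct perf_sample_data *data,
				struct pt_regs *regs);
	void perf_sw_event(u32 event_id, u64 nr,
			   struct pt_regs *regs, u64 addr);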

140 files changed:
Documentation/trace/kprobetrace.txt
Makefile
arch/alpha/kernel/perf_event.c
arch/alpha/kernel/time.c
arch/arm/kernel/perf_event_v6.c
arch/arm/kernel/perf_event_v7.c
arch/arm/kernel/perf_event_xscale.c
arch/arm/kernel/ptrace.c
arch/arm/kernel/swp_emulate.c
arch/arm/mm/fault.c
arch/mips/include/asm/stacktrace.h
arch/mips/kernel/perf_event.c
arch/mips/kernel/perf_event_mipsxx.c
arch/mips/kernel/process.c
arch/mips/kernel/traps.c
arch/mips/kernel/unaligned.c
arch/mips/math-emu/cp1emu.c
arch/mips/mm/fault.c
arch/mips/oprofile/Makefile
arch/mips/oprofile/backtrace.c [new file with mode: 0644]
arch/mips/oprofile/common.c
arch/mips/oprofile/op_impl.h
arch/powerpc/include/asm/emulated_ops.h
arch/powerpc/include/asm/hw_breakpoint.h
arch/powerpc/kernel/e500-pmu.c
arch/powerpc/kernel/mpc7450-pmu.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/perf_event_fsl_emb.c
arch/powerpc/kernel/power4-pmu.c
arch/powerpc/kernel/power5+-pmu.c
arch/powerpc/kernel/power5-pmu.c
arch/powerpc/kernel/power6-pmu.c
arch/powerpc/kernel/power7-pmu.c
arch/powerpc/kernel/ppc970-pmu.c
arch/powerpc/kernel/ptrace.c
arch/powerpc/kernel/time.c
arch/powerpc/mm/fault.c
arch/s390/mm/fault.c
arch/sh/kernel/cpu/sh4/perf_event.c
arch/sh/kernel/cpu/sh4a/perf_event.c
arch/sh/kernel/ptrace_32.c
arch/sh/kernel/traps_32.c
arch/sh/kernel/traps_64.c
arch/sh/math-emu/math.c
arch/sh/mm/fault_32.c
arch/sh/mm/tlbflush_64.c
arch/sparc/kernel/perf_event.c
arch/sparc/kernel/unaligned_32.c
arch/sparc/kernel/unaligned_64.c
arch/sparc/kernel/visemul.c
arch/sparc/math-emu/math_32.c
arch/sparc/math-emu/math_64.c
arch/sparc/mm/fault_32.c
arch/sparc/mm/fault_64.c
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/perf_event_p4.h
arch/x86/include/asm/uaccess.h
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_amd.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_ds.c
arch/x86/kernel/cpu/perf_event_p4.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/entry_64.S
arch/x86/kernel/kgdb.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/stacktrace.c
arch/x86/lib/Makefile
arch/x86/lib/usercopy.c [new file with mode: 0644]
arch/x86/mm/fault.c
arch/x86/mm/kmemcheck/error.c
arch/x86/oprofile/backtrace.c
drivers/oprofile/oprofile_perf.c
include/linux/ftrace.h
include/linux/ftrace_event.h
include/linux/hw_breakpoint.h
include/linux/perf_event.h
include/linux/ring_buffer.h
include/linux/stacktrace.h
kernel/async.c
kernel/events/Makefile
kernel/events/core.c
kernel/events/hw_breakpoint.c
kernel/events/internal.h [new file with mode: 0644]
kernel/events/ring_buffer.c [new file with mode: 0644]
kernel/kprobes.c
kernel/sched.c
kernel/stacktrace.c
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c
kernel/trace/ring_buffer_benchmark.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_entries.h
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_functions.c
kernel/trace/trace_functions_graph.c
kernel/trace/trace_irqsoff.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_output.c
kernel/trace/trace_sched_wakeup.c
kernel/trace/trace_stack.c
kernel/watchdog.c
samples/hw_breakpoint/data_breakpoint.c
tools/perf/Documentation/perf-annotate.txt
tools/perf/Documentation/perf-probe.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Makefile
tools/perf/builtin-annotate.c
tools/perf/builtin-probe.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-test.c
tools/perf/builtin-top.c
tools/perf/util/callchain.h
tools/perf/util/dwarf-aux.c [new file with mode: 0644]
tools/perf/util/dwarf-aux.h [new file with mode: 0644]
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/header.c
tools/perf/util/hist.c
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/probe-event.c
tools/perf/util/probe-event.h
tools/perf/util/probe-finder.c
tools/perf/util/probe-finder.h
tools/perf/util/python.c
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/string.c
tools/perf/util/trace-event-info.c
tools/perf/util/util.h

index c83bd6b4e6e82b7641eb5d411cb69c415b57d24d..d0d0bb9e3e25653ce46189a046f622d207c0dbc9 100644 (file)
@@ -22,14 +22,15 @@ current_tracer. Instead of that, add probe points via
 
 Synopsis of kprobe_events
 -------------------------
-  p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS]    : Set a probe
-  r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS]               : Set a return probe
+  p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
+  r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]            : Set a return probe
   -:[GRP/]EVENT                                                : Clear a probe
 
  GRP           : Group name. If omitted, use "kprobes" for it.
  EVENT         : Event name. If omitted, the event name is generated
-                 based on SYMBOL+offs or MEMADDR.
- SYMBOL[+offs] : Symbol+offset where the probe is inserted.
+                 based on SYM+offs or MEMADDR.
+ MOD           : Module name which has given SYM.
+ SYM[+offs]    : Symbol+offset where the probe is inserted.
  MEMADDR       : Address where the probe is inserted.
 
  FETCHARGS     : Arguments. Each probe can have up to 128 args.
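
The new [MOD:]SYM form lets a probe name a symbol inside a (possibly not yet
loaded) module.  A minimal user-space sketch of driving it, assuming debugfs
is mounted at the usual path; the module/symbol pair "ext4:ext4_sync_file" is
only an illustration, not taken from this patch:

	#include <stdio.h>

	int main(void)
	{
		/* define a kprobe on a symbol inside the ext4 module */
		FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "w");

		if (!f) {
			perror("kprobe_events");
			return 1;
		}
		fprintf(f, "p:myprobe ext4:ext4_sync_file\n");
		fclose(f);
		return 0;
	}
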
index 6a5bdad524affe34c62141ae9678da115fcc28ee..d0189560613cd715b70674540fb2b824d1f5c9eb 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1290,6 +1290,7 @@ help:
        @echo  '  make O=dir [targets] Locate all output files in "dir", including .config'
        @echo  '  make C=1   [targets] Check all c source with $$CHECK (sparse by default)'
        @echo  '  make C=2   [targets] Force check of all c source with $$CHECK'
+       @echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
        @echo  '  make W=n   [targets] Enable extra gcc checks, n=1,2,3 where'
        @echo  '                1: warnings which may be relevant and do not occur too often'
        @echo  '                2: warnings which occur quite often but may still be relevant'
index 90561c45e7d8928e8e137e33164d2d2d661a28b8..8e47709160f84962bd6b2744ea6a1a5b9ae49b28 100644 (file)
@@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
        data.period = event->hw.last_period;
 
        if (alpha_perf_event_set_period(event, hwc, idx)) {
-               if (perf_event_overflow(event, 1, &data, regs)) {
+               if (perf_event_overflow(event, &data, regs)) {
                        /* Interrupts coming too quickly; "throttle" the
                         * counter, i.e., disable it for a little while.
                         */
index 818e74ed45dc01bbc0eaef0eb60b2ccbf8c137e9..f20d1b5396b86989ab4f4fc68200f054e0203b0e 100644 (file)
@@ -91,7 +91,7 @@ DEFINE_PER_CPU(u8, irq_work_pending);
 #define test_irq_work_pending()      __get_cpu_var(irq_work_pending)
 #define clear_irq_work_pending()     __get_cpu_var(irq_work_pending) = 0
 
-void set_irq_work_pending(void)
+void arch_irq_work_raise(void)
 {
        set_irq_work_pending_flag();
 }
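
This rename (mirrored for powerpc later in the diff) matters because the
generic irq_work layer only ever invokes a hook named arch_irq_work_raise();
an arch function under any other name is dead code.  A sketch of the generic
side's weak default, reconstructed from memory of this era's
kernel/irq_work.c rather than quoted from it:

	/* generic fallback: archs that cannot raise a self-interrupt
	 * simply have pending work picked up on the next timer tick */
	void __weak arch_irq_work_raise(void)
	{
	}
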
index f1e8dd94afe8feade5b8aad4a9b0bcb4f727668b..dd7f3b9f4cb31bffbab806e75bc2c447cc9142fe 100644 (file)
@@ -173,6 +173,20 @@ static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
                },
        },
+       [C(NODE)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+       },
 };
 
 enum armv6mpcore_perf_types {
@@ -310,6 +324,20 @@ static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
                        [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
                },
        },
+       [C(NODE)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]  = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]    = CACHE_OP_UNSUPPORTED,
+               },
+       },
 };
 
 static inline unsigned long
@@ -479,7 +507,7 @@ armv6pmu_handle_irq(int irq_num,
                if (!armpmu_event_set_period(event, hwc, idx))
                        continue;
 
-               if (perf_event_overflow(event, 0, &data, regs))
+               if (perf_event_overflow(event, &data, regs))
                        armpmu->disable(hwc, idx);
        }
 
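These [C(NODE)] rows recur for every architecture in this merge because the
generic cache-event space gained a node dimension (local-memory-node
accesses).  A hedged sketch of the resulting generic enum, reconstructed
from memory of the perf ABI of this era rather than quoted from the patch:

	enum perf_hw_cache_id {
		PERF_COUNT_HW_CACHE_L1D		= 0,
		PERF_COUNT_HW_CACHE_L1I		= 1,
		PERF_COUNT_HW_CACHE_LL		= 2,
		PERF_COUNT_HW_CACHE_DTLB	= 3,
		PERF_COUNT_HW_CACHE_ITLB	= 4,
		PERF_COUNT_HW_CACHE_BPU		= 5,
		PERF_COUNT_HW_CACHE_NODE	= 6,	/* new */

		PERF_COUNT_HW_CACHE_MAX,	/* non-ABI */
	};

Architectures without node-level counters, as in the hunks above, fill the
new row with CACHE_OP_UNSUPPORTED so that lookups fail cleanly.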
index 4960686afb5815c2b54d1258e41bc6ab81a9adde..e20ca9cafef597c81cec1e0f54c57486643c60c8 100644 (file)
@@ -255,6 +255,20 @@ static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
                },
        },
+       [C(NODE)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+       },
 };
 
 /*
@@ -371,6 +385,20 @@ static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
                },
        },
+       [C(NODE)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+       },
 };
 
 /*
@@ -787,7 +815,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
                if (!armpmu_event_set_period(event, hwc, idx))
                        continue;
 
-               if (perf_event_overflow(event, 0, &data, regs))
+               if (perf_event_overflow(event, &data, regs))
                        armpmu->disable(hwc, idx);
        }
 
index 39affbe4fdb2400a34dcfed05a0c0e6adb1b3e7f..3c4397491d080f2425b3a2b28bf3c5306e422a43 100644 (file)
@@ -144,6 +144,20 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
                },
        },
+       [C(NODE)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
+               },
+       },
 };
 
 #define        XSCALE_PMU_ENABLE       0x001
@@ -251,7 +265,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
                if (!armpmu_event_set_period(event, hwc, idx))
                        continue;
 
-               if (perf_event_overflow(event, 0, &data, regs))
+               if (perf_event_overflow(event, &data, regs))
                        armpmu->disable(hwc, idx);
        }
 
@@ -583,7 +597,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
                if (!armpmu_event_set_period(event, hwc, idx))
                        continue;
 
-               if (perf_event_overflow(event, 0, &data, regs))
+               if (perf_event_overflow(event, &data, regs))
                        armpmu->disable(hwc, idx);
        }
 
index 97260060bf2605e5809eb2655eba33a3acf60759..5c199610719fe96ed8c2b8d62e8c5d29a50e1404 100644 (file)
@@ -396,7 +396,7 @@ static long ptrace_hbp_idx_to_num(int idx)
 /*
  * Handle hitting a HW-breakpoint.
  */
-static void ptrace_hbptriggered(struct perf_event *bp, int unused,
+static void ptrace_hbptriggered(struct perf_event *bp,
                                     struct perf_sample_data *data,
                                     struct pt_regs *regs)
 {
@@ -479,7 +479,8 @@ static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type)
        attr.bp_type    = type;
        attr.disabled   = 1;
 
-       return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, tsk);
+       return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL,
+                                          tsk);
 }
 
 static int ptrace_gethbpregs(struct task_struct *tsk, long num,
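
Alongside losing the handler's "nmi" argument, register_user_hw_breakpoint()
grows an opaque context cookie (every caller converted here passes NULL).
A hedged sketch of the updated prototype, inferred from these call sites:

	/* 'context' is an opaque cookie handed back through the event to
	 * the overflow handler; the callers in this series carry no
	 * per-breakpoint state, hence the NULLs */
	struct perf_event *
	register_user_hw_breakpoint(struct perf_event_attr *attr,
				    perf_overflow_handler_t triggered,
				    void *context,
				    struct task_struct *tsk);
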
index 40ee7e5045e40f7a1f78cd8bd9d632b9130175b3..5f452f8fde0569d140e0d6055220f29be08396f1 100644 (file)
@@ -183,7 +183,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr)
        unsigned int address, destreg, data, type;
        unsigned int res = 0;
 
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
 
        if (current->pid != previous_pid) {
                pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
index bc0e1d88fd3ba8b7863edfc9eca9cb11d90413dd..9ea4f7ddd665cdba971c0b6d433dac89fb3aba9a 100644 (file)
@@ -318,11 +318,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
        fault = __do_page_fault(mm, addr, fsr, tsk);
        up_read(&mm->mmap_sem);
 
-       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, addr);
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
        if (fault & VM_FAULT_MAJOR)
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, addr);
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr);
        else if (fault & VM_FAULT_MINOR)
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, addr);
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr);
 
        /*
         * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
index 0bf82818aa53d9abe10a75c86e7c40c240d957d5..780ee2c2a2ac54afb0f4adad265387cfe2285f7d 100644 (file)
@@ -7,6 +7,10 @@
 extern int raw_show_trace;
 extern unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
                                  unsigned long pc, unsigned long *ra);
+extern unsigned long unwind_stack_by_address(unsigned long stack_page,
+                                            unsigned long *sp,
+                                            unsigned long pc,
+                                            unsigned long *ra);
 #else
 #define raw_show_trace 1
 static inline unsigned long unwind_stack(struct task_struct *task,
index a8244854d3dc623f99a4a4d1a2de0d0950153459..d0deaab9ace2e670db80844235de55ce157349d7 100644 (file)
@@ -527,7 +527,7 @@ handle_associated_event(struct cpu_hw_events *cpuc,
        if (!mipspmu_event_set_period(event, hwc, idx))
                return;
 
-       if (perf_event_overflow(event, 0, data, regs))
+       if (perf_event_overflow(event, data, regs))
                mipspmu->disable_event(idx);
 }
 
index 75266ff4cc3317e332c124bce569533f15485f04..e5ad09a9baf7f55ab2fbf3f0ba602a6f7eb5a4c8 100644 (file)
@@ -377,6 +377,20 @@ static const struct mips_perf_event mipsxxcore_cache_map
                [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
        },
 },
+[C(NODE)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
 };
 
 /* 74K core has completely different cache event map. */
@@ -480,6 +494,20 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map
                [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
        },
 },
+[C(NODE)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_WRITE)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+       [C(OP_PREFETCH)] = {
+               [C(RESULT_ACCESS)]      = { UNSUPPORTED_PERF_EVENT_ID },
+               [C(RESULT_MISS)]        = { UNSUPPORTED_PERF_EVENT_ID },
+       },
+},
 };
 
 #ifdef CONFIG_MIPS_MT_SMP
index d2112d3cf115b85cce3bc1643e61d560673bb338..c28fbe6107bc3aae5fb590f19742f29520f7501e 100644 (file)
@@ -373,18 +373,18 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 
 
 #ifdef CONFIG_KALLSYMS
-/* used by show_backtrace() */
-unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
-                          unsigned long pc, unsigned long *ra)
+/* generic stack unwinding function */
+unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
+                                             unsigned long *sp,
+                                             unsigned long pc,
+                                             unsigned long *ra)
 {
-       unsigned long stack_page;
        struct mips_frame_info info;
        unsigned long size, ofs;
        int leaf;
        extern void ret_from_irq(void);
        extern void ret_from_exception(void);
 
-       stack_page = (unsigned long)task_stack_page(task);
        if (!stack_page)
                return 0;
 
@@ -443,6 +443,15 @@ unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
        *ra = 0;
        return __kernel_text_address(pc) ? pc : 0;
 }
+EXPORT_SYMBOL(unwind_stack_by_address);
+
+/* used by show_backtrace() */
+unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
+                          unsigned long pc, unsigned long *ra)
+{
+       unsigned long stack_page = (unsigned long)task_stack_page(task);
+       return unwind_stack_by_address(stack_page, sp, pc, ra);
+}
 #endif
 
 /*
index e9b3af27d844b5db837dc641bdd9c87ba326f9ba..b7517e3abc8527721fdffca5ded64a035d33fddb 100644 (file)
@@ -578,12 +578,12 @@ static int simulate_llsc(struct pt_regs *regs, unsigned int opcode)
 {
        if ((opcode & OPCODE) == LL) {
                perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-                               1, 0, regs, 0);
+                               1, regs, 0);
                return simulate_ll(regs, opcode);
        }
        if ((opcode & OPCODE) == SC) {
                perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-                               1, 0, regs, 0);
+                               1, regs, 0);
                return simulate_sc(regs, opcode);
        }
 
@@ -602,7 +602,7 @@ static int simulate_rdhwr(struct pt_regs *regs, unsigned int opcode)
                int rd = (opcode & RD) >> 11;
                int rt = (opcode & RT) >> 16;
                perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-                               1, 0, regs, 0);
+                               1, regs, 0);
                switch (rd) {
                case 0:         /* CPU number */
                        regs->regs[rt] = smp_processor_id();
@@ -640,7 +640,7 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode)
 {
        if ((opcode & OPCODE) == SPEC0 && (opcode & FUNC) == SYNC) {
                perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-                               1, 0, regs, 0);
+                               1, regs, 0);
                return 0;
        }
 
index cfea1adfa1536dcb08302f9aa8ce3c279e37b7a9..eb319b58035377f5305286a587926f1f356ced41 100644 (file)
@@ -111,8 +111,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
        unsigned long value;
        unsigned int res;
 
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-                     1, 0, regs, 0);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
        /*
         * This load never faults.
@@ -517,7 +516,7 @@ asmlinkage void do_ade(struct pt_regs *regs)
        mm_segment_t seg;
 
        perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,
-                       1, 0, regs, regs->cp0_badvaddr);
+                       1, regs, regs->cp0_badvaddr);
        /*
         * Did we catch a fault trying to load an instruction?
         * Or are we running in MIPS16 mode?
index d32cb050311053a8e873ea470617b0ec17f13e26..dbf2f93a50911b5914a295a82b3179c36562013f 100644 (file)
@@ -272,8 +272,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
        }
 
       emul:
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-                       1, 0, xcp, 0);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, xcp, 0);
        MIPS_FPU_EMU_INC_STATS(emulated);
        switch (MIPSInst_OPCODE(ir)) {
        case ldc1_op:{
index 137ee76a0045c2cc51629a07bbaf1deb382a1451..937cf3368164c6f6d4a6db4b1867ca5866a36ed7 100644 (file)
@@ -145,7 +145,7 @@ good_area:
         * the fault.
         */
        fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
-       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
@@ -154,12 +154,10 @@ good_area:
                BUG();
        }
        if (fault & VM_FAULT_MAJOR) {
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
-                               1, 0, regs, address);
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
                tsk->maj_flt++;
        } else {
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
-                               1, 0, regs, address);
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
                tsk->min_flt++;
        }
 
index 4b9d7044e26c236b87f3f4497cacd5acd1cc7e4b..29f2f13eb31c433a0a6a79208dabee4706fdcf3a 100644 (file)
@@ -8,7 +8,7 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
                oprofilefs.o oprofile_stats.o \
                timer_int.o )
 
-oprofile-y                             := $(DRIVER_OBJS) common.o
+oprofile-y                             := $(DRIVER_OBJS) common.o backtrace.o
 
 oprofile-$(CONFIG_CPU_MIPS32)          += op_model_mipsxx.o
 oprofile-$(CONFIG_CPU_MIPS64)          += op_model_mipsxx.o
diff --git a/arch/mips/oprofile/backtrace.c b/arch/mips/oprofile/backtrace.c
new file mode 100644 (file)
index 0000000..6854ed5
--- /dev/null
+++ b/arch/mips/oprofile/backtrace.c
@@ -0,0 +1,175 @@
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+#include <linux/stacktrace.h>
+#include <linux/kernel.h>
+#include <asm/sections.h>
+#include <asm/inst.h>
+
+struct stackframe {
+       unsigned long sp;
+       unsigned long pc;
+       unsigned long ra;
+};
+
+static inline int get_mem(unsigned long addr, unsigned long *result)
+{
+       unsigned long *address = (unsigned long *) addr;
+       if (!access_ok(VERIFY_READ, addr, sizeof(unsigned long)))
+               return -1;
+       if (__copy_from_user_inatomic(result, address, sizeof(unsigned long)))
+               return -3;
+       return 0;
+}
+
+/*
+ * These two instruction helpers were taken from process.c
+ */
+static inline int is_ra_save_ins(union mips_instruction *ip)
+{
+       /* sw / sd $ra, offset($sp) */
+       return (ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op)
+               && ip->i_format.rs == 29 && ip->i_format.rt == 31;
+}
+
+static inline int is_sp_move_ins(union mips_instruction *ip)
+{
+       /* addiu/daddiu sp,sp,-imm */
+       if (ip->i_format.rs != 29 || ip->i_format.rt != 29)
+               return 0;
+       if (ip->i_format.opcode == addiu_op || ip->i_format.opcode == daddiu_op)
+               return 1;
+       return 0;
+}
+
+/*
+ * Looks for specific instructions that mark the end of a function.
+ * This usually means we ran into the code area of the previous function.
+ */
+static inline int is_end_of_function_marker(union mips_instruction *ip)
+{
+       /* jr ra */
+       if (ip->r_format.func == jr_op && ip->r_format.rs == 31)
+               return 1;
+       /* lui gp */
+       if (ip->i_format.opcode == lui_op && ip->i_format.rt == 28)
+               return 1;
+       return 0;
+}
+
+/*
+ * TODO for userspace stack unwinding:
+ * - handle cases where the stack is adjusted inside a function
+ *     (generally doesn't happen)
+ * - find optimal value for max_instr_check
+ * - try to find a way to handle leaf functions
+ */
+
+static inline int unwind_user_frame(struct stackframe *old_frame,
+                                   const unsigned int max_instr_check)
+{
+       struct stackframe new_frame = *old_frame;
+       off_t ra_offset = 0;
+       size_t stack_size = 0;
+       unsigned long addr;
+
+       if (old_frame->pc == 0 || old_frame->sp == 0 || old_frame->ra == 0)
+               return -9;
+
+       for (addr = new_frame.pc; (addr + max_instr_check > new_frame.pc)
+               && (!ra_offset || !stack_size); --addr) {
+               union mips_instruction ip;
+
+               if (get_mem(addr, (unsigned long *) &ip))
+                       return -11;
+
+               if (is_sp_move_ins(&ip)) {
+                       int stack_adjustment = ip.i_format.simmediate;
+                       if (stack_adjustment > 0)
+                               /* This marks the end of the previous function,
+                                  which means we overran. */
+                               break;
+                       stack_size = (unsigned) stack_adjustment;
+               } else if (is_ra_save_ins(&ip)) {
+                       int ra_slot = ip.i_format.simmediate;
+                       if (ra_slot < 0)
+                               /* This shouldn't happen. */
+                               break;
+                       ra_offset = ra_slot;
+               } else if (is_end_of_function_marker(&ip))
+                       break;
+       }
+
+       if (!ra_offset || !stack_size)
+               return -1;
+
+       if (ra_offset) {
+               new_frame.ra = old_frame->sp + ra_offset;
+               if (get_mem(new_frame.ra, &(new_frame.ra)))
+                       return -13;
+       }
+
+       if (stack_size) {
+               new_frame.sp = old_frame->sp + stack_size;
+               if (get_mem(new_frame.sp, &(new_frame.sp)))
+                       return -14;
+       }
+
+       if (new_frame.sp > old_frame->sp)
+               return -2;
+
+       new_frame.pc = old_frame->ra;
+       *old_frame = new_frame;
+
+       return 0;
+}
+
+static inline void do_user_backtrace(unsigned long low_addr,
+                                    struct stackframe *frame,
+                                    unsigned int depth)
+{
+       const unsigned int max_instr_check = 512;
+       const unsigned long high_addr = low_addr + THREAD_SIZE;
+
+       while (depth-- && !unwind_user_frame(frame, max_instr_check)) {
+               oprofile_add_trace(frame->ra);
+               if (frame->sp < low_addr || frame->sp > high_addr)
+                       break;
+       }
+}
+
+#ifndef CONFIG_KALLSYMS
+static inline void do_kernel_backtrace(unsigned long low_addr,
+                                      struct stackframe *frame,
+                                      unsigned int depth) { }
+#else
+static inline void do_kernel_backtrace(unsigned long low_addr,
+                                      struct stackframe *frame,
+                                      unsigned int depth)
+{
+       while (depth-- && frame->pc) {
+               frame->pc = unwind_stack_by_address(low_addr,
+                                                   &(frame->sp),
+                                                   frame->pc,
+                                                   &(frame->ra));
+               oprofile_add_trace(frame->ra);
+       }
+}
+#endif
+
+void notrace op_mips_backtrace(struct pt_regs *const regs, unsigned int depth)
+{
+       struct stackframe frame = { .sp = regs->regs[29],
+                                   .pc = regs->cp0_epc,
+                                   .ra = regs->regs[31] };
+       const int userspace = user_mode(regs);
+       const unsigned long low_addr = ALIGN(frame.sp, THREAD_SIZE);
+
+       if (userspace)
+               do_user_backtrace(low_addr, &frame, depth);
+       else
+               do_kernel_backtrace(low_addr, &frame, depth);
+}
index f9eb1aba6345ae398853acc8b9982e54bd468c18..d1f2d4c52d42d3a0e1e92f5605d81632eefc8aa6 100644 (file)
@@ -115,6 +115,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
        ops->start              = op_mips_start;
        ops->stop               = op_mips_stop;
        ops->cpu_type           = lmodel->cpu_type;
+       ops->backtrace          = op_mips_backtrace;
 
        printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
               lmodel->cpu_type);
index f04b54fb37d14a7050f96f058262728e88ae4aa4..7c2da27ece04d78e44f46677bd3d5bb9c316ddb4 100644 (file)
@@ -36,4 +36,6 @@ struct op_mips_model {
        unsigned char num_counters;
 };
 
+void op_mips_backtrace(struct pt_regs * const regs, unsigned int depth);
+
 #endif
index 45921672b97af14c0bb645de7bbf8772f2c3fb5f..2cc41c715d2ba2cfb9783d91bda466c3edac0099 100644 (file)
@@ -78,14 +78,14 @@ extern void ppc_warn_emulated_print(const char *type);
 #define PPC_WARN_EMULATED(type, regs)                                  \
        do {                                                            \
                perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,           \
-                       1, 0, regs, 0);                                 \
+                       1, regs, 0);                                    \
                __PPC_WARN_EMULATED(type);                              \
        } while (0)
 
 #define PPC_WARN_ALIGNMENT(type, regs)                                 \
        do {                                                            \
                perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,           \
-                       1, 0, regs, regs->dar);                         \
+                       1, regs, regs->dar);                            \
                __PPC_WARN_EMULATED(type);                              \
        } while (0)
 
index 1c33ec17ca36da7e086163252425efe939fa0a07..80fd4d2b4a62ca6e46849703b6ff812a1b50575c 100644 (file)
@@ -57,7 +57,7 @@ void hw_breakpoint_pmu_read(struct perf_event *bp);
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
 
 extern struct pmu perf_ops_bp;
-extern void ptrace_triggered(struct perf_event *bp, int nmi,
+extern void ptrace_triggered(struct perf_event *bp,
                        struct perf_sample_data *data, struct pt_regs *regs);
 static inline void hw_breakpoint_disable(void)
 {
index b150b510510f167d2782f645999303dfa297e2ad..cb2e2949c8d12c04270331d098599608df1fa5ec 100644 (file)
@@ -75,6 +75,11 @@ static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
                [C(OP_WRITE)] = {       -1,             -1      },
                [C(OP_PREFETCH)] = {    -1,             -1      },
        },
+       [C(NODE)] = {           /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        -1,             -1      },
+               [C(OP_WRITE)] = {       -1,             -1      },
+               [C(OP_PREFETCH)] = {    -1,             -1      },
+       },
 };
 
 static int num_events = 128;
index 2cc5e0301d0b532a2291e400cb7bdc0a87aa89cd..845a584788903cecde1b4a183b192cc2a747a2e9 100644 (file)
@@ -388,6 +388,11 @@ static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
                [C(OP_WRITE)] = {       -1,             -1      },
                [C(OP_PREFETCH)] = {    -1,             -1      },
        },
+       [C(NODE)] = {           /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        -1,             -1      },
+               [C(OP_WRITE)] = {       -1,             -1      },
+               [C(OP_PREFETCH)] = {    -1,             -1      },
+       },
 };
 
 struct power_pmu mpc7450_pmu = {
index 822f63008ae11642b570986c7faf8ca61478ae10..14967de9887603c91d10650939a015eb0aa7a2ec 100644 (file)
@@ -1207,7 +1207,7 @@ struct pmu power_pmu = {
  * here so there is no possibility of being interrupted.
  */
 static void record_and_restart(struct perf_event *event, unsigned long val,
-                              struct pt_regs *regs, int nmi)
+                              struct pt_regs *regs)
 {
        u64 period = event->hw.sample_period;
        s64 prev, delta, left;
@@ -1258,7 +1258,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
                if (event->attr.sample_type & PERF_SAMPLE_ADDR)
                        perf_get_data_addr(regs, &data.addr);
 
-               if (perf_event_overflow(event, nmi, &data, regs))
+               if (perf_event_overflow(event, &data, regs))
                        power_pmu_stop(event, 0);
        }
 }
@@ -1346,7 +1346,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
                if ((int)val < 0) {
                        /* event has overflowed */
                        found = 1;
-                       record_and_restart(event, val, regs, nmi);
+                       record_and_restart(event, val, regs);
                }
        }
 
index b0dc8f7069cd5fb7bb41ec8b2a3bd9a046f3f57e..0a6d2a9d569cde1e924735dd24147eb90bd84c04 100644 (file)
@@ -568,7 +568,7 @@ static struct pmu fsl_emb_pmu = {
  * here so there is no possibility of being interrupted.
  */
 static void record_and_restart(struct perf_event *event, unsigned long val,
-                              struct pt_regs *regs, int nmi)
+                              struct pt_regs *regs)
 {
        u64 period = event->hw.sample_period;
        s64 prev, delta, left;
@@ -616,7 +616,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
                perf_sample_data_init(&data, 0);
                data.period = event->hw.last_period;
 
-               if (perf_event_overflow(event, nmi, &data, regs))
+               if (perf_event_overflow(event, &data, regs))
                        fsl_emb_pmu_stop(event, 0);
        }
 }
@@ -644,7 +644,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
                        if (event) {
                                /* event has overflowed */
                                found = 1;
-                               record_and_restart(event, val, regs, nmi);
+                               record_and_restart(event, val, regs);
                        } else {
                                /*
                                 * Disabled counter is negative,
index ead8b3c2649ebba98c00423727e7dae54c6f5a7f..e9dbc2d35c9c99af7b9894864e9055ddfd9f6b23 100644 (file)
@@ -587,6 +587,11 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
                [C(OP_WRITE)] = {       -1,             -1      },
                [C(OP_PREFETCH)] = {    -1,             -1      },
        },
+       [C(NODE)] = {           /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        -1,             -1      },
+               [C(OP_WRITE)] = {       -1,             -1      },
+               [C(OP_PREFETCH)] = {    -1,             -1      },
+       },
 };
 
 static struct power_pmu power4_pmu = {
index eca0ac595cb6c5b790ea4fd37d77ebb3a8ee474b..f58a2bd41b591da659f2654f855f24060ebfde58 100644 (file)
@@ -653,6 +653,11 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
                [C(OP_WRITE)] = {       -1,             -1              },
                [C(OP_PREFETCH)] = {    -1,             -1              },
        },
+       [C(NODE)] = {           /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        -1,             -1              },
+               [C(OP_WRITE)] = {       -1,             -1              },
+               [C(OP_PREFETCH)] = {    -1,             -1              },
+       },
 };
 
 static struct power_pmu power5p_pmu = {
index d5ff0f64a5e645e01ddc6f9b56c91c60e14b204a..b1acab6841421bfdd4bd26ec57a58a048d6fcd7d 100644 (file)
@@ -595,6 +595,11 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
                [C(OP_WRITE)] = {       -1,             -1              },
                [C(OP_PREFETCH)] = {    -1,             -1              },
        },
+       [C(NODE)] = {           /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        -1,             -1              },
+               [C(OP_WRITE)] = {       -1,             -1              },
+               [C(OP_PREFETCH)] = {    -1,             -1              },
+       },
 };
 
 static struct power_pmu power5_pmu = {
index 31603927e376e7e8854bf6a0faf86bc2bc9e56f7..b24a3a23d073bcdc78613a6cd00a649c0c3c6494 100644 (file)
@@ -516,6 +516,11 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
                [C(OP_WRITE)] = {       -1,             -1              },
                [C(OP_PREFETCH)] = {    -1,             -1              },
        },
+       [C(NODE)] = {           /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        -1,             -1              },
+               [C(OP_WRITE)] = {       -1,             -1              },
+               [C(OP_PREFETCH)] = {    -1,             -1              },
+       },
 };
 
 static struct power_pmu power6_pmu = {
index 593740fcb799d6fc9c29faca49425ad97b15b19a..6d9dccb2ea592067a0f20e297d5146b46bd0e123 100644 (file)
@@ -342,6 +342,11 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
                [C(OP_WRITE)] = {       -1,             -1      },
                [C(OP_PREFETCH)] = {    -1,             -1      },
        },
+       [C(NODE)] = {           /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        -1,             -1      },
+               [C(OP_WRITE)] = {       -1,             -1      },
+               [C(OP_PREFETCH)] = {    -1,             -1      },
+       },
 };
 
 static struct power_pmu power7_pmu = {
index 9a6e093858fe13fd30d2a79adb82e38e8c424b84..b121de9658eb6000c3735120756613628e81f3be 100644 (file)
@@ -467,6 +467,11 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
                [C(OP_WRITE)] = {       -1,             -1      },
                [C(OP_PREFETCH)] = {    -1,             -1      },
        },
+       [C(NODE)] = {           /*      RESULT_ACCESS   RESULT_MISS */
+               [C(OP_READ)] = {        -1,             -1      },
+               [C(OP_WRITE)] = {       -1,             -1      },
+               [C(OP_PREFETCH)] = {    -1,             -1      },
+       },
 };
 
 static struct power_pmu ppc970_pmu = {
index cb22024f2b42a189d9848978ac3dafd1c611f19f..05b7dd217f6094a5e3027e2bab51ab01dfac6973 100644 (file)
@@ -882,7 +882,7 @@ void user_disable_single_step(struct task_struct *task)
 }
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-void ptrace_triggered(struct perf_event *bp, int nmi,
+void ptrace_triggered(struct perf_event *bp,
                      struct perf_sample_data *data, struct pt_regs *regs)
 {
        struct perf_event_attr attr;
@@ -973,7 +973,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
                                                                &attr.bp_type);
 
        thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
-                                                       ptrace_triggered, task);
+                                              ptrace_triggered, NULL, task);
        if (IS_ERR(bp)) {
                thread->ptrace_bps[0] = NULL;
                ptrace_put_breakpoints(task);
index f33acfd872ad31ef73df610b07a507d2e6bb78a1..03b29a6759ab55b087528c9e1be70d095c2a8d2b 100644 (file)
@@ -544,7 +544,7 @@ DEFINE_PER_CPU(u8, irq_work_pending);
 
 #endif /* 32 vs 64 bit */
 
-void set_irq_work_pending(void)
+void arch_irq_work_raise(void)
 {
        preempt_disable();
        set_irq_work_pending_flag();
index ad35f66c69e867893e17172c470a11904c2eac9d..5efe8c96d37fede5785f3a3e3947cb050d1a02de 100644 (file)
@@ -174,7 +174,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
                die("Weird page fault", regs, SIGSEGV);
        }
 
-       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
        /* When running in the kernel we expect faults to occur only to
         * addresses in user space.  All other faults represent errors in the
@@ -320,7 +320,7 @@ good_area:
        }
        if (ret & VM_FAULT_MAJOR) {
                current->maj_flt++;
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
                                     regs, address);
 #ifdef CONFIG_PPC_SMLPAR
                if (firmware_has_feature(FW_FEATURE_CMO)) {
@@ -331,7 +331,7 @@ good_area:
 #endif
        } else {
                current->min_flt++;
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
                                     regs, address);
        }
        up_read(&mm->mmap_sem);
index fe103e891e7a0eb32dc7c67fc0cea3217bd4b6e3..095f782a5512d1c7c8c783e459729286786ef834 100644 (file)
@@ -299,7 +299,7 @@ static inline int do_exception(struct pt_regs *regs, int access,
                goto out;
 
        address = trans_exc_code & __FAIL_ADDR_MASK;
-       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
        flags = FAULT_FLAG_ALLOW_RETRY;
        if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
                flags |= FAULT_FLAG_WRITE;
@@ -345,11 +345,11 @@ retry:
        if (flags & FAULT_FLAG_ALLOW_RETRY) {
                if (fault & VM_FAULT_MAJOR) {
                        tsk->maj_flt++;
-                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
                                      regs, address);
                } else {
                        tsk->min_flt++;
-                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
                                      regs, address);
                }
                if (fault & VM_FAULT_RETRY) {
index 748955df018d801db05137f1831cf18f01938b55..fa4f724b295a1c6814902f4088c81f9fd6201ae3 100644 (file)
@@ -180,6 +180,21 @@ static const int sh7750_cache_events
                        [ C(RESULT_MISS)   ] = -1,
                },
        },
+
+       [ C(NODE) ] = {
+               [ C(OP_READ) ] = {
+                       [ C(RESULT_ACCESS) ] = -1,
+                       [ C(RESULT_MISS)   ] = -1,
+               },
+               [ C(OP_WRITE) ] = {
+                       [ C(RESULT_ACCESS) ] = -1,
+                       [ C(RESULT_MISS)   ] = -1,
+               },
+               [ C(OP_PREFETCH) ] = {
+                       [ C(RESULT_ACCESS) ] = -1,
+                       [ C(RESULT_MISS)   ] = -1,
+               },
+       },
 };
 
 static int sh7750_event_map(int event)
index 17e6bebfede067c26379efde6b8e0a69fc6565e9..84a2c396ceee06ed54bef271ff7eae4f29e84c6b 100644 (file)
@@ -205,6 +205,21 @@ static const int sh4a_cache_events
                        [ C(RESULT_MISS)   ] = -1,
                },
        },
+
+       [ C(NODE) ] = {
+               [ C(OP_READ) ] = {
+                       [ C(RESULT_ACCESS) ] = -1,
+                       [ C(RESULT_MISS)   ] = -1,
+               },
+               [ C(OP_WRITE) ] = {
+                       [ C(RESULT_ACCESS) ] = -1,
+                       [ C(RESULT_MISS)   ] = -1,
+               },
+               [ C(OP_PREFETCH) ] = {
+                       [ C(RESULT_ACCESS) ] = -1,
+                       [ C(RESULT_MISS)   ] = -1,
+               },
+       },
 };
 
 static int sh4a_event_map(int event)
index 3d7b209b2178cc63c520975a35d4146c99042ba4..92b3c276339a3a50d95a027c1d2a2e902a507ee3 100644 (file)
@@ -63,7 +63,7 @@ static inline int put_stack_long(struct task_struct *task, int offset,
        return 0;
 }
 
-void ptrace_triggered(struct perf_event *bp, int nmi,
+void ptrace_triggered(struct perf_event *bp,
                      struct perf_sample_data *data, struct pt_regs *regs)
 {
        struct perf_event_attr attr;
@@ -91,7 +91,8 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)
                attr.bp_len = HW_BREAKPOINT_LEN_2;
                attr.bp_type = HW_BREAKPOINT_R;
 
-               bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+               bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
+                                                NULL, tsk);
                if (IS_ERR(bp))
                        return PTR_ERR(bp);
 
index b51a17104b5f8aba77312e4431d1f89677405463..d9006f8ffc142532d99b0ff539f88831f4027999 100644 (file)
@@ -393,7 +393,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
         */
        if (!expected) {
                unaligned_fixups_notify(current, instruction, regs);
-               perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0,
+               perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1,
                              regs, address);
        }
 
index 6713ca97e553ea68a5743a4f3e2a373f9b23f822..67110be83fd7239b963390942f7a2fa165fca59b 100644 (file)
@@ -434,7 +434,7 @@ static int misaligned_load(struct pt_regs *regs,
                return error;
        }
 
-       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
 
        destreg = (opcode >> 4) & 0x3f;
        if (user_mode(regs)) {
@@ -512,7 +512,7 @@ static int misaligned_store(struct pt_regs *regs,
                return error;
        }
 
-       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
 
        srcreg = (opcode >> 4) & 0x3f;
        if (user_mode(regs)) {
@@ -588,7 +588,7 @@ static int misaligned_fpu_load(struct pt_regs *regs,
                return error;
        }
 
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);
 
        destreg = (opcode >> 4) & 0x3f;
        if (user_mode(regs)) {
@@ -665,7 +665,7 @@ static int misaligned_fpu_store(struct pt_regs *regs,
                return error;
        }
 
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);
 
        srcreg = (opcode >> 4) & 0x3f;
        if (user_mode(regs)) {
index f76a5090d5d1b51e78221432b5340c4a8eb8bbcd..977195210653ede066d541f39e75ebb2745e55c6 100644 (file)
@@ -620,7 +620,7 @@ int do_fpu_inst(unsigned short inst, struct pt_regs *regs)
        struct task_struct *tsk = current;
        struct sh_fpu_soft_struct *fpu = &(tsk->thread.xstate->softfpu);
 
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
        if (!(task_thread_info(tsk)->status & TS_USEDFPU)) {
                /* initialize once. */
index d4c34d757f0d5b9ccf5a0350aca9584c8ee5e971..7bebd044f2a1fc02f598c3293a358064784c2045 100644 (file)
@@ -160,7 +160,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
        if ((regs->sr & SR_IMASK) != SR_IMASK)
                local_irq_enable();
 
-       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
        /*
         * If we're in an interrupt, have no user context or are running
@@ -210,11 +210,11 @@ good_area:
        }
        if (fault & VM_FAULT_MAJOR) {
                tsk->maj_flt++;
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
                                     regs, address);
        } else {
                tsk->min_flt++;
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
                                     regs, address);
        }
 
index 7f5810f5dfdc4b5141aabcacf7ca6b61f1b5bba5..e3430e093d436d300bb1928ea4320ce230b1d632 100644 (file)
@@ -116,7 +116,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
        /* Not an IO address, so reenable interrupts */
        local_irq_enable();
 
-       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
        /*
         * If we're in an interrupt or have no user
@@ -200,11 +200,11 @@ good_area:
 
        if (fault & VM_FAULT_MAJOR) {
                tsk->maj_flt++;
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
                                     regs, address);
        } else {
                tsk->min_flt++;
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
                                     regs, address);
        }
 
index 2cb0e1c001e254baea6895ad399776cec8c6c699..62a034318b1818ba12f010f89821a53e41bd851b 100644 (file)
@@ -246,6 +246,20 @@ static const cache_map_t ultra3_cache_map = {
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
 },
+[C(NODE)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+               [C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+       },
+},
 };
 
 static const struct sparc_pmu ultra3_pmu = {
@@ -361,6 +375,20 @@ static const cache_map_t niagara1_cache_map = {
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
 },
+[C(NODE)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+               [C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+       },
+},
 };
 
 static const struct sparc_pmu niagara1_pmu = {
@@ -473,6 +501,20 @@ static const cache_map_t niagara2_cache_map = {
                [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
        },
 },
+[C(NODE)] = {
+       [C(OP_READ)] = {
+               [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+               [C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+               [ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+       },
+},
 };
 
 static const struct sparc_pmu niagara2_pmu = {
@@ -1277,7 +1319,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
                if (!sparc_perf_event_set_period(event, hwc, idx))
                        continue;
 
-               if (perf_event_overflow(event, 1, &data, regs))
+               if (perf_event_overflow(event, &data, regs))
                        sparc_pmu_stop(event, 0);
        }
 
index 4491f4cb26953c1582e0344311b62fe82b956900..7efbb2f9e77ff63d9a18be9f44f166cd04f05bda 100644 (file)
@@ -247,7 +247,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
                unsigned long addr = compute_effective_address(regs, insn);
                int err;
 
-               perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
+               perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
                switch (dir) {
                case load:
                        err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
@@ -338,7 +338,7 @@ asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
                }
 
                addr = compute_effective_address(regs, insn);
-               perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
+               perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
                switch(dir) {
                case load:
                        err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
index b2b019ea8caab36b346cd6a0ed766bfe075eb264..35cff1673aa4ec320cd25fc86b660eb88cc28cb8 100644 (file)
@@ -317,7 +317,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 
                addr = compute_effective_address(regs, insn,
                                                 ((insn >> 25) & 0x1f));
-               perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
+               perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
                switch (asi) {
                case ASI_NL:
                case ASI_AIUPL:
@@ -384,7 +384,7 @@ int handle_popc(u32 insn, struct pt_regs *regs)
        int ret, i, rd = ((insn >> 25) & 0x1f);
        int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
                                
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
        if (insn & 0x2000) {
                maybe_flush_windows(0, 0, rd, from_kernel);
                value = sign_extend_imm13(insn);
@@ -431,7 +431,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
        int asi = decode_asi(insn, regs);
        int flag = (freg < 32) ? FPRS_DL : FPRS_DU;
 
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
        save_and_clear_fpu();
        current_thread_info()->xfsr[0] &= ~0x1c000;
@@ -554,7 +554,7 @@ void handle_ld_nf(u32 insn, struct pt_regs *regs)
        int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
        unsigned long *reg;
                                
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
        maybe_flush_windows(0, 0, rd, from_kernel);
        reg = fetch_reg_addr(rd, regs);
@@ -586,7 +586,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
 
        if (tstate & TSTATE_PRIV)
                die_if_kernel("lddfmna from kernel", regs);
-       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar);
+       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
        if (test_thread_flag(TIF_32BIT))
                pc = (u32)pc;
        if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
@@ -647,7 +647,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
 
        if (tstate & TSTATE_PRIV)
                die_if_kernel("stdfmna from kernel", regs);
-       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar);
+       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
        if (test_thread_flag(TIF_32BIT))
                pc = (u32)pc;
        if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
index 36357717d691019ff4772c7a4aa2993c68f13c35..32b626c9d815601e445b0d47e76cc0ebbab84e75 100644 (file)
@@ -802,7 +802,7 @@ int vis_emul(struct pt_regs *regs, unsigned int insn)
 
        BUG_ON(regs->tstate & TSTATE_PRIV);
 
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
        if (test_thread_flag(TIF_32BIT))
                pc = (u32)pc;
index a3fccde894ece2791cef55922928d9eea7abd239..aa4d55b0bdf0326370ec6d54a4759b04215d61cb 100644 (file)
@@ -164,7 +164,7 @@ int do_mathemu(struct pt_regs *regs, struct task_struct *fpt)
        int retcode = 0;                               /* assume all succeed */
        unsigned long insn;
 
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 #ifdef DEBUG_MATHEMU
        printk("In do_mathemu()... pc is %08lx\n", regs->pc);
index 56d2c44747b8fcf3fd69324319b36ebd4b7ebce7..e575bd2fe38167eadf61386b86df733807ef7ead 100644 (file)
@@ -184,7 +184,7 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f)
 
        if (tstate & TSTATE_PRIV)
                die_if_kernel("unfinished/unimplemented FPop from kernel", regs);
-       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
        if (test_thread_flag(TIF_32BIT))
                pc = (u32)pc;
        if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
index 7543ddbdadb271b18e79ccbaf126ed3ba3650991..aa1c1b1ce5cc05ce4daa3f67161b565a1101ea4c 100644 (file)
@@ -251,7 +251,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
         if (in_atomic() || !mm)
                 goto no_context;
 
-       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
        down_read(&mm->mmap_sem);
 
@@ -301,12 +301,10 @@ good_area:
        }
        if (fault & VM_FAULT_MAJOR) {
                current->maj_flt++;
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
-                             regs, address);
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
        } else {
                current->min_flt++;
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
-                             regs, address);
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
        }
        up_read(&mm->mmap_sem);
        return;
index f92ce56a8b22c117ff288c7e8e2622c29897961f..504c0622f7296c42bb09cae18bc487630e6344a4 100644 (file)
@@ -325,7 +325,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
        if (in_atomic() || !mm)
                goto intr_or_no_mm;
 
-       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
        if (!down_read_trylock(&mm->mmap_sem)) {
                if ((regs->tstate & TSTATE_PRIV) &&
@@ -433,12 +433,10 @@ good_area:
        }
        if (fault & VM_FAULT_MAJOR) {
                current->maj_flt++;
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
-                             regs, address);
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
        } else {
                current->min_flt++;
-               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
-                             regs, address);
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
        }
        up_read(&mm->mmap_sem);
 
index 5745ce8bf1089cd2399415641d1a0c25f39cf6c1..bba3cf88e6249187a00fba403ed533648a728673 100644 (file)
@@ -60,23 +60,24 @@ static inline void native_halt(void)
 #include <asm/paravirt.h>
 #else
 #ifndef __ASSEMBLY__
+#include <linux/types.h>
 
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long arch_local_save_flags(void)
 {
        return native_save_fl();
 }
 
-static inline void arch_local_irq_restore(unsigned long flags)
+static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
        native_restore_fl(flags);
 }
 
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
 {
        native_irq_disable();
 }
 
-static inline void arch_local_irq_enable(void)
+static inline notrace void arch_local_irq_enable(void)
 {
        native_irq_enable();
 }
@@ -102,7 +103,7 @@ static inline void halt(void)
 /*
  * For spinlocks, etc:
  */
-static inline unsigned long arch_local_irq_save(void)
+static inline notrace unsigned long arch_local_irq_save(void)
 {
        unsigned long flags = arch_local_save_flags();
        arch_local_irq_disable();
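The helpers above gain the notrace annotation so the function tracer cannot instrument the very primitives it relies on to disable interrupts, which would recurse. A minimal sketch of the mechanism, assuming the usual kernel definition of notrace as GCC's no_instrument_function attribute:

    /* roughly what the kernel's compiler headers provide */
    #define notrace __attribute__((no_instrument_function))

    /*
     * When a file is built with -pg or -finstrument-functions, plain
     * functions get profiling hooks inserted; notrace exempts this one.
     */
    static inline notrace unsigned long save_flags_sketch(void)
    {
            unsigned long flags = 0;   /* arch code would read the real flags */
            return flags;
    }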
index d9d4dae305f6991efa446ec0ef4e7fad8054bdc3..094fb30817abb8ea9fb6ce475a87e1c886df90b9 100644 (file)
@@ -152,6 +152,11 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
        (regs)->bp = caller_frame_pointer();                    \
        (regs)->cs = __KERNEL_CS;                               \
        regs->flags = 0;                                        \
+       asm volatile(                                           \
+               _ASM_MOV "%%"_ASM_SP ", %0\n"                   \
+               : "=m" ((regs)->sp)                             \
+               :: "memory"                                     \
+       );                                                      \
 }
 
 #else
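The asm statement added to perf_arch_fetch_caller_regs() above snapshots the live stack pointer into regs->sp, giving the unwinder a valid starting point. A standalone user-space illustration of the same move on x86-64 (GCC/Clang inline asm):

    #include <stdio.h>

    int main(void)
    {
            unsigned long sp;

            /* the 64-bit expansion of _ASM_MOV "%%"_ASM_SP ", %0" */
            asm volatile("movq %%rsp, %0" : "=m" (sp) :: "memory");

            printf("current stack pointer: %#lx\n", sp);
            return 0;
    }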
index 56fd9e3abbdaaa83296647e09eb9cf142c54de25..4f7e67e2345e24a3b6625db4cb7a94173e264485 100644 (file)
 #define P4_CONFIG_HT_SHIFT             63
 #define P4_CONFIG_HT                   (1ULL << P4_CONFIG_HT_SHIFT)
 
+/*
+ * If an event has an alias it should be marked
+ * with a special bit. (Don't forget to check
+ * P4_PEBS_CONFIG_MASK and related bits when
+ * modifying it.)
+ */
+#define P4_CONFIG_ALIASABLE            (1 << 9)
+
 /*
  * The bits we allow to pass for RAW events
  */
        (p4_config_pack_escr(P4_CONFIG_MASK_ESCR))      | \
        (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
 
+/*
+ * In case of event aliasing we need to preserve some
+ * caller bits, otherwise the mapping won't be complete.
+ */
+#define P4_CONFIG_EVENT_ALIAS_MASK                       \
+       (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)       | \
+        p4_config_pack_cccr(P4_CCCR_EDGE               | \
+                            P4_CCCR_THRESHOLD_MASK     | \
+                            P4_CCCR_COMPLEMENT         | \
+                            P4_CCCR_COMPARE))
+
+#define P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS             \
+       ((P4_CONFIG_HT)                                 | \
+        p4_config_pack_escr(P4_ESCR_T0_OS              | \
+                            P4_ESCR_T0_USR             | \
+                            P4_ESCR_T1_OS              | \
+                            P4_ESCR_T1_USR)            | \
+        p4_config_pack_cccr(P4_CCCR_OVF                | \
+                            P4_CCCR_CASCADE            | \
+                            P4_CCCR_FORCE_OVF          | \
+                            P4_CCCR_THREAD_ANY         | \
+                            P4_CCCR_OVF_PMI_T0         | \
+                            P4_CCCR_OVF_PMI_T1         | \
+                            P4_CONFIG_ALIASABLE))
+
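These two masks split a packed config into the bits that identify an event for alias matching and the caller/state bits that must survive a swap unchanged. A small sketch of how an alias config is assembled from them (the mask values here are hypothetical placeholders, not the real ESCR/CCCR layout):

    #include <stdint.h>

    #define ALIAS_MASK      0x00ffffff00ULL   /* placeholder */
    #define IMMUTABLE_BITS  0x8000000003ULL   /* placeholder */

    /*
     * Take the alternative encoding's matching bits and carry over the
     * caller-owned bits of the original config, as p4_get_alias_event()
     * does with P4_CONFIG_EVENT_ALIAS_MASK / ..._IMMUTABLE_BITS.
     */
    static uint64_t build_alias(uint64_t config, uint64_t alternative)
    {
            return (alternative & ALIAS_MASK) | (config & IMMUTABLE_BITS);
    }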
 static inline bool p4_is_event_cascaded(u64 config)
 {
        u32 cccr = p4_config_unpack_cccr(config);
index 99ddd148a760ff73561d42a1723ff4f0b158210e..36361bf6fdd1ed2e977d87a6c0332410a742c658 100644 (file)
@@ -555,6 +555,9 @@ struct __large_struct { unsigned long buf[100]; };
 
 #endif /* CONFIG_X86_WP_WORKS_OK */
 
+extern unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
+
 /*
  * movsl can be slow when source and dest are not both 8-byte aligned
  */
index 3a0338b4b1790714293dbe213342fba56f13373c..4ee3abf20ed6118a45e0e6068962f4c181eeecaf 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/sched.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
-#include <linux/highmem.h>
 #include <linux/cpu.h>
 #include <linux/bitops.h>
 
@@ -45,38 +44,27 @@ do {                                                                \
 #endif
 
 /*
- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ *          |   NHM/WSM    |      SNB     |
+ * register -------------------------------
+ *          |  HT  | no HT |  HT  | no HT |
+ *-----------------------------------------
+ * offcore  | core | core  | cpu  | core  |
+ * lbr_sel  | core | core  | cpu  | core  |
+ * ld_lat   | cpu  | core  | cpu  | core  |
+ *-----------------------------------------
+ *
+ * Given that there is a small number of shared regs,
+ * we can pre-allocate their slots in the per-cpu
+ * per-core reg tables.
  */
-static unsigned long
-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
-{
-       unsigned long offset, addr = (unsigned long)from;
-       unsigned long size, len = 0;
-       struct page *page;
-       void *map;
-       int ret;
-
-       do {
-               ret = __get_user_pages_fast(addr, 1, 0, &page);
-               if (!ret)
-                       break;
-
-               offset = addr & (PAGE_SIZE - 1);
-               size = min(PAGE_SIZE - offset, n - len);
-
-               map = kmap_atomic(page);
-               memcpy(to, map+offset, size);
-               kunmap_atomic(map);
-               put_page(page);
+enum extra_reg_type {
+       EXTRA_REG_NONE  = -1,   /* not used */
 
-               len  += size;
-               to   += size;
-               addr += size;
+       EXTRA_REG_RSP_0 = 0,    /* offcore_response_0 */
+       EXTRA_REG_RSP_1 = 1,    /* offcore_response_1 */
 
-       } while (len < n);
-
-       return len;
-}
+       EXTRA_REG_MAX           /* number of entries needed */
+};
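With only a handful of shared registers, the enum value can serve directly as the index into a fixed-size per-core table, replacing the old linear search over dynamically claimed entries. A minimal sketch of that layout:

    #include <stdint.h>

    enum reg_type { REG_NONE = -1, REG_RSP_0, REG_RSP_1, REG_MAX };

    struct slot {
            uint64_t config;
            int      ref;
    };

    /* one pre-allocated slot per shared register, indexed by the enum */
    static struct slot table[REG_MAX];

    static struct slot *slot_for(enum reg_type t)
    {
            return (t == REG_NONE) ? (struct slot *)0 : &table[t];
    }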
 
 struct event_constraint {
        union {
@@ -132,11 +120,10 @@ struct cpu_hw_events {
        struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
 
        /*
-        * Intel percore register state.
-        * Coordinate shared resources between HT threads.
+        * manage shared (per-core, per-cpu) registers
+        * used on Intel NHM/WSM/SNB
         */
-       int                             percore_used; /* Used by this CPU? */
-       struct intel_percore            *per_core;
+       struct intel_shared_regs        *shared_regs;
 
        /*
         * AMD specific bits
@@ -186,27 +173,46 @@ struct cpu_hw_events {
 #define for_each_event_constraint(e, c)        \
        for ((e) = (c); (e)->weight; (e)++)
 
+/*
+ * Per register state.
+ */
+struct er_account {
+       raw_spinlock_t          lock;   /* per-core: protect structure */
+       u64                     config; /* extra MSR config */
+       u64                     reg;    /* extra MSR number */
+       atomic_t                ref;    /* reference count */
+};
+
 /*
  * Extra registers for specific events.
+ *
  * Some events need large masks and require external MSRs.
- * Define a mapping to these extra registers.
+ * Those extra MSRs end up being shared for all events on
+ * a PMU and sometimes between the PMUs of sibling HT threads.
+ * In either case, the kernel needs to handle conflicting
+ * accesses to those extra, shared regs. The data structure
+ * used to manage those registers is stored in cpu_hw_events.
  */
 struct extra_reg {
        unsigned int            event;
        unsigned int            msr;
        u64                     config_mask;
        u64                     valid_mask;
+       int                     idx;  /* per_xxx->regs[] reg index */
 };
 
-#define EVENT_EXTRA_REG(e, ms, m, vm) {        \
+#define EVENT_EXTRA_REG(e, ms, m, vm, i) {     \
        .event = (e),           \
        .msr = (ms),            \
        .config_mask = (m),     \
        .valid_mask = (vm),     \
+       .idx = EXTRA_REG_##i    \
        }
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm)  \
-       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)     \
+       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
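With the added idx field, each table entry names its pre-allocated slot at build time. As an illustration, the Westmere OFFCORE_RSP_1 entry later in this series would expand field by field roughly as below (the MSR and event-select constants are stand-ins, not the real values):

    #include <stdint.h>

    struct extra_reg_sketch {
            unsigned int event;
            unsigned int msr;
            uint64_t     config_mask;
            uint64_t     valid_mask;
            int          idx;
    };

    #define MSR_OFFCORE_RSP_1_SKETCH  0x1a7      /* stand-in value */
    #define EVENTSEL_EVENT_SKETCH     0xffULL    /* stand-in mask  */
    #define EXTRA_REG_RSP_1_SKETCH    1

    /* INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1) */
    static const struct extra_reg_sketch westmere_rsp_1 = {
            .event       = 0xbb,
            .msr         = MSR_OFFCORE_RSP_1_SKETCH,
            .config_mask = EVENTSEL_EVENT_SKETCH,
            .valid_mask  = 0xffff,
            .idx         = EXTRA_REG_RSP_1_SKETCH,
    };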
 
 union perf_capabilities {
        struct {
@@ -252,7 +258,6 @@ struct x86_pmu {
        void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                                 struct perf_event *event);
        struct event_constraint *event_constraints;
-       struct event_constraint *percore_constraints;
        void            (*quirks)(void);
        int             perfctr_second_write;
 
@@ -286,8 +291,12 @@ struct x86_pmu {
         * Extra registers for events
         */
        struct extra_reg *extra_regs;
+       unsigned int er_flags;
 };
 
+#define ERF_NO_HT_SHARING      1
+#define ERF_HAS_RSP_1          2
+
 static struct x86_pmu x86_pmu __read_mostly;
 
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
@@ -393,10 +402,10 @@ static inline unsigned int x86_pmu_event_addr(int index)
  */
 static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 {
+       struct hw_perf_event_extra *reg;
        struct extra_reg *er;
 
-       event->hw.extra_reg = 0;
-       event->hw.extra_config = 0;
+       reg = &event->hw.extra_reg;
 
        if (!x86_pmu.extra_regs)
                return 0;
@@ -406,8 +415,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
                        continue;
                if (event->attr.config1 & ~er->valid_mask)
                        return -EINVAL;
-               event->hw.extra_reg = er->msr;
-               event->hw.extra_config = event->attr.config1;
+
+               reg->idx = er->idx;
+               reg->config = event->attr.config1;
+               reg->reg = er->msr;
                break;
        }
        return 0;
@@ -706,6 +717,9 @@ static int __x86_pmu_event_init(struct perf_event *event)
        event->hw.last_cpu = -1;
        event->hw.last_tag = ~0ULL;
 
+       /* mark unused */
+       event->hw.extra_reg.idx = EXTRA_REG_NONE;
+
        return x86_pmu.hw_config(event);
 }
 
@@ -747,8 +761,8 @@ static void x86_pmu_disable(struct pmu *pmu)
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
                                          u64 enable_mask)
 {
-       if (hwc->extra_reg)
-               wrmsrl(hwc->extra_reg, hwc->extra_config);
+       if (hwc->extra_reg.reg)
+               wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
        wrmsrl(hwc->config_base, hwc->config | enable_mask);
 }
 
@@ -1332,7 +1346,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
                if (!x86_perf_event_set_period(event))
                        continue;
 
-               if (perf_event_overflow(event, 1, &data, regs))
+               if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);
        }
 
@@ -1637,6 +1651,40 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
        perf_pmu_enable(pmu);
        return 0;
 }
+/*
+ * A fake_cpuc is used to validate event groups. Due to
+ * the extra reg logic, we need to also allocate a fake
+ * shared_regs structure. Otherwise, group events
+ * using extra reg may conflict without the kernel being
+ * able to catch this when the last event gets added to
+ * the group.
+ */
+static void free_fake_cpuc(struct cpu_hw_events *cpuc)
+{
+       kfree(cpuc->shared_regs);
+       kfree(cpuc);
+}
+
+static struct cpu_hw_events *allocate_fake_cpuc(void)
+{
+       struct cpu_hw_events *cpuc;
+       int cpu = raw_smp_processor_id();
+
+       cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
+       if (!cpuc)
+               return ERR_PTR(-ENOMEM);
+
+       /* only needed if we have extra_regs */
+       if (x86_pmu.extra_regs) {
+               cpuc->shared_regs = allocate_shared_regs(cpu);
+               if (!cpuc->shared_regs)
+                       goto error;
+       }
+       return cpuc;
+error:
+       free_fake_cpuc(cpuc);
+       return ERR_PTR(-ENOMEM);
+}
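allocate_fake_cpuc() also moves the call sites from a bare NULL check to the kernel's ERR_PTR convention, so the precise errno travels inside the returned pointer. A self-contained user-space rendering of that idiom (the helpers mirror what include/linux/err.h provides):

    #include <errno.h>
    #include <stdlib.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long  PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int   IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    static void *allocate_thing(void)
    {
            void *p = malloc(64);
            /* encode the reason for failure in the returned pointer */
            return p ? p : ERR_PTR(-ENOMEM);
    }

    /* caller pattern, as in validate_event()/validate_group():
     *
     *     void *t = allocate_thing();
     *     if (IS_ERR(t))
     *             return PTR_ERR(t);
     */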
 
 /*
  * validate that we can schedule this event
@@ -1647,9 +1695,9 @@ static int validate_event(struct perf_event *event)
        struct event_constraint *c;
        int ret = 0;
 
-       fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
-       if (!fake_cpuc)
-               return -ENOMEM;
+       fake_cpuc = allocate_fake_cpuc();
+       if (IS_ERR(fake_cpuc))
+               return PTR_ERR(fake_cpuc);
 
        c = x86_pmu.get_event_constraints(fake_cpuc, event);
 
@@ -1659,7 +1707,7 @@ static int validate_event(struct perf_event *event)
        if (x86_pmu.put_event_constraints)
                x86_pmu.put_event_constraints(fake_cpuc, event);
 
-       kfree(fake_cpuc);
+       free_fake_cpuc(fake_cpuc);
 
        return ret;
 }
@@ -1679,36 +1727,32 @@ static int validate_group(struct perf_event *event)
 {
        struct perf_event *leader = event->group_leader;
        struct cpu_hw_events *fake_cpuc;
-       int ret, n;
-
-       ret = -ENOMEM;
-       fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
-       if (!fake_cpuc)
-               goto out;
+       int ret = -ENOSPC, n;
 
+       fake_cpuc = allocate_fake_cpuc();
+       if (IS_ERR(fake_cpuc))
+               return PTR_ERR(fake_cpuc);
        /*
         * the event is not yet connected with its
         * siblings therefore we must first collect
         * existing siblings, then add the new event
         * before we can simulate the scheduling
         */
-       ret = -ENOSPC;
        n = collect_events(fake_cpuc, leader, true);
        if (n < 0)
-               goto out_free;
+               goto out;
 
        fake_cpuc->n_events = n;
        n = collect_events(fake_cpuc, event, false);
        if (n < 0)
-               goto out_free;
+               goto out;
 
        fake_cpuc->n_events = n;
 
        ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
 
-out_free:
-       kfree(fake_cpuc);
 out:
+       free_fake_cpuc(fake_cpuc);
        return ret;
 }
 
index fe29c1d2219ecfa80d325d856a5db56241213150..941caa2e449b30a6c549a6db50457828c13e6fbb 100644 (file)
@@ -89,6 +89,20 @@ static __initconst const u64 amd_hw_cache_event_ids
                [ C(RESULT_MISS)   ] = -1,
        },
  },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
+               [ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
 };
 
 /*
index 41178c826c48507cac37d32f234b0f66a4c99708..45fbb8f7f549e1cd447228731df4c8d952c4b6e3 100644 (file)
@@ -1,25 +1,15 @@
 #ifdef CONFIG_CPU_SUP_INTEL
 
-#define MAX_EXTRA_REGS 2
-
-/*
- * Per register state.
- */
-struct er_account {
-       int                     ref;            /* reference count */
-       unsigned int            extra_reg;      /* extra MSR number */
-       u64                     extra_config;   /* extra MSR config */
-};
-
 /*
- * Per core state
- * This used to coordinate shared registers for HT threads.
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
  */
-struct intel_percore {
-       raw_spinlock_t          lock;           /* protect structure */
-       struct er_account       regs[MAX_EXTRA_REGS];
-       int                     refcnt;         /* number of threads */
-       unsigned                core_id;
+struct intel_shared_regs {
+       struct er_account       regs[EXTRA_REG_MAX];
+       int                     refcnt;         /* per-core: #HT threads */
+       unsigned                core_id;        /* per-core: core id */
 };
 
 /*
@@ -88,16 +78,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
-       INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+       INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
        EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
-{
-       INTEL_EVENT_CONSTRAINT(0xb7, 0),
-       EVENT_CONSTRAINT_END
-};
-
 static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
 {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -116,8 +100,6 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
        FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
        /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
        INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
-       INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
-       INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
        INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
        EVENT_CONSTRAINT_END
@@ -125,15 +107,13 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
-       INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
-       INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+       INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+       INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
        EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
+static struct event_constraint intel_v1_event_constraints[] __read_mostly =
 {
-       INTEL_EVENT_CONSTRAINT(0xb7, 0),
-       INTEL_EVENT_CONSTRAINT(0xbb, 0),
        EVENT_CONSTRAINT_END
 };
 
@@ -145,6 +125,12 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
        EVENT_CONSTRAINT_END
 };
 
+static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
+       INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
+       INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+       EVENT_EXTRA_END
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
        return intel_perfmon_event_map[hw_event];
@@ -245,6 +231,21 @@ static __initconst const u64 snb_hw_cache_event_ids
                [ C(RESULT_MISS)   ] = -1,
        },
  },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+
 };
 
 static __initconst const u64 westmere_hw_cache_event_ids
@@ -346,6 +347,20 @@ static __initconst const u64 westmere_hw_cache_event_ids
                [ C(RESULT_MISS)   ] = -1,
        },
  },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
 };
 
 /*
@@ -398,7 +413,21 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
                [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
                [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
        },
- }
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
+               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
+               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
+               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
+       },
+ },
 };
 
 static __initconst const u64 nehalem_hw_cache_event_ids
@@ -500,6 +529,20 @@ static __initconst const u64 nehalem_hw_cache_event_ids
                [ C(RESULT_MISS)   ] = -1,
        },
  },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
 };
 
 static __initconst const u64 core2_hw_cache_event_ids
@@ -1003,7 +1046,7 @@ again:
 
                data.period = event->hw.last_period;
 
-               if (perf_event_overflow(event, 1, &data, regs))
+               if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);
        }
 
@@ -1037,65 +1080,121 @@ intel_bts_constraints(struct perf_event *event)
        return NULL;
 }
 
+static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+{
+       if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
+               return false;
+
+       if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
+               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+               event->hw.config |= 0x01bb;
+               event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
+               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
+       } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+               event->hw.config |= 0x01b7;
+               event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
+               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+       }
+
+       if (event->hw.extra_reg.idx == orig_idx)
+               return false;
+
+       return true;
+}
+
+/*
+ * manage allocation of shared extra msr for certain events
+ *
+ * sharing can be:
+ * per-cpu: to be shared between the various events on a single PMU
+ * per-core: per-cpu + shared by HT threads
+ */
 static struct event_constraint *
-intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
+                                  struct perf_event *event)
 {
-       struct hw_perf_event *hwc = &event->hw;
-       unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
-       struct event_constraint *c;
-       struct intel_percore *pc;
+       struct event_constraint *c = &emptyconstraint;
+       struct hw_perf_event_extra *reg = &event->hw.extra_reg;
        struct er_account *era;
-       int i;
-       int free_slot;
-       int found;
+       unsigned long flags;
+       int orig_idx = reg->idx;
 
-       if (!x86_pmu.percore_constraints || hwc->extra_alloc)
-               return NULL;
+       /* already allocated shared msr */
+       if (reg->alloc)
+               return &unconstrained;
 
-       for (c = x86_pmu.percore_constraints; c->cmask; c++) {
-               if (e != c->code)
-                       continue;
+again:
+       era = &cpuc->shared_regs->regs[reg->idx];
+       /*
+        * we use raw_spin_lock_irqsave() to avoid lockdep issues
+        * when passing a fake cpuc
+        */
+       raw_spin_lock_irqsave(&era->lock, flags);
+
+       if (!atomic_read(&era->ref) || era->config == reg->config) {
+
+               /* lock in msr value */
+               era->config = reg->config;
+               era->reg = reg->reg;
+
+               /* one more user */
+               atomic_inc(&era->ref);
+
+               /* no need to reallocate during incremental event scheduling */
+               reg->alloc = 1;
 
                /*
-                * Allocate resource per core.
+                * All events using an extra_reg are unconstrained.
+                * This avoids calling x86_get_event_constraints().
+                *
+                * Must be revisited if extra_reg-controlled events
+                * ever have constraints. Worst case we go through
+                * the regular event constraint table.
                 */
-               pc = cpuc->per_core;
-               if (!pc)
-                       break;
-               c = &emptyconstraint;
-               raw_spin_lock(&pc->lock);
-               free_slot = -1;
-               found = 0;
-               for (i = 0; i < MAX_EXTRA_REGS; i++) {
-                       era = &pc->regs[i];
-                       if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
-                               /* Allow sharing same config */
-                               if (hwc->extra_config == era->extra_config) {
-                                       era->ref++;
-                                       cpuc->percore_used = 1;
-                                       hwc->extra_alloc = 1;
-                                       c = NULL;
-                               }
-                               /* else conflict */
-                               found = 1;
-                               break;
-                       } else if (era->ref == 0 && free_slot == -1)
-                               free_slot = i;
-               }
-               if (!found && free_slot != -1) {
-                       era = &pc->regs[free_slot];
-                       era->ref = 1;
-                       era->extra_reg = hwc->extra_reg;
-                       era->extra_config = hwc->extra_config;
-                       cpuc->percore_used = 1;
-                       hwc->extra_alloc = 1;
-                       c = NULL;
-               }
-               raw_spin_unlock(&pc->lock);
-               return c;
+               c = &unconstrained;
+       } else if (intel_try_alt_er(event, orig_idx)) {
+               raw_spin_unlock(&era->lock);
+               goto again;
        }
+       raw_spin_unlock_irqrestore(&era->lock, flags);
 
-       return NULL;
+       return c;
+}
+
+static void
+__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
+                                  struct hw_perf_event_extra *reg)
+{
+       struct er_account *era;
+
+       /*
+        * only put the constraint if the extra reg was actually
+        * allocated. This also takes care of events which do
+        * not use an extra shared reg.
+        */
+       if (!reg->alloc)
+               return;
+
+       era = &cpuc->shared_regs->regs[reg->idx];
+
+       /* one fewer user */
+       atomic_dec(&era->ref);
+
+       /* allocate again next time */
+       reg->alloc = 0;
+}
+
+static struct event_constraint *
+intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
+                             struct perf_event *event)
+{
+       struct event_constraint *c = NULL;
+
+       if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
+               c = __intel_shared_reg_get_constraints(cpuc, event);
+
+       return c;
 }
 
 static struct event_constraint *
@@ -1111,49 +1210,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
        if (c)
                return c;
 
-       c = intel_percore_constraints(cpuc, event);
+       c = intel_shared_regs_constraints(cpuc, event);
        if (c)
                return c;
 
        return x86_get_event_constraints(cpuc, event);
 }
 
-static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+static void
+intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
                                        struct perf_event *event)
 {
-       struct extra_reg *er;
-       struct intel_percore *pc;
-       struct er_account *era;
-       struct hw_perf_event *hwc = &event->hw;
-       int i, allref;
+       struct hw_perf_event_extra *reg;
 
-       if (!cpuc->percore_used)
-               return;
-
-       for (er = x86_pmu.extra_regs; er->msr; er++) {
-               if (er->event != (hwc->config & er->config_mask))
-                       continue;
+       reg = &event->hw.extra_reg;
+       if (reg->idx != EXTRA_REG_NONE)
+               __intel_shared_reg_put_constraints(cpuc, reg);
+}
 
-               pc = cpuc->per_core;
-               raw_spin_lock(&pc->lock);
-               for (i = 0; i < MAX_EXTRA_REGS; i++) {
-                       era = &pc->regs[i];
-                       if (era->ref > 0 &&
-                           era->extra_config == hwc->extra_config &&
-                           era->extra_reg == er->msr) {
-                               era->ref--;
-                               hwc->extra_alloc = 0;
-                               break;
-                       }
-               }
-               allref = 0;
-               for (i = 0; i < MAX_EXTRA_REGS; i++)
-                       allref += pc->regs[i].ref;
-               if (allref == 0)
-                       cpuc->percore_used = 0;
-               raw_spin_unlock(&pc->lock);
-               break;
-       }
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+                                       struct perf_event *event)
+{
+       intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
 static int intel_pmu_hw_config(struct perf_event *event)
@@ -1231,20 +1309,36 @@ static __initconst const struct x86_pmu core_pmu = {
        .event_constraints      = intel_core_event_constraints,
 };
 
+static struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+       struct intel_shared_regs *regs;
+       int i;
+
+       regs = kzalloc_node(sizeof(struct intel_shared_regs),
+                           GFP_KERNEL, cpu_to_node(cpu));
+       if (regs) {
+               /*
+                * initialize the locks to keep lockdep happy
+                */
+               for (i = 0; i < EXTRA_REG_MAX; i++)
+                       raw_spin_lock_init(&regs->regs[i].lock);
+
+               regs->core_id = -1;
+       }
+       return regs;
+}
+
 static int intel_pmu_cpu_prepare(int cpu)
 {
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-       if (!cpu_has_ht_siblings())
+       if (!x86_pmu.extra_regs)
                return NOTIFY_OK;
 
-       cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
-                                     GFP_KERNEL, cpu_to_node(cpu));
-       if (!cpuc->per_core)
+       cpuc->shared_regs = allocate_shared_regs(cpu);
+       if (!cpuc->shared_regs)
                return NOTIFY_BAD;
 
-       raw_spin_lock_init(&cpuc->per_core->lock);
-       cpuc->per_core->core_id = -1;
        return NOTIFY_OK;
 }
 
@@ -1260,32 +1354,34 @@ static void intel_pmu_cpu_starting(int cpu)
         */
        intel_pmu_lbr_reset();
 
-       if (!cpu_has_ht_siblings())
+       if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
                return;
 
        for_each_cpu(i, topology_thread_cpumask(cpu)) {
-               struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+               struct intel_shared_regs *pc;
 
+               pc = per_cpu(cpu_hw_events, i).shared_regs;
                if (pc && pc->core_id == core_id) {
-                       kfree(cpuc->per_core);
-                       cpuc->per_core = pc;
+                       kfree(cpuc->shared_regs);
+                       cpuc->shared_regs = pc;
                        break;
                }
        }
 
-       cpuc->per_core->core_id = core_id;
-       cpuc->per_core->refcnt++;
+       cpuc->shared_regs->core_id = core_id;
+       cpuc->shared_regs->refcnt++;
 }
 
 static void intel_pmu_cpu_dying(int cpu)
 {
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       struct intel_percore *pc = cpuc->per_core;
+       struct intel_shared_regs *pc;
 
+       pc = cpuc->shared_regs;
        if (pc) {
                if (pc->core_id == -1 || --pc->refcnt == 0)
                        kfree(pc);
-               cpuc->per_core = NULL;
+               cpuc->shared_regs = NULL;
        }
 
        fini_debug_store_on_cpu(cpu);
@@ -1436,7 +1532,6 @@ static __init int intel_pmu_init(void)
 
                x86_pmu.event_constraints = intel_nehalem_event_constraints;
                x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
-               x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
                x86_pmu.enable_all = intel_pmu_nhm_enable_all;
                x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
@@ -1481,10 +1576,10 @@ static __init int intel_pmu_init(void)
                intel_pmu_lbr_init_nhm();
 
                x86_pmu.event_constraints = intel_westmere_event_constraints;
-               x86_pmu.percore_constraints = intel_westmere_percore_constraints;
                x86_pmu.enable_all = intel_pmu_nhm_enable_all;
                x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
                x86_pmu.extra_regs = intel_westmere_extra_regs;
+               x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
                /* UOPS_ISSUED.STALLED_CYCLES */
                intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
@@ -1502,6 +1597,10 @@ static __init int intel_pmu_init(void)
 
                x86_pmu.event_constraints = intel_snb_event_constraints;
                x86_pmu.pebs_constraints = intel_snb_pebs_events;
+               x86_pmu.extra_regs = intel_snb_extra_regs;
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.er_flags |= ERF_HAS_RSP_1;
+               x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
                /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
                intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
@@ -1512,11 +1611,19 @@ static __init int intel_pmu_init(void)
                break;
 
        default:
-               /*
-                * default constraints for v2 and up
-                */
-               x86_pmu.event_constraints = intel_gen_event_constraints;
-               pr_cont("generic architected perfmon, ");
+               switch (x86_pmu.version) {
+               case 1:
+                       x86_pmu.event_constraints = intel_v1_event_constraints;
+                       pr_cont("generic architected perfmon v1, ");
+                       break;
+               default:
+                       /*
+                        * default constraints for v2 and up
+                        */
+                       x86_pmu.event_constraints = intel_gen_event_constraints;
+                       pr_cont("generic architected perfmon, ");
+                       break;
+               }
        }
        return 0;
 }
@@ -1528,4 +1635,8 @@ static int intel_pmu_init(void)
        return 0;
 }
 
+static struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+       return NULL;
+}
 #endif /* CONFIG_CPU_SUP_INTEL */
index bab491b8ee25e37b81d33e8051699f099326c717..1b1ef3addcfdbff04f692ec6c19c984d9a912b0a 100644 (file)
@@ -340,7 +340,7 @@ static int intel_pmu_drain_bts_buffer(void)
         */
        perf_prepare_sample(&header, &data, event, &regs);
 
-       if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
+       if (perf_output_begin(&handle, event, header.size * (top - at)))
                return 1;
 
        for (; at < top; at++) {
@@ -616,7 +616,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
        else
                regs.flags &= ~PERF_EFLAGS_EXACT;
 
-       if (perf_event_overflow(event, 1, &data, &regs))
+       if (perf_event_overflow(event, &data, &regs))
                x86_pmu_stop(event, 0);
 }
 
index ead584fb6a7d4870a606b4dace503c8c63763293..7809d2bcb2091fe5975053234c85c09f3ffa1bd0 100644 (file)
@@ -554,13 +554,102 @@ static __initconst const u64 p4_hw_cache_event_ids
                [ C(RESULT_MISS)   ] = -1,
        },
  },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
 };
 
+/*
+ * Because Netburst is quite restricted in how many
+ * identical events may run simultaneously, we introduce event aliases,
+ * i.e. different events which have the same functionality but
+ * use non-intersecting resources (ESCR/CCCR/counter registers).
+ *
+ * This allows us to relax the restrictions a bit and run two or more
+ * identical events together.
+ *
+ * Never set any custom internal bits such as P4_CONFIG_HT,
+ * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC; they are
+ * either kept up to date automatically or not applicable at all.
+ */
+struct p4_event_alias {
+       u64 original;
+       u64 alternative;
+} p4_event_aliases[] = {
+       {
+               /*
+                * Non-halted cycles can be substituted with non-sleeping cycles (see
+                * Intel SDM Vol3b for details). We need this alias to be able
+                * to run nmi-watchdog and 'perf top' (or any other user space tool
+                * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
+                * simultaneously.
+                */
+       .original       =
+               p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)         |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
+       .alternative    =
+               p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT)             |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
+               p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT          |
+                                   P4_CCCR_COMPARE),
+       },
+};
+
+static u64 p4_get_alias_event(u64 config)
+{
+       u64 config_match;
+       int i;
+
+       /*
+        * Only an event carrying the special mark is allowed;
+        * this makes sure it didn't come in as a malformed
+        * RAW event.
+        */
+       if (!(config & P4_CONFIG_ALIASABLE))
+               return 0;
+
+       config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
+
+       for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
+               if (config_match == p4_event_aliases[i].original) {
+                       config_match = p4_event_aliases[i].alternative;
+                       break;
+               } else if (config_match == p4_event_aliases[i].alternative) {
+                       config_match = p4_event_aliases[i].original;
+                       break;
+               }
+       }
+
+       if (i >= ARRAY_SIZE(p4_event_aliases))
+               return 0;
+
+       return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
+}
+
 static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
   /* non-halted CPU clocks */
   [PERF_COUNT_HW_CPU_CYCLES] =
        p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
+               P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING))       |
+               P4_CONFIG_ALIASABLE,
 
   /*
    * retired instructions
@@ -945,7 +1034,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 
                if (!x86_perf_event_set_period(event))
                        continue;
-               if (perf_event_overflow(event, 1, &data, regs))
+               if (perf_event_overflow(event, &data, regs))
                        x86_pmu_stop(event, 0);
        }
 
@@ -1120,6 +1209,8 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
        struct p4_event_bind *bind;
        unsigned int i, thread, num;
        int cntr_idx, escr_idx;
+       u64 config_alias;
+       int pass;
 
        bitmap_zero(used_mask, X86_PMC_IDX_MAX);
        bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
@@ -1128,6 +1219,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
 
                hwc = &cpuc->event_list[i]->hw;
                thread = p4_ht_thread(cpu);
+               pass = 0;
+
+again:
+               /*
+                * It's possible to ping-pong forever between the
+                * original and alternative events if both are
+                * scheduled already, so bound the number of passes.
+                */
+               if (pass > 2)
+                       goto done;
+
                bind = p4_config_get_bind(hwc->config);
                escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
                if (unlikely(escr_idx == -1))
@@ -1141,8 +1243,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
                }
 
                cntr_idx = p4_next_cntr(thread, used_mask, bind);
-               if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
-                       goto done;
+               if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
+                       /*
+                        * Check whether an event alias is still available.
+                        */
+                       config_alias = p4_get_alias_event(hwc->config);
+                       if (!config_alias)
+                               goto done;
+                       hwc->config = config_alias;
+                       pass++;
+                       goto again;
+               }
 
                p4_pmu_swap_config_ts(hwc, cpu);
                if (assign)
index e71c98d3c0d2d7ff4fc2085d646d31b56bc6a476..19853ad8afc5ff940cbf6806b6ae283695296cd7 100644 (file)
@@ -104,34 +104,6 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
        return (stack >= irq_stack && stack < irq_stack_end);
 }
 
-/*
- * We are returning from the irq stack and go to the previous one.
- * If the previous stack is also in the irq stack, then bp in the first
- * frame of the irq stack points to the previous, interrupted one.
- * Otherwise we have another level of indirection: We first save
- * the bp of the previous stack, then we switch the stack to the irq one
- * and save a new bp that links to the previous one.
- * (See save_args())
- */
-static inline unsigned long
-fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
-                 unsigned long *irq_stack, unsigned long *irq_stack_end)
-{
-#ifdef CONFIG_FRAME_POINTER
-       struct stack_frame *frame = (struct stack_frame *)bp;
-       unsigned long next;
-
-       if (!in_irq_stack(stack, irq_stack, irq_stack_end)) {
-               if (!probe_kernel_address(&frame->next_frame, next))
-                       return next;
-               else
-                       WARN_ONCE(1, "Perf: bad frame pointer = %p in "
-                                 "callchain\n", &frame->next_frame);
-       }
-#endif
-       return bp;
-}
-
 /*
  * x86-64 can have up to three kernel stacks:
  * process stack
@@ -155,9 +127,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                task = current;
 
        if (!stack) {
-               stack = &dummy;
-               if (task && task != current)
+               if (regs)
+                       stack = (unsigned long *)regs->sp;
+               else if (task && task != current)
                        stack = (unsigned long *)task->thread.sp;
+               else
+                       stack = &dummy;
        }
 
        if (!bp)
@@ -205,8 +180,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                                 * pointer (index -1 to end) in the IRQ stack:
                                 */
                                stack = (unsigned long *) (irq_stack_end[-1]);
-                               bp = fixup_bp_irq_link(bp, stack, irq_stack,
-                                                      irq_stack_end);
                                irq_stack_end = NULL;
                                ops->stack(data, "EOI");
                                continue;
index 8a445a0c989e095bc84557861d4a4e73e09e3e90..d656f68371a4afdac2cd98b7a06cfac88f4f5769 100644 (file)
@@ -297,27 +297,26 @@ ENDPROC(native_usergs_sysret64)
        .endm
 
 /* save partial stack frame */
-       .pushsection .kprobes.text, "ax"
-ENTRY(save_args)
-       XCPT_FRAME
+       .macro SAVE_ARGS_IRQ
        cld
-       /*
-        * start from rbp in pt_regs and jump over
-        * return address.
-        */
-       movq_cfi rdi, RDI+8-RBP
-       movq_cfi rsi, RSI+8-RBP
-       movq_cfi rdx, RDX+8-RBP
-       movq_cfi rcx, RCX+8-RBP
-       movq_cfi rax, RAX+8-RBP
-       movq_cfi  r8,  R8+8-RBP
-       movq_cfi  r9,  R9+8-RBP
-       movq_cfi r10, R10+8-RBP
-       movq_cfi r11, R11+8-RBP
-
-       leaq -RBP+8(%rsp),%rdi  /* arg1 for handler */
-       movq_cfi rbp, 8         /* push %rbp */
-       leaq 8(%rsp), %rbp              /* mov %rsp, %ebp */
+       /* start from rbp in pt_regs and jump over */
+       movq_cfi rdi, RDI-RBP
+       movq_cfi rsi, RSI-RBP
+       movq_cfi rdx, RDX-RBP
+       movq_cfi rcx, RCX-RBP
+       movq_cfi rax, RAX-RBP
+       movq_cfi  r8,  R8-RBP
+       movq_cfi  r9,  R9-RBP
+       movq_cfi r10, R10-RBP
+       movq_cfi r11, R11-RBP
+
+       /* Save rbp so that we can unwind from get_irq_regs() */
+       movq_cfi rbp, 0
+
+       /* Save previous stack value */
+       movq %rsp, %rsi
+
+       leaq -RBP(%rsp),%rdi    /* arg1 for handler */
        testl $3, CS(%rdi)
        je 1f
        SWAPGS
@@ -329,19 +328,14 @@ ENTRY(save_args)
         */
 1:     incl PER_CPU_VAR(irq_count)
        jne 2f
-       popq_cfi %rax                   /* move return address... */
        mov PER_CPU_VAR(irq_stack_ptr),%rsp
        EMPTY_FRAME 0
-       pushq_cfi %rbp                  /* backlink for unwinder */
-       pushq_cfi %rax                  /* ... to the new stack */
-       /*
-        * We entered an interrupt context - irqs are off:
-        */
-2:     TRACE_IRQS_OFF
-       ret
-       CFI_ENDPROC
-END(save_args)
-       .popsection
+
+2:     /* Store previous stack value */
+       pushq %rsi
+       /* We entered an interrupt context - irqs are off: */
+       TRACE_IRQS_OFF
+       .endm
 
 ENTRY(save_rest)
        PARTIAL_FRAME 1 REST_SKIP+8
@@ -791,7 +785,7 @@ END(interrupt)
        /* reserve pt_regs for scratch regs and rbp */
        subq $ORIG_RAX-RBP, %rsp
        CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
-       call save_args
+       SAVE_ARGS_IRQ
        PARTIAL_FRAME 0
        call \func
        .endm
@@ -814,15 +808,14 @@ ret_from_intr:
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
        decl PER_CPU_VAR(irq_count)
-       leaveq
 
-       CFI_RESTORE             rbp
+       /* Restore saved previous stack */
+       popq %rsi
+       leaq 16(%rsi), %rsp
+
        CFI_DEF_CFA_REGISTER    rsp
-       CFI_ADJUST_CFA_OFFSET   -8
+       CFI_ADJUST_CFA_OFFSET   -16
 
-       /* we did not save rbx, restore only from ARGOFFSET */
-       addq $8, %rsp
-       CFI_ADJUST_CFA_OFFSET   -8
 exit_intr:
        GET_THREAD_INFO(%rcx)
        testl $3,CS-ARGOFFSET(%rsp)
index 5f9ecff328b5f9604eb4d053e21b19c7d2618c88..00354d4919a973fdc307427b8a2cd17bfaf94aa6 100644 (file)
@@ -608,7 +608,7 @@ int kgdb_arch_init(void)
        return register_die_notifier(&kgdb_notifier);
 }
 
-static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi,
+static void kgdb_hw_overflow_handler(struct perf_event *event,
                struct perf_sample_data *data, struct pt_regs *regs)
 {
        struct task_struct *tsk = current;
@@ -638,7 +638,7 @@ void kgdb_arch_late(void)
        for (i = 0; i < HBP_NUM; i++) {
                if (breakinfo[i].pev)
                        continue;
-               breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL);
+               breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
                if (IS_ERR((void * __force)breakinfo[i].pev)) {
                        printk(KERN_ERR "kgdb: Could not allocate hw"
                               "breakpoints\nDisabling the kernel debugger\n");
index 807c2a2b80f12d711240cc56b4b292b9cb616b89..82528799c5deece6e4da9758a520b82420763bfc 100644 (file)
@@ -528,7 +528,7 @@ static int genregs_set(struct task_struct *target,
        return ret;
 }
 
-static void ptrace_triggered(struct perf_event *bp, int nmi,
+static void ptrace_triggered(struct perf_event *bp,
                             struct perf_sample_data *data,
                             struct pt_regs *regs)
 {
@@ -715,7 +715,8 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
                attr.bp_type = HW_BREAKPOINT_W;
                attr.disabled = 1;
 
-               bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+               bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
+                                                NULL, tsk);
 
                /*
                 * CHECKME: the previous code returned -EIO if the addr wasn't
index 55d9bc03f696c1d37638575bd504fbf8bc2e1444..fdd0c6430e5abc97c878d192439447298b2fd06c 100644 (file)
@@ -66,7 +66,7 @@ void save_stack_trace(struct stack_trace *trace)
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
-void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
 {
        dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);
        if (trace->nr_entries < trace->max_entries)
index f2479f19ddde246f8ff2c40789b0da944479f78f..6ba477342b8e236406c716e5229b5653cd4f9a16 100644 (file)
@@ -18,7 +18,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
 
 lib-y := delay.o
 lib-y += thunk_$(BITS).o
-lib-y += usercopy_$(BITS).o getuser.o putuser.o
+lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
 
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
new file mode 100644 (file)
index 0000000..97be9cb
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * User address space access functions.
+ *
+ *  For licensing details see kernel-base/COPYING
+ */
+
+#include <linux/highmem.h>
+#include <linux/module.h>
+
+/*
+ * Best-effort, GUP-based copy_from_user() that is NMI-safe
+ */
+unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+       unsigned long offset, addr = (unsigned long)from;
+       unsigned long size, len = 0;
+       struct page *page;
+       void *map;
+       int ret;
+
+       do {
+               ret = __get_user_pages_fast(addr, 1, 0, &page);
+               if (!ret)
+                       break;
+
+               offset = addr & (PAGE_SIZE - 1);
+               size = min(PAGE_SIZE - offset, n - len);
+
+               map = kmap_atomic(page);
+               memcpy(to, map+offset, size);
+               kunmap_atomic(map);
+               put_page(page);
+
+               len  += size;
+               to   += size;
+               addr += size;
+
+       } while (len < n);
+
+       return len;
+}
+EXPORT_SYMBOL_GPL(copy_from_user_nmi);
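Note that, unlike copy_from_user(), this helper returns the number of bytes actually copied, so callers compare the result against the requested size. A minimal sketch of a caller, assuming a made-up frame layout (the real users are the oprofile backtrace paths converted below):

	/* hypothetical frame layout, for illustration only */
	struct user_frame {
		struct user_frame __user *next_frame;
		unsigned long return_address;
	};

	static unsigned long read_user_return_addr(struct user_frame __user *head)
	{
		struct user_frame frame;
		unsigned long bytes;

		/* no page faults are taken, so this is safe from NMI context */
		bytes = copy_from_user_nmi(&frame, head, sizeof(frame));
		if (bytes != sizeof(frame))
			return 0;	/* page not mapped: give up */

		return frame.return_address;
	}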
index 2dbf6bf4c7e5295906d5f0b25fcb97dd9845d363..4d09df054e391822aa7a5e634a4ff2f4f8afabc0 100644 (file)
@@ -1059,7 +1059,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
        if (unlikely(error_code & PF_RSVD))
                pgtable_bad(regs, error_code, address);
 
-       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
        /*
         * If we're in an interrupt, have no user context or are running
@@ -1161,11 +1161,11 @@ good_area:
        if (flags & FAULT_FLAG_ALLOW_RETRY) {
                if (fault & VM_FAULT_MAJOR) {
                        tsk->maj_flt++;
-                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
                                      regs, address);
                } else {
                        tsk->min_flt++;
-                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
                                      regs, address);
                }
                if (fault & VM_FAULT_RETRY) {
index 704a37cedddb59404a3c1fc773e44853b2089939..dab41876cdd561294dafc02cd08d71a7a508500b 100644 (file)
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
        e->trace.entries = e->trace_entries;
        e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
        e->trace.skip = 0;
-       save_stack_trace_regs(&e->trace, regs);
+       save_stack_trace_regs(regs, &e->trace);
 
        /* Round address down to nearest 16 bytes */
        shadow_copy = kmemcheck_shadow_lookup(address
index a5b64ab4cd6e3ef34be5beda88100e72091437c8..bff89dfe36198225f2d7695e5d2ff01ce85e899f 100644 (file)
 #include <linux/oprofile.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/compat.h>
+#include <linux/uaccess.h>
+
 #include <asm/ptrace.h>
-#include <asm/uaccess.h>
 #include <asm/stacktrace.h>
-#include <linux/compat.h>
 
 static int backtrace_stack(void *data, char *name)
 {
@@ -40,13 +41,13 @@ static struct stacktrace_ops backtrace_ops = {
 static struct stack_frame_ia32 *
 dump_user_backtrace_32(struct stack_frame_ia32 *head)
 {
+       /* Also check accessibility of one struct frame_head beyond: */
        struct stack_frame_ia32 bufhead[2];
        struct stack_frame_ia32 *fp;
+       unsigned long bytes;
 
-       /* Also check accessibility of one struct frame_head beyond */
-       if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
-               return NULL;
-       if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
+       bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
+       if (bytes != sizeof(bufhead))
                return NULL;
 
        fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
@@ -87,12 +88,12 @@ x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
 
 static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
 {
+       /* Also check accessibility of one struct stack_frame beyond: */
        struct stack_frame bufhead[2];
+       unsigned long bytes;
 
-       /* Also check accessibility of one struct stack_frame beyond */
-       if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
-               return NULL;
-       if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
+       bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
+       if (bytes != sizeof(bufhead))
                return NULL;
 
        oprofile_add_trace(bufhead[0].return_address);
index 9046f7b2ed79f7a52354a2dbe45db20310cdca65..94796f39bc47caafcb5771ba693b64e9691880ad 100644 (file)
@@ -31,7 +31,7 @@ static int num_counters;
 /*
  * Overflow callback for oprofile.
  */
-static void op_overflow_handler(struct perf_event *event, int unused,
+static void op_overflow_handler(struct perf_event *event,
                        struct perf_sample_data *data, struct pt_regs *regs)
 {
        int id;
@@ -79,7 +79,7 @@ static int op_create_counter(int cpu, int event)
 
        pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
                                                  cpu, NULL,
-                                                 op_overflow_handler);
+                                                 op_overflow_handler, NULL);
 
        if (IS_ERR(pevent))
                return PTR_ERR(pevent);
index 9d88e1cb5dbbc557e14502005806002b7cdd8bc5..f0c0e8a47ae61f67ed0bbf5b3f723acfdc5df435 100644 (file)
@@ -19,6 +19,8 @@
 
 #include <asm/ftrace.h>
 
+struct ftrace_hash;
+
 #ifdef CONFIG_FUNCTION_TRACER
 
 extern int ftrace_enabled;
@@ -29,8 +31,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
 
-struct ftrace_hash;
-
 enum {
        FTRACE_OPS_FL_ENABLED           = 1 << 0,
        FTRACE_OPS_FL_GLOBAL            = 1 << 1,
@@ -123,7 +123,8 @@ stack_trace_sysctl(struct ctl_table *table, int write,
 struct ftrace_func_command {
        struct list_head        list;
        char                    *name;
-       int                     (*func)(char *func, char *cmd,
+       int                     (*func)(struct ftrace_hash *hash,
+                                       char *func, char *cmd,
                                        char *params, int enable);
 };
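A command callback now receives the ftrace_hash being edited as its first argument. A sketch of a command definition under the new signature, assuming registration still goes through register_ftrace_command(); the command name and body are invented:

	static int foo_cmd_func(struct ftrace_hash *hash, char *func,
				char *cmd, char *params, int enable)
	{
		/* operate on the filter hash the user is editing */
		return 0;
	}

	static struct ftrace_func_command foo_cmd = {
		.name	= "foo",
		.func	= foo_cmd_func,
	};

	/* e.g. from an __init function: */
	/*	return register_ftrace_command(&foo_cmd); */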
 
index 59d3ef100eb9310a9c2c0cff903d6cae259d0810..96efa6794ea5293a59f40638d344005b2497f0bc 100644 (file)
@@ -76,6 +76,7 @@ struct trace_iterator {
        struct trace_entry      *ent;
        unsigned long           lost_events;
        int                     leftover;
+       int                     ent_size;
        int                     cpu;
        u64                     ts;
 
@@ -129,6 +130,10 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
 void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
                                       struct ring_buffer_event *event,
                                        unsigned long flags, int pc);
+void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+                                           struct ring_buffer_event *event,
+                                           unsigned long flags, int pc,
+                                           struct pt_regs *regs);
 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
                                         struct ring_buffer_event *event);
 
index d1e55fed2c7dc0281cd0cf653bb5a56a399265cb..6ae9c631a1be51c7221321a9a2ceb787217e4ff0 100644 (file)
@@ -73,6 +73,7 @@ static inline unsigned long hw_breakpoint_len(struct perf_event *bp)
 extern struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
+                           void *context,
                            struct task_struct *tsk);
 
 /* FIXME: only change from the attr, and don't unregister */
@@ -85,11 +86,13 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr);
 extern struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
                                perf_overflow_handler_t triggered,
+                               void *context,
                                int cpu);
 
 extern struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-                           perf_overflow_handler_t triggered);
+                           perf_overflow_handler_t triggered,
+                           void *context);
 
 extern int register_perf_hw_breakpoint(struct perf_event *bp);
 extern int __register_perf_hw_breakpoint(struct perf_event *bp);
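The new context cookie ends up in event->overflow_handler_context (see the perf_event change later in this diff); the converted ptrace and kgdb call sites above simply pass NULL. A sketch of a caller that uses it; the handler and cookie names are illustrative:

	static void my_bp_handler(struct perf_event *bp,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
	{
		void *cookie = bp->overflow_handler_context;

		/* ... act on the watchpoint hit using the cookie ... */
	}

		/* at registration time: */
		bp = register_user_hw_breakpoint(&attr, my_bp_handler,
						 cookie /* new argument */, tsk);
		if (IS_ERR(bp))
			return PTR_ERR(bp);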
@@ -115,6 +118,7 @@ static inline int __init init_hw_breakpoint(void) { return 0; }
 static inline struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
+                           void *context,
                            struct task_struct *tsk)    { return NULL; }
 static inline int
 modify_user_hw_breakpoint(struct perf_event *bp,
@@ -122,10 +126,12 @@ modify_user_hw_breakpoint(struct perf_event *bp,
 static inline struct perf_event *
 register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
                                perf_overflow_handler_t  triggered,
+                               void *context,
                                int cpu)                { return NULL; }
 static inline struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-                           perf_overflow_handler_t triggered)  { return NULL; }
+                           perf_overflow_handler_t triggered,
+                           void *context)              { return NULL; }
 static inline int
 register_perf_hw_breakpoint(struct perf_event *bp)     { return -ENOSYS; }
 static inline int
index e0786e35f247664a8fd9838dced3a46d4f71257c..3f2711ccf910e58a0993b115e1fa6cfbdeb9394b 100644 (file)
@@ -61,7 +61,7 @@ enum perf_hw_id {
 /*
  * Generalized hardware cache events:
  *
- *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
+ *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
  *       { read, write, prefetch } x
  *       { accesses, misses }
  */
@@ -72,6 +72,7 @@ enum perf_hw_cache_id {
        PERF_COUNT_HW_CACHE_DTLB                = 3,
        PERF_COUNT_HW_CACHE_ITLB                = 4,
        PERF_COUNT_HW_CACHE_BPU                 = 5,
+       PERF_COUNT_HW_CACHE_NODE                = 6,
 
        PERF_COUNT_HW_CACHE_MAX,                /* non-ABI */
 };
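The new NODE id slots into the standard PERF_TYPE_HW_CACHE config packing, id | (op << 8) | (result << 16). For instance, an attr counting node read misses would look roughly like:

	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HW_CACHE,
		.size	= sizeof(attr),
		.config	= PERF_COUNT_HW_CACHE_NODE |
			  (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			  (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
	};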
@@ -536,6 +537,16 @@ struct perf_branch_stack {
 
 struct task_struct;
 
+/*
+ * extra PMU register associated with an event
+ */
+struct hw_perf_event_extra {
+       u64             config; /* register value */
+       unsigned int    reg;    /* register address or index */
+       int             alloc;  /* extra register already allocated */
+       int             idx;    /* index in shared_regs->regs[] */
+};
+
 /**
  * struct hw_perf_event - performance event hardware details:
  */
@@ -549,9 +560,7 @@ struct hw_perf_event {
                        unsigned long   event_base;
                        int             idx;
                        int             last_cpu;
-                       unsigned int    extra_reg;
-                       u64             extra_config;
-                       int             extra_alloc;
+                       struct hw_perf_event_extra extra_reg;
                };
                struct { /* software */
                        struct hrtimer  hrtimer;
@@ -680,36 +689,9 @@ enum perf_event_active_state {
 };
 
 struct file;
-
-#define PERF_BUFFER_WRITABLE           0x01
-
-struct perf_buffer {
-       atomic_t                        refcount;
-       struct rcu_head                 rcu_head;
-#ifdef CONFIG_PERF_USE_VMALLOC
-       struct work_struct              work;
-       int                             page_order;     /* allocation order  */
-#endif
-       int                             nr_pages;       /* nr of data pages  */
-       int                             writable;       /* are we writable   */
-
-       atomic_t                        poll;           /* POLL_ for wakeups */
-
-       local_t                         head;           /* write position    */
-       local_t                         nest;           /* nested writers    */
-       local_t                         events;         /* event limit       */
-       local_t                         wakeup;         /* wakeup stamp      */
-       local_t                         lost;           /* nr records lost   */
-
-       long                            watermark;      /* wakeup watermark  */
-
-       struct perf_event_mmap_page     *user_page;
-       void                            *data_pages[0];
-};
-
 struct perf_sample_data;
 
-typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
+typedef void (*perf_overflow_handler_t)(struct perf_event *,
                                        struct perf_sample_data *,
                                        struct pt_regs *regs);
 
@@ -745,6 +727,8 @@ struct perf_cgroup {
 };
 #endif
 
+struct ring_buffer;
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -834,7 +818,7 @@ struct perf_event {
        atomic_t                        mmap_count;
        int                             mmap_locked;
        struct user_struct              *mmap_user;
-       struct perf_buffer              *buffer;
+       struct ring_buffer              *rb;
 
        /* poll related */
        wait_queue_head_t               waitq;
@@ -855,6 +839,7 @@ struct perf_event {
        u64                             id;
 
        perf_overflow_handler_t         overflow_handler;
+       void                            *overflow_handler_context;
 
 #ifdef CONFIG_EVENT_TRACING
        struct ftrace_event_call        *tp_event;
@@ -919,8 +904,8 @@ struct perf_event_context {
        u64                             parent_gen;
        u64                             generation;
        int                             pin_count;
-       struct rcu_head                 rcu_head;
        int                             nr_cgroups; /* cgroup events present */
+       struct rcu_head                 rcu_head;
 };
 
 /*
@@ -945,13 +930,11 @@ struct perf_cpu_context {
 
 struct perf_output_handle {
        struct perf_event               *event;
-       struct perf_buffer              *buffer;
+       struct ring_buffer              *rb;
        unsigned long                   wakeup;
        unsigned long                   size;
        void                            *addr;
        int                             page;
-       int                             nmi;
-       int                             sample;
 };
 
 #ifdef CONFIG_PERF_EVENTS
@@ -972,13 +955,15 @@ extern void perf_pmu_disable(struct pmu *pmu);
 extern void perf_pmu_enable(struct pmu *pmu);
 extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
+extern int perf_event_refresh(struct perf_event *event, int refresh);
 extern void perf_event_update_userpage(struct perf_event *event);
 extern int perf_event_release_kernel(struct perf_event *event);
 extern struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr,
                                int cpu,
                                struct task_struct *task,
-                               perf_overflow_handler_t callback);
+                               perf_overflow_handler_t callback,
+                               void *context);
 extern u64 perf_event_read_value(struct perf_event *event,
                                 u64 *enabled, u64 *running);
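Putting the two signature changes together (an overflow handler without the nmi flag, creation with a context cookie), an in-kernel user now looks roughly like the sketch below; struct my_ctx and all names are invented, and the oprofile call site earlier in this diff simply passes NULL:

	struct my_ctx {
		u64 overflows;
	};

	static void my_handler(struct perf_event *event,
			       struct perf_sample_data *data,
			       struct pt_regs *regs)
	{
		struct my_ctx *my = event->overflow_handler_context;

		my->overflows++;	/* note: no 'int nmi' argument anymore */
	}

	static struct perf_event *start_counter(struct perf_event_attr *attr,
						int cpu, struct my_ctx *my)
	{
		return perf_event_create_kernel_counter(attr, cpu,
							NULL,	/* all tasks */
							my_handler,
							my);	/* new context arg */
	}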
 
@@ -1018,7 +1003,7 @@ extern void perf_prepare_sample(struct perf_event_header *header,
                                struct perf_event *event,
                                struct pt_regs *regs);
 
-extern int perf_event_overflow(struct perf_event *event, int nmi,
+extern int perf_event_overflow(struct perf_event *event,
                                 struct perf_sample_data *data,
                                 struct pt_regs *regs);
 
@@ -1037,7 +1022,7 @@ static inline int is_software_event(struct perf_event *event)
 
 extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
-extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
+extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
 
 #ifndef perf_arch_fetch_caller_regs
 static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
@@ -1059,7 +1044,7 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs)
 }
 
 static __always_inline void
-perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
+perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 {
        struct pt_regs hot_regs;
 
@@ -1068,7 +1053,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
                        perf_fetch_caller_regs(&hot_regs);
                        regs = &hot_regs;
                }
-               __perf_sw_event(event_id, nr, nmi, regs, addr);
+               __perf_sw_event(event_id, nr, regs, addr);
        }
 }
 
@@ -1082,7 +1067,7 @@ static inline void perf_event_task_sched_in(struct task_struct *task)
 
 static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
 {
-       perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
+       perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
 
        __perf_event_task_sched_out(task, next);
 }
@@ -1143,8 +1128,7 @@ extern void perf_bp_event(struct perf_event *event, void *data);
 #endif
 
 extern int perf_output_begin(struct perf_output_handle *handle,
-                            struct perf_event *event, unsigned int size,
-                            int nmi, int sample);
+                            struct perf_event *event, unsigned int size);
 extern void perf_output_end(struct perf_output_handle *handle);
 extern void perf_output_copy(struct perf_output_handle *handle,
                             const void *buf, unsigned int len);
@@ -1166,10 +1150,13 @@ static inline void perf_event_delayed_put(struct task_struct *task)     { }
 static inline void perf_event_print_debug(void)                                { }
 static inline int perf_event_task_disable(void)                                { return -EINVAL; }
 static inline int perf_event_task_enable(void)                         { return -EINVAL; }
+static inline int perf_event_refresh(struct perf_event *event, int refresh)
+{
+       return -EINVAL;
+}
 
 static inline void
-perf_sw_event(u32 event_id, u64 nr, int nmi,
-                    struct pt_regs *regs, u64 addr)                    { }
+perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)    { }
 static inline void
 perf_bp_event(struct perf_event *event, void *data)                    { }
 
index ab38ac80b0f9c7c4979a30475626c9bf2fbffa36..b891de96000f35e3eaad5ecd7a6b5128fa6c65ff 100644 (file)
@@ -169,7 +169,7 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
 size_t ring_buffer_page_len(void *page);
 
 
-void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu);
 void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
 int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
                          size_t len, int cpu, int full);
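The read-page allocation now names the cpu whose buffer the page will be swapped with, presumably so the page can be allocated node-local. The round trip, sketched from the three signatures above:

	void *page;
	int ret;

	page = ring_buffer_alloc_read_page(buffer, cpu);	/* new cpu arg */
	if (!page)
		return -ENOMEM;

	ret = ring_buffer_read_page(buffer, &page, PAGE_SIZE, cpu, 0);
	if (ret >= 0)
		process_page(page);	/* hypothetical consumer */

	ring_buffer_free_read_page(buffer, page);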
index 25310f1d7f3773c540e51e7103a4edfa98db7a33..115b570e3bff3d47df43794b5781cfeaa3aa6245 100644 (file)
@@ -14,8 +14,8 @@ struct stack_trace {
 };
 
 extern void save_stack_trace(struct stack_trace *trace);
-extern void save_stack_trace_regs(struct stack_trace *trace,
-                                 struct pt_regs *regs);
+extern void save_stack_trace_regs(struct pt_regs *regs,
+                                 struct stack_trace *trace);
 extern void save_stack_trace_tsk(struct task_struct *tsk,
                                struct stack_trace *trace);
 
index cd9dbb913c777db20ecead3796bee04a6764e73b..d5fe7af0de2ee2d5181569775565dc8a767bdaba 100644 (file)
@@ -49,12 +49,13 @@ asynchronous and synchronous parts of the kernel.
 */
 
 #include <linux/async.h>
+#include <linux/atomic.h>
+#include <linux/ktime.h>
 #include <linux/module.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
-#include <asm/atomic.h>
 
 static async_cookie_t next_cookie = 1;
 
@@ -128,7 +129,8 @@ static void async_run_entry_fn(struct work_struct *work)
 
        /* 2) run (and print duration) */
        if (initcall_debug && system_state == SYSTEM_BOOTING) {
-               printk("calling  %lli_%pF @ %i\n", (long long)entry->cookie,
+               printk(KERN_DEBUG "calling  %lli_%pF @ %i\n",
+                       (long long)entry->cookie,
                        entry->func, task_pid_nr(current));
                calltime = ktime_get();
        }
@@ -136,7 +138,7 @@ static void async_run_entry_fn(struct work_struct *work)
        if (initcall_debug && system_state == SYSTEM_BOOTING) {
                rettime = ktime_get();
                delta = ktime_sub(rettime, calltime);
-               printk("initcall %lli_%pF returned 0 after %lld usecs\n",
+               printk(KERN_DEBUG "initcall %lli_%pF returned 0 after %lld usecs\n",
                        (long long)entry->cookie,
                        entry->func,
                        (long long)ktime_to_ns(delta) >> 10);
@@ -270,7 +272,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie,
        ktime_t starttime, delta, endtime;
 
        if (initcall_debug && system_state == SYSTEM_BOOTING) {
-               printk("async_waiting @ %i\n", task_pid_nr(current));
+               printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current));
                starttime = ktime_get();
        }
 
@@ -280,7 +282,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie,
                endtime = ktime_get();
                delta = ktime_sub(endtime, starttime);
 
-               printk("async_continuing @ %i after %lli usec\n",
+               printk(KERN_DEBUG "async_continuing @ %i after %lli usec\n",
                        task_pid_nr(current),
                        (long long)ktime_to_ns(delta) >> 10);
        }
index 1ce23d3d8394f3c1533b2e777b2da79ce6ce565d..89e5e8aa4c36605de76beda71b43576debb3536e 100644 (file)
@@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_core.o = -pg
 endif
 
-obj-y := core.o
+obj-y := core.o ring_buffer.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
index 9efe7108ccaf8debca9bd9a70e2bf6265a50949c..b8785e26ee1cd28c33a1c0429a49bb515c34c8d2 100644 (file)
@@ -36,6 +36,8 @@
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
 
+#include "internal.h"
+
 #include <asm/irq_regs.h>
 
 struct remote_function_call {
@@ -200,6 +202,22 @@ __get_cpu_context(struct perf_event_context *ctx)
        return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
 }
 
+static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
+                         struct perf_event_context *ctx)
+{
+       raw_spin_lock(&cpuctx->ctx.lock);
+       if (ctx)
+               raw_spin_lock(&ctx->lock);
+}
+
+static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
+                           struct perf_event_context *ctx)
+{
+       if (ctx)
+               raw_spin_unlock(&ctx->lock);
+       raw_spin_unlock(&cpuctx->ctx.lock);
+}
+
 #ifdef CONFIG_CGROUP_PERF
 
 /*
@@ -340,11 +358,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
        rcu_read_lock();
 
        list_for_each_entry_rcu(pmu, &pmus, entry) {
-
                cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 
-               perf_pmu_disable(cpuctx->ctx.pmu);
-
                /*
                 * perf_cgroup_events says at least one
                 * context on this CPU has cgroup events.
@@ -353,6 +368,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
                 * events for a context.
                 */
                if (cpuctx->ctx.nr_cgroups > 0) {
+                       perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+                       perf_pmu_disable(cpuctx->ctx.pmu);
 
                        if (mode & PERF_CGROUP_SWOUT) {
                                cpu_ctx_sched_out(cpuctx, EVENT_ALL);
@@ -372,9 +389,9 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
                                cpuctx->cgrp = perf_cgroup_from_task(task);
                                cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
                        }
+                       perf_pmu_enable(cpuctx->ctx.pmu);
+                       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
                }
-
-               perf_pmu_enable(cpuctx->ctx.pmu);
        }
 
        rcu_read_unlock();
@@ -731,6 +748,7 @@ static u64 perf_event_time(struct perf_event *event)
 
 /*
  * Update the total_time_enabled and total_time_running fields for an event.
+ * The caller of this function needs to hold the ctx->lock.
  */
 static void update_event_times(struct perf_event *event)
 {
@@ -1105,6 +1123,10 @@ static int __perf_remove_from_context(void *info)
        raw_spin_lock(&ctx->lock);
        event_sched_out(event, cpuctx, ctx);
        list_del_event(event, ctx);
+       if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
+               ctx->is_active = 0;
+               cpuctx->task_ctx = NULL;
+       }
        raw_spin_unlock(&ctx->lock);
 
        return 0;
@@ -1454,8 +1476,24 @@ static void add_event_to_ctx(struct perf_event *event,
        event->tstamp_stopped = tstamp;
 }
 
-static void perf_event_context_sched_in(struct perf_event_context *ctx,
-                                       struct task_struct *tsk);
+static void task_ctx_sched_out(struct perf_event_context *ctx);
+static void
+ctx_sched_in(struct perf_event_context *ctx,
+            struct perf_cpu_context *cpuctx,
+            enum event_type_t event_type,
+            struct task_struct *task);
+
+static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
+                               struct perf_event_context *ctx,
+                               struct task_struct *task)
+{
+       cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
+       if (ctx)
+               ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
+       cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
+       if (ctx)
+               ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
+}
 
 /*
  * Cross CPU call to install and enable a performance event
@@ -1466,20 +1504,37 @@ static int  __perf_install_in_context(void *info)
 {
        struct perf_event *event = info;
        struct perf_event_context *ctx = event->ctx;
-       struct perf_event *leader = event->group_leader;
        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
-       int err;
+       struct perf_event_context *task_ctx = cpuctx->task_ctx;
+       struct task_struct *task = current;
+
+       perf_ctx_lock(cpuctx, task_ctx);
+       perf_pmu_disable(cpuctx->ctx.pmu);
 
        /*
-        * In case we're installing a new context to an already running task,
-        * could also happen before perf_event_task_sched_in() on architectures
-        * which do context switches with IRQs enabled.
+        * If there was an active task_ctx, schedule it out.
         */
-       if (ctx->task && !cpuctx->task_ctx)
-               perf_event_context_sched_in(ctx, ctx->task);
+       if (task_ctx)
+               task_ctx_sched_out(task_ctx);
+
+       /*
+        * If the context we're installing events in is not the
+        * active task_ctx, flip them.
+        */
+       if (ctx->task && task_ctx != ctx) {
+               if (task_ctx)
+                       raw_spin_unlock(&task_ctx->lock);
+               raw_spin_lock(&ctx->lock);
+               task_ctx = ctx;
+       }
+
+       if (task_ctx) {
+               cpuctx->task_ctx = task_ctx;
+               task = task_ctx->task;
+       }
+
+       cpu_ctx_sched_out(cpuctx, EVENT_ALL);
 
-       raw_spin_lock(&ctx->lock);
-       ctx->is_active = 1;
        update_context_time(ctx);
        /*
         * update cgrp time only if current cgrp
@@ -1490,43 +1545,13 @@ static int  __perf_install_in_context(void *info)
 
        add_event_to_ctx(event, ctx);
 
-       if (!event_filter_match(event))
-               goto unlock;
-
-       /*
-        * Don't put the event on if it is disabled or if
-        * it is in a group and the group isn't on.
-        */
-       if (event->state != PERF_EVENT_STATE_INACTIVE ||
-           (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE))
-               goto unlock;
-
        /*
-        * An exclusive event can't go on if there are already active
-        * hardware events, and no hardware event can go on if there
-        * is already an exclusive event on.
+        * Schedule everything back in
         */
-       if (!group_can_go_on(event, cpuctx, 1))
-               err = -EEXIST;
-       else
-               err = event_sched_in(event, cpuctx, ctx);
-
-       if (err) {
-               /*
-                * This event couldn't go on.  If it is in a group
-                * then we have to pull the whole group off.
-                * If the event group is pinned then put it in error state.
-                */
-               if (leader != event)
-                       group_sched_out(leader, cpuctx, ctx);
-               if (leader->attr.pinned) {
-                       update_group_times(leader);
-                       leader->state = PERF_EVENT_STATE_ERROR;
-               }
-       }
+       perf_event_sched_in(cpuctx, task_ctx, task);
 
-unlock:
-       raw_spin_unlock(&ctx->lock);
+       perf_pmu_enable(cpuctx->ctx.pmu);
+       perf_ctx_unlock(cpuctx, task_ctx);
 
        return 0;
 }
@@ -1739,7 +1764,7 @@ out:
        raw_spin_unlock_irq(&ctx->lock);
 }
 
-static int perf_event_refresh(struct perf_event *event, int refresh)
+int perf_event_refresh(struct perf_event *event, int refresh)
 {
        /*
         * not supported on inherited events
@@ -1752,36 +1777,35 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(perf_event_refresh);
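With the symbol exported, modular users (breakpoint consumers, for instance) can re-arm a counter created with an event limit; inherited events still fail with -EINVAL per the check above. Hypothetical use:

	/* allow one more overflow before the event auto-disables */
	err = perf_event_refresh(bp_event, 1);
	if (err)
		pr_warn("could not re-arm breakpoint: %d\n", err);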
 
 static void ctx_sched_out(struct perf_event_context *ctx,
                          struct perf_cpu_context *cpuctx,
                          enum event_type_t event_type)
 {
        struct perf_event *event;
+       int is_active = ctx->is_active;
 
-       raw_spin_lock(&ctx->lock);
-       perf_pmu_disable(ctx->pmu);
-       ctx->is_active = 0;
+       ctx->is_active &= ~event_type;
        if (likely(!ctx->nr_events))
-               goto out;
+               return;
+
        update_context_time(ctx);
        update_cgrp_time_from_cpuctx(cpuctx);
-
        if (!ctx->nr_active)
-               goto out;
+               return;
 
-       if (event_type & EVENT_PINNED) {
+       perf_pmu_disable(ctx->pmu);
+       if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
                list_for_each_entry(event, &ctx->pinned_groups, group_entry)
                        group_sched_out(event, cpuctx, ctx);
        }
 
-       if (event_type & EVENT_FLEXIBLE) {
+       if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) {
                list_for_each_entry(event, &ctx->flexible_groups, group_entry)
                        group_sched_out(event, cpuctx, ctx);
        }
-out:
        perf_pmu_enable(ctx->pmu);
-       raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -1929,8 +1953,10 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
        rcu_read_unlock();
 
        if (do_switch) {
+               raw_spin_lock(&ctx->lock);
                ctx_sched_out(ctx, cpuctx, EVENT_ALL);
                cpuctx->task_ctx = NULL;
+               raw_spin_unlock(&ctx->lock);
        }
 }
 
@@ -1965,8 +1991,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
                perf_cgroup_sched_out(task);
 }
 
-static void task_ctx_sched_out(struct perf_event_context *ctx,
-                              enum event_type_t event_type)
+static void task_ctx_sched_out(struct perf_event_context *ctx)
 {
        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 
@@ -1976,7 +2001,7 @@ static void task_ctx_sched_out(struct perf_event_context *ctx,
        if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
                return;
 
-       ctx_sched_out(ctx, cpuctx, event_type);
+       ctx_sched_out(ctx, cpuctx, EVENT_ALL);
        cpuctx->task_ctx = NULL;
 }
 
@@ -2055,11 +2080,11 @@ ctx_sched_in(struct perf_event_context *ctx,
             struct task_struct *task)
 {
        u64 now;
+       int is_active = ctx->is_active;
 
-       raw_spin_lock(&ctx->lock);
-       ctx->is_active = 1;
+       ctx->is_active |= event_type;
        if (likely(!ctx->nr_events))
-               goto out;
+               return;
 
        now = perf_clock();
        ctx->timestamp = now;
@@ -2068,15 +2093,12 @@ ctx_sched_in(struct perf_event_context *ctx,
         * First go through the list and put on any pinned groups
         * in order to give them the best chance of going on.
         */
-       if (event_type & EVENT_PINNED)
+       if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))
                ctx_pinned_sched_in(ctx, cpuctx);
 
        /* Then walk through the lower prio flexible groups */
-       if (event_type & EVENT_FLEXIBLE)
+       if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))
                ctx_flexible_sched_in(ctx, cpuctx);
-
-out:
-       raw_spin_unlock(&ctx->lock);
 }
 
 static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
@@ -2088,19 +2110,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
        ctx_sched_in(ctx, cpuctx, event_type, task);
 }
 
-static void task_ctx_sched_in(struct perf_event_context *ctx,
-                             enum event_type_t event_type)
-{
-       struct perf_cpu_context *cpuctx;
-
-       cpuctx = __get_cpu_context(ctx);
-       if (cpuctx->task_ctx == ctx)
-               return;
-
-       ctx_sched_in(ctx, cpuctx, event_type, NULL);
-       cpuctx->task_ctx = ctx;
-}
-
 static void perf_event_context_sched_in(struct perf_event_context *ctx,
                                        struct task_struct *task)
 {
@@ -2110,6 +2119,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
        if (cpuctx->task_ctx == ctx)
                return;
 
+       perf_ctx_lock(cpuctx, ctx);
        perf_pmu_disable(ctx->pmu);
        /*
         * We want to keep the following priority order:
@@ -2118,18 +2128,18 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
         */
        cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
 
-       ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
-       cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
-       ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
+       perf_event_sched_in(cpuctx, ctx, task);
 
        cpuctx->task_ctx = ctx;
 
+       perf_pmu_enable(ctx->pmu);
+       perf_ctx_unlock(cpuctx, ctx);
+
        /*
         * Since these rotations are per-cpu, we need to ensure the
         * cpu-context we got scheduled on is actually rotating.
         */
        perf_pmu_rotate_start(ctx->pmu);
-       perf_pmu_enable(ctx->pmu);
 }
 
 /*
@@ -2269,7 +2279,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
        u64 interrupts, now;
        s64 delta;
 
-       raw_spin_lock(&ctx->lock);
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
                if (event->state != PERF_EVENT_STATE_ACTIVE)
                        continue;
@@ -2301,7 +2310,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
                if (delta > 0)
                        perf_adjust_period(event, period, delta);
        }
-       raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -2309,16 +2317,12 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
  */
 static void rotate_ctx(struct perf_event_context *ctx)
 {
-       raw_spin_lock(&ctx->lock);
-
        /*
         * Rotate the first entry last of non-pinned groups. Rotation might be
         * disabled by the inheritance code.
         */
        if (!ctx->rotate_disable)
                list_rotate_left(&ctx->flexible_groups);
-
-       raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -2345,6 +2349,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
                        rotate = 1;
        }
 
+       perf_ctx_lock(cpuctx, cpuctx->task_ctx);
        perf_pmu_disable(cpuctx->ctx.pmu);
        perf_ctx_adjust_freq(&cpuctx->ctx, interval);
        if (ctx)
@@ -2355,21 +2360,20 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 
        cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
        if (ctx)
-               task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
+               ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
        rotate_ctx(&cpuctx->ctx);
        if (ctx)
                rotate_ctx(ctx);
 
-       cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current);
-       if (ctx)
-               task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
+       perf_event_sched_in(cpuctx, ctx, current);
 
 done:
        if (remove)
                list_del_init(&cpuctx->rotation_list);
 
        perf_pmu_enable(cpuctx->ctx.pmu);
+       perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 }
 
 void perf_event_task_tick(void)
@@ -2424,9 +2428,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
         * in.
         */
        perf_cgroup_sched_out(current);
-       task_ctx_sched_out(ctx, EVENT_ALL);
 
        raw_spin_lock(&ctx->lock);
+       task_ctx_sched_out(ctx);
 
        list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
                ret = event_enable_on_exec(event, ctx);
@@ -2835,16 +2839,12 @@ retry:
                unclone_ctx(ctx);
                ++ctx->pin_count;
                raw_spin_unlock_irqrestore(&ctx->lock, flags);
-       }
-
-       if (!ctx) {
+       } else {
                ctx = alloc_perf_context(pmu, task);
                err = -ENOMEM;
                if (!ctx)
                        goto errout;
 
-               get_ctx(ctx);
-
                err = 0;
                mutex_lock(&task->perf_event_mutex);
                /*
@@ -2856,14 +2856,14 @@ retry:
                else if (task->perf_event_ctxp[ctxn])
                        err = -EAGAIN;
                else {
+                       get_ctx(ctx);
                        ++ctx->pin_count;
                        rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
                }
                mutex_unlock(&task->perf_event_mutex);
 
                if (unlikely(err)) {
-                       put_task_struct(task);
-                       kfree(ctx);
+                       put_ctx(ctx);
 
                        if (err == -EAGAIN)
                                goto retry;
@@ -2890,7 +2890,7 @@ static void free_event_rcu(struct rcu_head *head)
        kfree(event);
 }
 
-static void perf_buffer_put(struct perf_buffer *buffer);
+static void ring_buffer_put(struct ring_buffer *rb);
 
 static void free_event(struct perf_event *event)
 {
@@ -2913,9 +2913,9 @@ static void free_event(struct perf_event *event)
                }
        }
 
-       if (event->buffer) {
-               perf_buffer_put(event->buffer);
-               event->buffer = NULL;
+       if (event->rb) {
+               ring_buffer_put(event->rb);
+               event->rb = NULL;
        }
 
        if (is_cgroup_event(event))
@@ -2934,12 +2934,6 @@ int perf_event_release_kernel(struct perf_event *event)
 {
        struct perf_event_context *ctx = event->ctx;
 
-       /*
-        * Remove from the PMU, can't get re-enabled since we got
-        * here because the last ref went.
-        */
-       perf_event_disable(event);
-
        WARN_ON_ONCE(ctx->parent_ctx);
        /*
         * There are two ways this annotation is useful:
@@ -2956,8 +2950,8 @@ int perf_event_release_kernel(struct perf_event *event)
        mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
        raw_spin_lock_irq(&ctx->lock);
        perf_group_detach(event);
-       list_del_event(event, ctx);
        raw_spin_unlock_irq(&ctx->lock);
+       perf_remove_from_context(event);
        mutex_unlock(&ctx->mutex);
 
        free_event(event);
@@ -3149,13 +3143,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
        struct perf_event *event = file->private_data;
-       struct perf_buffer *buffer;
+       struct ring_buffer *rb;
        unsigned int events = POLL_HUP;
 
        rcu_read_lock();
-       buffer = rcu_dereference(event->buffer);
-       if (buffer)
-               events = atomic_xchg(&buffer->poll, 0);
+       rb = rcu_dereference(event->rb);
+       if (rb)
+               events = atomic_xchg(&rb->poll, 0);
        rcu_read_unlock();
 
        poll_wait(file, &event->waitq, wait);
@@ -3358,6 +3352,18 @@ static int perf_event_index(struct perf_event *event)
        return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
 }
 
+static void calc_timer_values(struct perf_event *event,
+                               u64 *running,
+                               u64 *enabled)
+{
+       u64 now, ctx_time;
+
+       now = perf_clock();
+       ctx_time = event->shadow_ctx_time + now;
+       *enabled = ctx_time - event->tstamp_enabled;
+       *running = ctx_time - event->tstamp_running;
+}
+
 /*
  * Callers need to ensure there can be no nesting of this function, otherwise
  * the seqlock logic goes bad. We can not serialize this because the arch
@@ -3366,14 +3372,25 @@ static int perf_event_index(struct perf_event *event)
 void perf_event_update_userpage(struct perf_event *event)
 {
        struct perf_event_mmap_page *userpg;
-       struct perf_buffer *buffer;
+       struct ring_buffer *rb;
+       u64 enabled, running;
 
        rcu_read_lock();
-       buffer = rcu_dereference(event->buffer);
-       if (!buffer)
+       /*
+        * compute total_time_enabled, total_time_running
+        * based on snapshot values taken when the event
+        * was last scheduled in.
+        *
+        * we cannot simply call update_context_time()
+        * because of locking issues, as we can be
+        * called in NMI context
+        */
+       calc_timer_values(event, &enabled, &running);
+       rb = rcu_dereference(event->rb);
+       if (!rb)
                goto unlock;
 
-       userpg = buffer->user_page;
+       userpg = rb->user_page;
 
        /*
         * Disable preemption so as to not let the corresponding user-space
@@ -3387,10 +3404,10 @@ void perf_event_update_userpage(struct perf_event *event)
        if (event->state == PERF_EVENT_STATE_ACTIVE)
                userpg->offset -= local64_read(&event->hw.prev_count);
 
-       userpg->time_enabled = event->total_time_enabled +
+       userpg->time_enabled = enabled +
                        atomic64_read(&event->child_total_time_enabled);
 
-       userpg->time_running = event->total_time_running +
+       userpg->time_running = running +
                        atomic64_read(&event->child_total_time_running);
 
        barrier();
@@ -3400,220 +3417,10 @@ unlock:
        rcu_read_unlock();
 }
 
-static unsigned long perf_data_size(struct perf_buffer *buffer);
-
-static void
-perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
-{
-       long max_size = perf_data_size(buffer);
-
-       if (watermark)
-               buffer->watermark = min(max_size, watermark);
-
-       if (!buffer->watermark)
-               buffer->watermark = max_size / 2;
-
-       if (flags & PERF_BUFFER_WRITABLE)
-               buffer->writable = 1;
-
-       atomic_set(&buffer->refcount, 1);
-}
-
-#ifndef CONFIG_PERF_USE_VMALLOC
-
-/*
- * Back perf_mmap() with regular GFP_KERNEL-0 pages.
- */
-
-static struct page *
-perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
-{
-       if (pgoff > buffer->nr_pages)
-               return NULL;
-
-       if (pgoff == 0)
-               return virt_to_page(buffer->user_page);
-
-       return virt_to_page(buffer->data_pages[pgoff - 1]);
-}
-
-static void *perf_mmap_alloc_page(int cpu)
-{
-       struct page *page;
-       int node;
-
-       node = (cpu == -1) ? cpu : cpu_to_node(cpu);
-       page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
-       if (!page)
-               return NULL;
-
-       return page_address(page);
-}
-
-static struct perf_buffer *
-perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
-{
-       struct perf_buffer *buffer;
-       unsigned long size;
-       int i;
-
-       size = sizeof(struct perf_buffer);
-       size += nr_pages * sizeof(void *);
-
-       buffer = kzalloc(size, GFP_KERNEL);
-       if (!buffer)
-               goto fail;
-
-       buffer->user_page = perf_mmap_alloc_page(cpu);
-       if (!buffer->user_page)
-               goto fail_user_page;
-
-       for (i = 0; i < nr_pages; i++) {
-               buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
-               if (!buffer->data_pages[i])
-                       goto fail_data_pages;
-       }
-
-       buffer->nr_pages = nr_pages;
-
-       perf_buffer_init(buffer, watermark, flags);
-
-       return buffer;
-
-fail_data_pages:
-       for (i--; i >= 0; i--)
-               free_page((unsigned long)buffer->data_pages[i]);
-
-       free_page((unsigned long)buffer->user_page);
-
-fail_user_page:
-       kfree(buffer);
-
-fail:
-       return NULL;
-}
-
-static void perf_mmap_free_page(unsigned long addr)
-{
-       struct page *page = virt_to_page((void *)addr);
-
-       page->mapping = NULL;
-       __free_page(page);
-}
-
-static void perf_buffer_free(struct perf_buffer *buffer)
-{
-       int i;
-
-       perf_mmap_free_page((unsigned long)buffer->user_page);
-       for (i = 0; i < buffer->nr_pages; i++)
-               perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
-       kfree(buffer);
-}
-
-static inline int page_order(struct perf_buffer *buffer)
-{
-       return 0;
-}
-
-#else
-
-/*
- * Back perf_mmap() with vmalloc memory.
- *
- * Required for architectures that have d-cache aliasing issues.
- */
-
-static inline int page_order(struct perf_buffer *buffer)
-{
-       return buffer->page_order;
-}
-
-static struct page *
-perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
-{
-       if (pgoff > (1UL << page_order(buffer)))
-               return NULL;
-
-       return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
-}
-
-static void perf_mmap_unmark_page(void *addr)
-{
-       struct page *page = vmalloc_to_page(addr);
-
-       page->mapping = NULL;
-}
-
-static void perf_buffer_free_work(struct work_struct *work)
-{
-       struct perf_buffer *buffer;
-       void *base;
-       int i, nr;
-
-       buffer = container_of(work, struct perf_buffer, work);
-       nr = 1 << page_order(buffer);
-
-       base = buffer->user_page;
-       for (i = 0; i < nr + 1; i++)
-               perf_mmap_unmark_page(base + (i * PAGE_SIZE));
-
-       vfree(base);
-       kfree(buffer);
-}
-
-static void perf_buffer_free(struct perf_buffer *buffer)
-{
-       schedule_work(&buffer->work);
-}
-
-static struct perf_buffer *
-perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
-{
-       struct perf_buffer *buffer;
-       unsigned long size;
-       void *all_buf;
-
-       size = sizeof(struct perf_buffer);
-       size += sizeof(void *);
-
-       buffer = kzalloc(size, GFP_KERNEL);
-       if (!buffer)
-               goto fail;
-
-       INIT_WORK(&buffer->work, perf_buffer_free_work);
-
-       all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
-       if (!all_buf)
-               goto fail_all_buf;
-
-       buffer->user_page = all_buf;
-       buffer->data_pages[0] = all_buf + PAGE_SIZE;
-       buffer->page_order = ilog2(nr_pages);
-       buffer->nr_pages = 1;
-
-       perf_buffer_init(buffer, watermark, flags);
-
-       return buffer;
-
-fail_all_buf:
-       kfree(buffer);
-
-fail:
-       return NULL;
-}
-
-#endif
-
-static unsigned long perf_data_size(struct perf_buffer *buffer)
-{
-       return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
-}
-
 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        struct perf_event *event = vma->vm_file->private_data;
-       struct perf_buffer *buffer;
+       struct ring_buffer *rb;
        int ret = VM_FAULT_SIGBUS;
 
        if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -3623,14 +3430,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        }
 
        rcu_read_lock();
-       buffer = rcu_dereference(event->buffer);
-       if (!buffer)
+       rb = rcu_dereference(event->rb);
+       if (!rb)
                goto unlock;
 
        if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
                goto unlock;
 
-       vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
+       vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
        if (!vmf->page)
                goto unlock;
 
@@ -3645,35 +3452,35 @@ unlock:
        return ret;
 }
 
-static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
+static void rb_free_rcu(struct rcu_head *rcu_head)
 {
-       struct perf_buffer *buffer;
+       struct ring_buffer *rb;
 
-       buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
-       perf_buffer_free(buffer);
+       rb = container_of(rcu_head, struct ring_buffer, rcu_head);
+       rb_free(rb);
 }
 
-static struct perf_buffer *perf_buffer_get(struct perf_event *event)
+static struct ring_buffer *ring_buffer_get(struct perf_event *event)
 {
-       struct perf_buffer *buffer;
+       struct ring_buffer *rb;
 
        rcu_read_lock();
-       buffer = rcu_dereference(event->buffer);
-       if (buffer) {
-               if (!atomic_inc_not_zero(&buffer->refcount))
-                       buffer = NULL;
+       rb = rcu_dereference(event->rb);
+       if (rb) {
+               if (!atomic_inc_not_zero(&rb->refcount))
+                       rb = NULL;
        }
        rcu_read_unlock();
 
-       return buffer;
+       return rb;
 }
 
-static void perf_buffer_put(struct perf_buffer *buffer)
+static void ring_buffer_put(struct ring_buffer *rb)
 {
-       if (!atomic_dec_and_test(&buffer->refcount))
+       if (!atomic_dec_and_test(&rb->refcount))
                return;
 
-       call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
+       call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
 static void perf_mmap_open(struct vm_area_struct *vma)
@@ -3688,16 +3495,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
        struct perf_event *event = vma->vm_file->private_data;
 
        if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
-               unsigned long size = perf_data_size(event->buffer);
+               unsigned long size = perf_data_size(event->rb);
                struct user_struct *user = event->mmap_user;
-               struct perf_buffer *buffer = event->buffer;
+               struct ring_buffer *rb = event->rb;
 
                atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
                vma->vm_mm->locked_vm -= event->mmap_locked;
-               rcu_assign_pointer(event->buffer, NULL);
+               rcu_assign_pointer(event->rb, NULL);
                mutex_unlock(&event->mmap_mutex);
 
-               perf_buffer_put(buffer);
+               ring_buffer_put(rb);
                free_uid(user);
        }
 }
@@ -3715,7 +3522,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        unsigned long user_locked, user_lock_limit;
        struct user_struct *user = current_user();
        unsigned long locked, lock_limit;
-       struct perf_buffer *buffer;
+       struct ring_buffer *rb;
        unsigned long vma_size;
        unsigned long nr_pages;
        long user_extra, extra;
@@ -3724,7 +3531,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        /*
         * Don't allow mmap() of inherited per-task counters. This would
         * create a performance issue due to all children writing to the
-        * same buffer.
+        * same rb.
         */
        if (event->cpu == -1 && event->attr.inherit)
                return -EINVAL;
@@ -3736,7 +3543,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        nr_pages = (vma_size / PAGE_SIZE) - 1;
 
        /*
-        * If we have buffer pages ensure they're a power-of-two number, so we
+        * If we have rb pages ensure they're a power-of-two number, so we
         * can do bitmasks instead of modulo.
         */
        if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -3750,9 +3557,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
        WARN_ON_ONCE(event->ctx->parent_ctx);
        mutex_lock(&event->mmap_mutex);
-       if (event->buffer) {
-               if (event->buffer->nr_pages == nr_pages)
-                       atomic_inc(&event->buffer->refcount);
+       if (event->rb) {
+               if (event->rb->nr_pages == nr_pages)
+                       atomic_inc(&event->rb->refcount);
                else
                        ret = -EINVAL;
                goto unlock;
@@ -3782,18 +3589,20 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                goto unlock;
        }
 
-       WARN_ON(event->buffer);
+       WARN_ON(event->rb);
 
        if (vma->vm_flags & VM_WRITE)
-               flags |= PERF_BUFFER_WRITABLE;
+               flags |= RING_BUFFER_WRITABLE;
 
-       buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
-                                  event->cpu, flags);
-       if (!buffer) {
+       rb = rb_alloc(nr_pages,
+               event->attr.watermark ? event->attr.wakeup_watermark : 0,
+               event->cpu, flags);
+
+       if (!rb) {
                ret = -ENOMEM;
                goto unlock;
        }
-       rcu_assign_pointer(event->buffer, buffer);
+       rcu_assign_pointer(event->rb, rb);
 
        atomic_long_add(user_extra, &user->locked_vm);
        event->mmap_locked = extra;
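The rb_alloc() call above consults attr.wakeup_watermark only when the attr.watermark select bit is set; otherwise it passes 0 and the ring buffer falls back to its default (half the buffer size, as the deleted perf_buffer_init() did). From the user side that corresponds to something like:

	struct perf_event_attr attr = {
		.type			= PERF_TYPE_SOFTWARE,
		.config			= PERF_COUNT_SW_PAGE_FAULTS,
		.sample_period		= 1,
		.watermark		= 1,	/* select wakeup_watermark */
		.wakeup_watermark	= 4096,	/* bytes before a wakeup */
	};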
@@ -3892,117 +3701,6 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 }
 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
 
-/*
- * Output
- */
-static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
-                             unsigned long offset, unsigned long head)
-{
-       unsigned long mask;
-
-       if (!buffer->writable)
-               return true;
-
-       mask = perf_data_size(buffer) - 1;
-
-       offset = (offset - tail) & mask;
-       head   = (head   - tail) & mask;
-
-       if ((int)(head - offset) < 0)
-               return false;
-
-       return true;
-}
-
-static void perf_output_wakeup(struct perf_output_handle *handle)
-{
-       atomic_set(&handle->buffer->poll, POLL_IN);
-
-       if (handle->nmi) {
-               handle->event->pending_wakeup = 1;
-               irq_work_queue(&handle->event->pending);
-       } else
-               perf_event_wakeup(handle->event);
-}
-
-/*
- * We need to ensure a later event_id doesn't publish a head when a former
- * event isn't done writing. However since we need to deal with NMIs we
- * cannot fully serialize things.
- *
- * We only publish the head (and generate a wakeup) when the outer-most
- * event completes.
- */
-static void perf_output_get_handle(struct perf_output_handle *handle)
-{
-       struct perf_buffer *buffer = handle->buffer;
-
-       preempt_disable();
-       local_inc(&buffer->nest);
-       handle->wakeup = local_read(&buffer->wakeup);
-}
-
-static void perf_output_put_handle(struct perf_output_handle *handle)
-{
-       struct perf_buffer *buffer = handle->buffer;
-       unsigned long head;
-
-again:
-       head = local_read(&buffer->head);
-
-       /*
-        * IRQ/NMI can happen here, which means we can miss a head update.
-        */
-
-       if (!local_dec_and_test(&buffer->nest))
-               goto out;
-
-       /*
-        * Publish the known good head. Rely on the full barrier implied
-        * by atomic_dec_and_test() order the buffer->head read and this
-        * write.
-        */
-       buffer->user_page->data_head = head;
-
-       /*
-        * Now check if we missed an update, rely on the (compiler)
-        * barrier in atomic_dec_and_test() to re-read buffer->head.
-        */
-       if (unlikely(head != local_read(&buffer->head))) {
-               local_inc(&buffer->nest);
-               goto again;
-       }
-
-       if (handle->wakeup != local_read(&buffer->wakeup))
-               perf_output_wakeup(handle);
-
-out:
-       preempt_enable();
-}
-
-__always_inline void perf_output_copy(struct perf_output_handle *handle,
-                     const void *buf, unsigned int len)
-{
-       do {
-               unsigned long size = min_t(unsigned long, handle->size, len);
-
-               memcpy(handle->addr, buf, size);
-
-               len -= size;
-               handle->addr += size;
-               buf += size;
-               handle->size -= size;
-               if (!handle->size) {
-                       struct perf_buffer *buffer = handle->buffer;
-
-                       handle->page++;
-                       handle->page &= buffer->nr_pages - 1;
-                       handle->addr = buffer->data_pages[handle->page];
-                       handle->size = PAGE_SIZE << page_order(buffer);
-               }
-       } while (len);
-}
-
 static void __perf_event_header__init_id(struct perf_event_header *header,
                                         struct perf_sample_data *data,
                                         struct perf_event *event)
@@ -4033,9 +3731,9 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
        }
 }
 
-static void perf_event_header__init_id(struct perf_event_header *header,
-                                      struct perf_sample_data *data,
-                                      struct perf_event *event)
+void perf_event_header__init_id(struct perf_event_header *header,
+                               struct perf_sample_data *data,
+                               struct perf_event *event)
 {
        if (event->attr.sample_id_all)
                __perf_event_header__init_id(header, data, event);
@@ -4062,121 +3760,14 @@ static void __perf_event__output_id_sample(struct perf_output_handle *handle,
                perf_output_put(handle, data->cpu_entry);
 }
 
-static void perf_event__output_id_sample(struct perf_event *event,
-                                        struct perf_output_handle *handle,
-                                        struct perf_sample_data *sample)
+void perf_event__output_id_sample(struct perf_event *event,
+                                 struct perf_output_handle *handle,
+                                 struct perf_sample_data *sample)
 {
        if (event->attr.sample_id_all)
                __perf_event__output_id_sample(handle, sample);
 }
 
-int perf_output_begin(struct perf_output_handle *handle,
-                     struct perf_event *event, unsigned int size,
-                     int nmi, int sample)
-{
-       struct perf_buffer *buffer;
-       unsigned long tail, offset, head;
-       int have_lost;
-       struct perf_sample_data sample_data;
-       struct {
-               struct perf_event_header header;
-               u64                      id;
-               u64                      lost;
-       } lost_event;
-
-       rcu_read_lock();
-       /*
-        * For inherited events we send all the output towards the parent.
-        */
-       if (event->parent)
-               event = event->parent;
-
-       buffer = rcu_dereference(event->buffer);
-       if (!buffer)
-               goto out;
-
-       handle->buffer  = buffer;
-       handle->event   = event;
-       handle->nmi     = nmi;
-       handle->sample  = sample;
-
-       if (!buffer->nr_pages)
-               goto out;
-
-       have_lost = local_read(&buffer->lost);
-       if (have_lost) {
-               lost_event.header.size = sizeof(lost_event);
-               perf_event_header__init_id(&lost_event.header, &sample_data,
-                                          event);
-               size += lost_event.header.size;
-       }
-
-       perf_output_get_handle(handle);
-
-       do {
-               /*
-                * Userspace could choose to issue a mb() before updating the
-                * tail pointer. So that all reads will be completed before the
-                * write is issued.
-                */
-               tail = ACCESS_ONCE(buffer->user_page->data_tail);
-               smp_rmb();
-               offset = head = local_read(&buffer->head);
-               head += size;
-               if (unlikely(!perf_output_space(buffer, tail, offset, head)))
-                       goto fail;
-       } while (local_cmpxchg(&buffer->head, offset, head) != offset);
-
-       if (head - local_read(&buffer->wakeup) > buffer->watermark)
-               local_add(buffer->watermark, &buffer->wakeup);
-
-       handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
-       handle->page &= buffer->nr_pages - 1;
-       handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
-       handle->addr = buffer->data_pages[handle->page];
-       handle->addr += handle->size;
-       handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
-
-       if (have_lost) {
-               lost_event.header.type = PERF_RECORD_LOST;
-               lost_event.header.misc = 0;
-               lost_event.id          = event->id;
-               lost_event.lost        = local_xchg(&buffer->lost, 0);
-
-               perf_output_put(handle, lost_event);
-               perf_event__output_id_sample(event, handle, &sample_data);
-       }
-
-       return 0;
-
-fail:
-       local_inc(&buffer->lost);
-       perf_output_put_handle(handle);
-out:
-       rcu_read_unlock();
-
-       return -ENOSPC;
-}
-
-void perf_output_end(struct perf_output_handle *handle)
-{
-       struct perf_event *event = handle->event;
-       struct perf_buffer *buffer = handle->buffer;
-
-       int wakeup_events = event->attr.wakeup_events;
-
-       if (handle->sample && wakeup_events) {
-               int events = local_inc_return(&buffer->events);
-               if (events >= wakeup_events) {
-                       local_sub(wakeup_events, &buffer->events);
-                       local_inc(&buffer->wakeup);
-               }
-       }
-
-       perf_output_put_handle(handle);
-       rcu_read_unlock();
-}
-
 static void perf_output_read_one(struct perf_output_handle *handle,
                                 struct perf_event *event,
                                 u64 enabled, u64 running)
@@ -4197,7 +3788,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
        if (read_format & PERF_FORMAT_ID)
                values[n++] = primary_event_id(event);
 
-       perf_output_copy(handle, values, n * sizeof(u64));
+       __output_copy(handle, values, n * sizeof(u64));
 }
 
 /*
@@ -4227,7 +3818,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
        if (read_format & PERF_FORMAT_ID)
                values[n++] = primary_event_id(leader);
 
-       perf_output_copy(handle, values, n * sizeof(u64));
+       __output_copy(handle, values, n * sizeof(u64));
 
        list_for_each_entry(sub, &leader->sibling_list, group_entry) {
                n = 0;
@@ -4239,7 +3830,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
                if (read_format & PERF_FORMAT_ID)
                        values[n++] = primary_event_id(sub);
 
-               perf_output_copy(handle, values, n * sizeof(u64));
+               __output_copy(handle, values, n * sizeof(u64));
        }
 }
 
@@ -4249,7 +3840,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 static void perf_output_read(struct perf_output_handle *handle,
                             struct perf_event *event)
 {
-       u64 enabled = 0, running = 0, now, ctx_time;
+       u64 enabled = 0, running = 0;
        u64 read_format = event->attr.read_format;
 
        /*
@@ -4261,12 +3852,8 @@ static void perf_output_read(struct perf_output_handle *handle,
         * because of locking issue as we are called in
         * NMI context
         */
-       if (read_format & PERF_FORMAT_TOTAL_TIMES) {
-               now = perf_clock();
-               ctx_time = event->shadow_ctx_time + now;
-               enabled = ctx_time - event->tstamp_enabled;
-               running = ctx_time - event->tstamp_running;
-       }
+       if (read_format & PERF_FORMAT_TOTAL_TIMES)
+               calc_timer_values(event, &enabled, &running);
 
        if (event->attr.read_format & PERF_FORMAT_GROUP)
                perf_output_read_group(handle, event, enabled, running);
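
calc_timer_values() replaces the open-coded timestamp math deleted above. Going by the removed lines, a hedged sketch of what the helper is expected to compute (the actual definition lives elsewhere in this patch and may differ in detail):

    static void calc_timer_values(struct perf_event *event,
                                  u64 *enabled, u64 *running)
    {
            u64 now = perf_clock();
            u64 ctx_time = event->shadow_ctx_time + now;

            *enabled = ctx_time - event->tstamp_enabled;
            *running = ctx_time - event->tstamp_running;
    }
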
@@ -4319,7 +3906,7 @@ void perf_output_sample(struct perf_output_handle *handle,
 
                        size *= sizeof(u64);
 
-                       perf_output_copy(handle, data->callchain, size);
+                       __output_copy(handle, data->callchain, size);
                } else {
                        u64 nr = 0;
                        perf_output_put(handle, nr);
@@ -4329,8 +3916,8 @@ void perf_output_sample(struct perf_output_handle *handle,
        if (sample_type & PERF_SAMPLE_RAW) {
                if (data->raw) {
                        perf_output_put(handle, data->raw->size);
-                       perf_output_copy(handle, data->raw->data,
-                                        data->raw->size);
+                       __output_copy(handle, data->raw->data,
+                                     data->raw->size);
                } else {
                        struct {
                                u32     size;
@@ -4342,6 +3929,20 @@ void perf_output_sample(struct perf_output_handle *handle,
                        perf_output_put(handle, raw);
                }
        }
+
+       if (!event->attr.watermark) {
+               int wakeup_events = event->attr.wakeup_events;
+
+               if (wakeup_events) {
+                       struct ring_buffer *rb = handle->rb;
+                       int events = local_inc_return(&rb->events);
+
+                       if (events >= wakeup_events) {
+                               local_sub(wakeup_events, &rb->events);
+                               local_inc(&rb->wakeup);
+                       }
+               }
+       }
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
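
The wakeup_events bookkeeping added above moves here from perf_output_end() and is now bypassed when attr.watermark selects byte-based wakeups. From userspace the two modes stay mutually exclusive; a hedged sketch of configuring either one:

    struct perf_event_attr attr = { 0 };

    attr.wakeup_events = 16;          /* wake the poller every 16 samples */

    /* or byte-based instead of sample-based: */
    attr.watermark        = 1;
    attr.wakeup_watermark = 4096;     /* wake when ~4KB has been written  */
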
@@ -4386,7 +3987,7 @@ void perf_prepare_sample(struct perf_event_header *header,
        }
 }
 
-static void perf_event_output(struct perf_event *event, int nmi,
+static void perf_event_output(struct perf_event *event,
                                struct perf_sample_data *data,
                                struct pt_regs *regs)
 {
@@ -4398,7 +3999,7 @@ static void perf_event_output(struct perf_event *event, int nmi,
 
        perf_prepare_sample(&header, data, event, regs);
 
-       if (perf_output_begin(&handle, event, header.size, nmi, 1))
+       if (perf_output_begin(&handle, event, header.size))
                goto exit;
 
        perf_output_sample(&handle, &header, data, event);
@@ -4438,7 +4039,7 @@ perf_event_read_event(struct perf_event *event,
        int ret;
 
        perf_event_header__init_id(&read_event.header, &sample, event);
-       ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
+       ret = perf_output_begin(&handle, event, read_event.header.size);
        if (ret)
                return;
 
@@ -4481,7 +4082,7 @@ static void perf_event_task_output(struct perf_event *event,
        perf_event_header__init_id(&task_event->event_id.header, &sample, event);
 
        ret = perf_output_begin(&handle, event,
-                               task_event->event_id.header.size, 0, 0);
+                               task_event->event_id.header.size);
        if (ret)
                goto out;
 
@@ -4618,7 +4219,7 @@ static void perf_event_comm_output(struct perf_event *event,
 
        perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
        ret = perf_output_begin(&handle, event,
-                               comm_event->event_id.header.size, 0, 0);
+                               comm_event->event_id.header.size);
 
        if (ret)
                goto out;
@@ -4627,7 +4228,7 @@ static void perf_event_comm_output(struct perf_event *event,
        comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
 
        perf_output_put(&handle, comm_event->event_id);
-       perf_output_copy(&handle, comm_event->comm,
+       __output_copy(&handle, comm_event->comm,
                                   comm_event->comm_size);
 
        perf_event__output_id_sample(event, &handle, &sample);
@@ -4765,7 +4366,7 @@ static void perf_event_mmap_output(struct perf_event *event,
 
        perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
        ret = perf_output_begin(&handle, event,
-                               mmap_event->event_id.header.size, 0, 0);
+                               mmap_event->event_id.header.size);
        if (ret)
                goto out;
 
@@ -4773,7 +4374,7 @@ static void perf_event_mmap_output(struct perf_event *event,
        mmap_event->event_id.tid = perf_event_tid(event, current);
 
        perf_output_put(&handle, mmap_event->event_id);
-       perf_output_copy(&handle, mmap_event->file_name,
+       __output_copy(&handle, mmap_event->file_name,
                                   mmap_event->file_size);
 
        perf_event__output_id_sample(event, &handle, &sample);
@@ -4829,7 +4430,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 
        if (file) {
                /*
                 * d_path works from the end of the buffer backwards, so we
                 * need to add enough zero bytes after the string to handle
                 * the 64bit alignment we do later.
                 */
@@ -4960,7 +4561,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
        perf_event_header__init_id(&throttle_event.header, &sample, event);
 
        ret = perf_output_begin(&handle, event,
-                               throttle_event.header.size, 1, 0);
+                               throttle_event.header.size);
        if (ret)
                return;
 
@@ -4973,7 +4574,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
  * Generic event overflow handling, sampling.
  */
 
-static int __perf_event_overflow(struct perf_event *event, int nmi,
+static int __perf_event_overflow(struct perf_event *event,
                                   int throttle, struct perf_sample_data *data,
                                   struct pt_regs *regs)
 {
@@ -5016,34 +4617,28 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
        if (events && atomic_dec_and_test(&event->event_limit)) {
                ret = 1;
                event->pending_kill = POLL_HUP;
-               if (nmi) {
-                       event->pending_disable = 1;
-                       irq_work_queue(&event->pending);
-               } else
-                       perf_event_disable(event);
+               event->pending_disable = 1;
+               irq_work_queue(&event->pending);
        }
 
        if (event->overflow_handler)
-               event->overflow_handler(event, nmi, data, regs);
+               event->overflow_handler(event, data, regs);
        else
-               perf_event_output(event, nmi, data, regs);
+               perf_event_output(event, data, regs);
 
        if (event->fasync && event->pending_kill) {
-               if (nmi) {
-                       event->pending_wakeup = 1;
-                       irq_work_queue(&event->pending);
-               } else
-                       perf_event_wakeup(event);
+               event->pending_wakeup = 1;
+               irq_work_queue(&event->pending);
        }
 
        return ret;
 }
 
-int perf_event_overflow(struct perf_event *event, int nmi,
+int perf_event_overflow(struct perf_event *event,
                          struct perf_sample_data *data,
                          struct pt_regs *regs)
 {
-       return __perf_event_overflow(event, nmi, 1, data, regs);
+       return __perf_event_overflow(event, 1, data, regs);
 }
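
With the nmi argument gone, pending disables and wakeups are always deferred through irq_work, and overflow callbacks lose a parameter as well. A hedged sketch of a handler conforming to the new perf_overflow_handler_t (handler name is illustrative):

    static void my_overflow_handler(struct perf_event *event,
                                    struct perf_sample_data *data,
                                    struct pt_regs *regs)
    {
            /* May run in NMI context: no sleeping, no unsafe locks. */
            pr_debug("overflow, event id %llu\n",
                     (unsigned long long)event->id);
    }
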
 
 /*
@@ -5092,7 +4687,7 @@ again:
 }
 
 static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
-                                   int nmi, struct perf_sample_data *data,
+                                   struct perf_sample_data *data,
                                    struct pt_regs *regs)
 {
        struct hw_perf_event *hwc = &event->hw;
@@ -5106,7 +4701,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
                return;
 
        for (; overflow; overflow--) {
-               if (__perf_event_overflow(event, nmi, throttle,
+               if (__perf_event_overflow(event, throttle,
                                            data, regs)) {
                        /*
                         * We inhibit the overflow from happening when
@@ -5119,7 +4714,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
 }
 
 static void perf_swevent_event(struct perf_event *event, u64 nr,
-                              int nmi, struct perf_sample_data *data,
+                              struct perf_sample_data *data,
                               struct pt_regs *regs)
 {
        struct hw_perf_event *hwc = &event->hw;
@@ -5133,12 +4728,12 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
                return;
 
        if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
-               return perf_swevent_overflow(event, 1, nmi, data, regs);
+               return perf_swevent_overflow(event, 1, data, regs);
 
        if (local64_add_negative(nr, &hwc->period_left))
                return;
 
-       perf_swevent_overflow(event, 0, nmi, data, regs);
+       perf_swevent_overflow(event, 0, data, regs);
 }
 
 static int perf_exclude_event(struct perf_event *event,
@@ -5226,7 +4821,7 @@ find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)
 }
 
 static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
-                                   u64 nr, int nmi,
+                                   u64 nr,
                                    struct perf_sample_data *data,
                                    struct pt_regs *regs)
 {
@@ -5242,7 +4837,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 
        hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
                if (perf_swevent_match(event, type, event_id, data, regs))
-                       perf_swevent_event(event, nr, nmi, data, regs);
+                       perf_swevent_event(event, nr, data, regs);
        }
 end:
        rcu_read_unlock();
@@ -5263,8 +4858,7 @@ inline void perf_swevent_put_recursion_context(int rctx)
        put_recursion_context(swhash->recursion, rctx);
 }
 
-void __perf_sw_event(u32 event_id, u64 nr, int nmi,
-                           struct pt_regs *regs, u64 addr)
+void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 {
        struct perf_sample_data data;
        int rctx;
@@ -5276,7 +4870,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 
        perf_sample_data_init(&data, addr);
 
-       do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
+       do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
 
        perf_swevent_put_recursion_context(rctx);
        preempt_enable_notrace();
@@ -5524,7 +5118,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
 
        hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
                if (perf_tp_event_match(event, &data, regs))
-                       perf_swevent_event(event, count, 1, &data, regs);
+                       perf_swevent_event(event, count, &data, regs);
        }
 
        perf_swevent_put_recursion_context(rctx);
@@ -5617,7 +5211,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
        perf_sample_data_init(&sample, bp->attr.bp_addr);
 
        if (!bp->hw.state && !perf_exclude_event(bp, regs))
-               perf_swevent_event(bp, 1, 1, &sample, regs);
+               perf_swevent_event(bp, 1, &sample, regs);
 }
 #endif
 
@@ -5646,7 +5240,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 
        if (regs && !perf_exclude_event(event, regs)) {
                if (!(event->attr.exclude_idle && current->pid == 0))
-                       if (perf_event_overflow(event, 0, &data, regs))
+                       if (perf_event_overflow(event, &data, regs))
                                ret = HRTIMER_NORESTART;
        }
 
@@ -5986,6 +5580,7 @@ free_dev:
 }
 
 static struct lock_class_key cpuctx_mutex;
+static struct lock_class_key cpuctx_lock;
 
 int perf_pmu_register(struct pmu *pmu, char *name, int type)
 {
@@ -6036,6 +5631,7 @@ skip_type:
                cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
                __perf_event_init_context(&cpuctx->ctx);
                lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
+               lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
                cpuctx->ctx.type = cpu_context;
                cpuctx->ctx.pmu = pmu;
                cpuctx->jiffies_interval = 1;
@@ -6150,7 +5746,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
                 struct task_struct *task,
                 struct perf_event *group_leader,
                 struct perf_event *parent_event,
-                perf_overflow_handler_t overflow_handler)
+                perf_overflow_handler_t overflow_handler,
+                void *context)
 {
        struct pmu *pmu;
        struct perf_event *event;
@@ -6208,10 +5805,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 #endif
        }
 
-       if (!overflow_handler && parent_event)
+       if (!overflow_handler && parent_event) {
                overflow_handler = parent_event->overflow_handler;
+               context = parent_event->overflow_handler_context;
+       }
 
        event->overflow_handler = overflow_handler;
+       event->overflow_handler_context = context;
 
        if (attr->disabled)
                event->state = PERF_EVENT_STATE_OFF;
@@ -6326,13 +5926,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
        if (ret)
                return -EFAULT;
 
-       /*
-        * If the type exists, the corresponding creation will verify
-        * the attr->config.
-        */
-       if (attr->type >= PERF_TYPE_MAX)
-               return -EINVAL;
-
        if (attr->__reserved_1)
                return -EINVAL;
 
@@ -6354,7 +5947,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-       struct perf_buffer *buffer = NULL, *old_buffer = NULL;
+       struct ring_buffer *rb = NULL, *old_rb = NULL;
        int ret = -EINVAL;
 
        if (!output_event)
@@ -6371,7 +5964,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
                goto out;
 
        /*
-        * If its not a per-cpu buffer, it must be the same task.
+        * If it's not a per-cpu ring buffer, it must be the same task.
         */
        if (output_event->cpu == -1 && output_event->ctx != event->ctx)
                goto out;
@@ -6383,20 +5976,20 @@ set:
                goto unlock;
 
        if (output_event) {
-               /* get the buffer we want to redirect to */
-               buffer = perf_buffer_get(output_event);
-               if (!buffer)
+               /* get the rb we want to redirect to */
+               rb = ring_buffer_get(output_event);
+               if (!rb)
                        goto unlock;
        }
 
-       old_buffer = event->buffer;
-       rcu_assign_pointer(event->buffer, buffer);
+       old_rb = event->rb;
+       rcu_assign_pointer(event->rb, rb);
        ret = 0;
 unlock:
        mutex_unlock(&event->mmap_mutex);
 
-       if (old_buffer)
-               perf_buffer_put(old_buffer);
+       if (old_rb)
+               ring_buffer_put(old_rb);
 out:
        return ret;
 }
@@ -6478,7 +6071,8 @@ SYSCALL_DEFINE5(perf_event_open,
                }
        }
 
-       event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL);
+       event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
+                                NULL, NULL);
        if (IS_ERR(event)) {
                err = PTR_ERR(event);
                goto err_task;
@@ -6663,7 +6257,8 @@ err_fd:
 struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
                                 struct task_struct *task,
-                                perf_overflow_handler_t overflow_handler)
+                                perf_overflow_handler_t overflow_handler,
+                                void *context)
 {
        struct perf_event_context *ctx;
        struct perf_event *event;
@@ -6673,7 +6268,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
         * Get the target context (task or percpu):
         */
 
-       event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler);
+       event = perf_event_alloc(attr, cpu, task, NULL, NULL,
+                                overflow_handler, context);
        if (IS_ERR(event)) {
                err = PTR_ERR(event);
                goto err;
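
The new context argument comes back to the handler via event->overflow_handler_context (set in perf_event_alloc() above). A hedged sketch of an in-kernel user threading private state through it (all names illustrative):

    struct my_state { atomic_t hits; };
    static struct my_state state;

    static void my_handler(struct perf_event *event,
                           struct perf_sample_data *data,
                           struct pt_regs *regs)
    {
            struct my_state *st = event->overflow_handler_context;

            atomic_inc(&st->hits);
    }

    event = perf_event_create_kernel_counter(&attr, cpu, NULL,
                                             my_handler, &state);
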
@@ -6780,7 +6376,6 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         * our context.
         */
        child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
-       task_ctx_sched_out(child_ctx, EVENT_ALL);
 
        /*
         * Take the context lock here so that if find_get_context is
@@ -6788,6 +6383,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         * incremented the context's refcount before we do put_ctx below.
         */
        raw_spin_lock(&child_ctx->lock);
+       task_ctx_sched_out(child_ctx);
        child->perf_event_ctxp[ctxn] = NULL;
        /*
         * If this context is a clone; unclone it so it can't get
@@ -6957,7 +6553,7 @@ inherit_event(struct perf_event *parent_event,
                                           parent_event->cpu,
                                           child,
                                           group_leader, parent_event,
-                                          NULL);
+                                          NULL, NULL);
        if (IS_ERR(child_event))
                return child_event;
        get_ctx(child_ctx);
@@ -6984,6 +6580,8 @@ inherit_event(struct perf_event *parent_event,
 
        child_event->ctx = child_ctx;
        child_event->overflow_handler = parent_event->overflow_handler;
+       child_event->overflow_handler_context
+               = parent_event->overflow_handler_context;
 
        /*
         * Precalculate sample_data sizes
index 086adf25a55e3aaecf3eb3172a7569b2c1a209e0..b7971d6f38bf191d885a821fcce5a0d411869511 100644 (file)
@@ -431,9 +431,11 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
+                           void *context,
                            struct task_struct *tsk)
 {
-       return perf_event_create_kernel_counter(attr, -1, tsk, triggered);
+       return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
+                                               context);
 }
 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
 
@@ -502,7 +504,8 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
  */
 struct perf_event * __percpu *
 register_wide_hw_breakpoint(struct perf_event_attr *attr,
-                           perf_overflow_handler_t triggered)
+                           perf_overflow_handler_t triggered,
+                           void *context)
 {
        struct perf_event * __percpu *cpu_events, **pevent, *bp;
        long err;
@@ -515,7 +518,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
        get_online_cpus();
        for_each_online_cpu(cpu) {
                pevent = per_cpu_ptr(cpu_events, cpu);
-               bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered);
+               bp = perf_event_create_kernel_counter(attr, cpu, NULL,
+                                                     triggered, context);
 
                *pevent = bp;
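
Both breakpoint wrappers now merely forward the extra pointer into perf_event_create_kernel_counter(). A hedged sketch of a caller, loosely modelled on samples/hw_breakpoint (variable and handler names illustrative):

    struct perf_event_attr attr;

    hw_breakpoint_init(&attr);
    attr.bp_addr = (unsigned long)&watched_var;  /* illustrative symbol */
    attr.bp_len  = HW_BREAKPOINT_LEN_4;
    attr.bp_type = HW_BREAKPOINT_W;

    wp = register_wide_hw_breakpoint(&attr, wp_triggered, my_cookie);
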
 
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
new file mode 100644 (file)
index 0000000..09097dd
--- /dev/null
@@ -0,0 +1,96 @@
+#ifndef _KERNEL_EVENTS_INTERNAL_H
+#define _KERNEL_EVENTS_INTERNAL_H
+
+#define RING_BUFFER_WRITABLE           0x01
+
+struct ring_buffer {
+       atomic_t                        refcount;
+       struct rcu_head                 rcu_head;
+#ifdef CONFIG_PERF_USE_VMALLOC
+       struct work_struct              work;
+       int                             page_order;     /* allocation order  */
+#endif
+       int                             nr_pages;       /* nr of data pages  */
+       int                             writable;       /* are we writable   */
+
+       atomic_t                        poll;           /* POLL_ for wakeups */
+
+       local_t                         head;           /* write position    */
+       local_t                         nest;           /* nested writers    */
+       local_t                         events;         /* event limit       */
+       local_t                         wakeup;         /* wakeup stamp      */
+       local_t                         lost;           /* nr records lost   */
+
+       long                            watermark;      /* wakeup watermark  */
+
+       struct perf_event_mmap_page     *user_page;
+       void                            *data_pages[0];
+};
+
+extern void rb_free(struct ring_buffer *rb);
+extern struct ring_buffer *
+rb_alloc(int nr_pages, long watermark, int cpu, int flags);
+extern void perf_event_wakeup(struct perf_event *event);
+
+extern void
+perf_event_header__init_id(struct perf_event_header *header,
+                          struct perf_sample_data *data,
+                          struct perf_event *event);
+extern void
+perf_event__output_id_sample(struct perf_event *event,
+                            struct perf_output_handle *handle,
+                            struct perf_sample_data *sample);
+
+extern struct page *
+perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff);
+
+#ifdef CONFIG_PERF_USE_VMALLOC
+/*
+ * Back perf_mmap() with vmalloc memory.
+ *
+ * Required for architectures that have d-cache aliasing issues.
+ */
+
+static inline int page_order(struct ring_buffer *rb)
+{
+       return rb->page_order;
+}
+
+#else
+
+static inline int page_order(struct ring_buffer *rb)
+{
+       return 0;
+}
+#endif
+
+static inline unsigned long perf_data_size(struct ring_buffer *rb)
+{
+       return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
+}
+
+static inline void
+__output_copy(struct perf_output_handle *handle,
+              const void *buf, unsigned int len)
+{
+       do {
+               unsigned long size = min_t(unsigned long, handle->size, len);
+
+               memcpy(handle->addr, buf, size);
+
+               len -= size;
+               handle->addr += size;
+               buf += size;
+               handle->size -= size;
+               if (!handle->size) {
+                       struct ring_buffer *rb = handle->rb;
+
+                       handle->page++;
+                       handle->page &= rb->nr_pages - 1;
+                       handle->addr = rb->data_pages[handle->page];
+                       handle->size = PAGE_SIZE << page_order(rb);
+               }
+       } while (len);
+}
+
+#endif /* _KERNEL_EVENTS_INTERNAL_H */
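
perf_data_size() above is the one size computation shared by both backing stores implemented in ring_buffer.c below; a worked example of why it holds for each (values illustrative):

    /* classic backend: eight separate order-0 pages
     *   nr_pages = 8, page_order() = 0
     *   size = 8 << (PAGE_SHIFT + 0) = 8 * PAGE_SIZE
     *
     * vmalloc backend: one virtually contiguous area
     *   nr_pages = 1, page_order() = ilog2(8) = 3
     *   size = 1 << (PAGE_SHIFT + 3) = 8 * PAGE_SIZE
     */
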
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
new file mode 100644 (file)
index 0000000..a2a2920
--- /dev/null
@@ -0,0 +1,380 @@
+/*
+ * Performance events ring-buffer code:
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * For licensing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+#include "internal.h"
+
+static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
+                             unsigned long offset, unsigned long head)
+{
+       unsigned long mask;
+
+       if (!rb->writable)
+               return true;
+
+       mask = perf_data_size(rb) - 1;
+
+       offset = (offset - tail) & mask;
+       head   = (head   - tail) & mask;
+
+       if ((int)(head - offset) < 0)
+               return false;
+
+       return true;
+}
+
+static void perf_output_wakeup(struct perf_output_handle *handle)
+{
+       atomic_set(&handle->rb->poll, POLL_IN);
+
+       handle->event->pending_wakeup = 1;
+       irq_work_queue(&handle->event->pending);
+}
+
+/*
+ * We need to ensure a later event_id doesn't publish a head when a former
+ * event isn't done writing. However since we need to deal with NMIs we
+ * cannot fully serialize things.
+ *
+ * We only publish the head (and generate a wakeup) when the outer-most
+ * event completes.
+ */
+static void perf_output_get_handle(struct perf_output_handle *handle)
+{
+       struct ring_buffer *rb = handle->rb;
+
+       preempt_disable();
+       local_inc(&rb->nest);
+       handle->wakeup = local_read(&rb->wakeup);
+}
+
+static void perf_output_put_handle(struct perf_output_handle *handle)
+{
+       struct ring_buffer *rb = handle->rb;
+       unsigned long head;
+
+again:
+       head = local_read(&rb->head);
+
+       /*
+        * IRQ/NMI can happen here, which means we can miss a head update.
+        */
+
+       if (!local_dec_and_test(&rb->nest))
+               goto out;
+
+       /*
+        * Publish the known good head. Rely on the full barrier implied
+        * by atomic_dec_and_test() to order the rb->head read and this
+        * write.
+        */
+       rb->user_page->data_head = head;
+
+       /*
+        * Now check if we missed an update, rely on the (compiler)
+        * barrier in atomic_dec_and_test() to re-read rb->head.
+        */
+       if (unlikely(head != local_read(&rb->head))) {
+               local_inc(&rb->nest);
+               goto again;
+       }
+
+       if (handle->wakeup != local_read(&rb->wakeup))
+               perf_output_wakeup(handle);
+
+out:
+       preempt_enable();
+}
+
+int perf_output_begin(struct perf_output_handle *handle,
+                     struct perf_event *event, unsigned int size)
+{
+       struct ring_buffer *rb;
+       unsigned long tail, offset, head;
+       int have_lost;
+       struct perf_sample_data sample_data;
+       struct {
+               struct perf_event_header header;
+               u64                      id;
+               u64                      lost;
+       } lost_event;
+
+       rcu_read_lock();
+       /*
+        * For inherited events we send all the output towards the parent.
+        */
+       if (event->parent)
+               event = event->parent;
+
+       rb = rcu_dereference(event->rb);
+       if (!rb)
+               goto out;
+
+       handle->rb      = rb;
+       handle->event   = event;
+
+       if (!rb->nr_pages)
+               goto out;
+
+       have_lost = local_read(&rb->lost);
+       if (have_lost) {
+               lost_event.header.size = sizeof(lost_event);
+               perf_event_header__init_id(&lost_event.header, &sample_data,
+                                          event);
+               size += lost_event.header.size;
+       }
+
+       perf_output_get_handle(handle);
+
+       do {
+               /*
+                * Userspace could choose to issue a mb() before updating the
+                * tail pointer, so that all reads will be completed before the
+                * write is issued.
+                */
+               tail = ACCESS_ONCE(rb->user_page->data_tail);
+               smp_rmb();
+               offset = head = local_read(&rb->head);
+               head += size;
+               if (unlikely(!perf_output_space(rb, tail, offset, head)))
+                       goto fail;
+       } while (local_cmpxchg(&rb->head, offset, head) != offset);
+
+       if (head - local_read(&rb->wakeup) > rb->watermark)
+               local_add(rb->watermark, &rb->wakeup);
+
+       handle->page = offset >> (PAGE_SHIFT + page_order(rb));
+       handle->page &= rb->nr_pages - 1;
+       handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
+       handle->addr = rb->data_pages[handle->page];
+       handle->addr += handle->size;
+       handle->size = (PAGE_SIZE << page_order(rb)) - handle->size;
+
+       if (have_lost) {
+               lost_event.header.type = PERF_RECORD_LOST;
+               lost_event.header.misc = 0;
+               lost_event.id          = event->id;
+               lost_event.lost        = local_xchg(&rb->lost, 0);
+
+               perf_output_put(handle, lost_event);
+               perf_event__output_id_sample(event, handle, &sample_data);
+       }
+
+       return 0;
+
+fail:
+       local_inc(&rb->lost);
+       perf_output_put_handle(handle);
+out:
+       rcu_read_unlock();
+
+       return -ENOSPC;
+}
+
+void perf_output_copy(struct perf_output_handle *handle,
+                     const void *buf, unsigned int len)
+{
+       __output_copy(handle, buf, len);
+}
+
+void perf_output_end(struct perf_output_handle *handle)
+{
+       perf_output_put_handle(handle);
+       rcu_read_unlock();
+}
+
+static void
+ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
+{
+       long max_size = perf_data_size(rb);
+
+       if (watermark)
+               rb->watermark = min(max_size, watermark);
+
+       if (!rb->watermark)
+               rb->watermark = max_size / 2;
+
+       if (flags & RING_BUFFER_WRITABLE)
+               rb->writable = 1;
+
+       atomic_set(&rb->refcount, 1);
+}
+
+#ifndef CONFIG_PERF_USE_VMALLOC
+
+/*
+ * Back perf_mmap() with regular GFP_KERNEL-0 pages.
+ */
+
+struct page *
+perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+{
+       if (pgoff > rb->nr_pages)
+               return NULL;
+
+       if (pgoff == 0)
+               return virt_to_page(rb->user_page);
+
+       return virt_to_page(rb->data_pages[pgoff - 1]);
+}
+
+static void *perf_mmap_alloc_page(int cpu)
+{
+       struct page *page;
+       int node;
+
+       node = (cpu == -1) ? cpu : cpu_to_node(cpu);
+       page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+       if (!page)
+               return NULL;
+
+       return page_address(page);
+}
+
+struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
+{
+       struct ring_buffer *rb;
+       unsigned long size;
+       int i;
+
+       size = sizeof(struct ring_buffer);
+       size += nr_pages * sizeof(void *);
+
+       rb = kzalloc(size, GFP_KERNEL);
+       if (!rb)
+               goto fail;
+
+       rb->user_page = perf_mmap_alloc_page(cpu);
+       if (!rb->user_page)
+               goto fail_user_page;
+
+       for (i = 0; i < nr_pages; i++) {
+               rb->data_pages[i] = perf_mmap_alloc_page(cpu);
+               if (!rb->data_pages[i])
+                       goto fail_data_pages;
+       }
+
+       rb->nr_pages = nr_pages;
+
+       ring_buffer_init(rb, watermark, flags);
+
+       return rb;
+
+fail_data_pages:
+       for (i--; i >= 0; i--)
+               free_page((unsigned long)rb->data_pages[i]);
+
+       free_page((unsigned long)rb->user_page);
+
+fail_user_page:
+       kfree(rb);
+
+fail:
+       return NULL;
+}
+
+static void perf_mmap_free_page(unsigned long addr)
+{
+       struct page *page = virt_to_page((void *)addr);
+
+       page->mapping = NULL;
+       __free_page(page);
+}
+
+void rb_free(struct ring_buffer *rb)
+{
+       int i;
+
+       perf_mmap_free_page((unsigned long)rb->user_page);
+       for (i = 0; i < rb->nr_pages; i++)
+               perf_mmap_free_page((unsigned long)rb->data_pages[i]);
+       kfree(rb);
+}
+
+#else
+
+struct page *
+perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+{
+       if (pgoff > (1UL << page_order(rb)))
+               return NULL;
+
+       return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
+}
+
+static void perf_mmap_unmark_page(void *addr)
+{
+       struct page *page = vmalloc_to_page(addr);
+
+       page->mapping = NULL;
+}
+
+static void rb_free_work(struct work_struct *work)
+{
+       struct ring_buffer *rb;
+       void *base;
+       int i, nr;
+
+       rb = container_of(work, struct ring_buffer, work);
+       nr = 1 << page_order(rb);
+
+       base = rb->user_page;
+       for (i = 0; i < nr + 1; i++)
+               perf_mmap_unmark_page(base + (i * PAGE_SIZE));
+
+       vfree(base);
+       kfree(rb);
+}
+
+void rb_free(struct ring_buffer *rb)
+{
+       schedule_work(&rb->work);
+}
+
+struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
+{
+       struct ring_buffer *rb;
+       unsigned long size;
+       void *all_buf;
+
+       size = sizeof(struct ring_buffer);
+       size += sizeof(void *);
+
+       rb = kzalloc(size, GFP_KERNEL);
+       if (!rb)
+               goto fail;
+
+       INIT_WORK(&rb->work, rb_free_work);
+
+       all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
+       if (!all_buf)
+               goto fail_all_buf;
+
+       rb->user_page = all_buf;
+       rb->data_pages[0] = all_buf + PAGE_SIZE;
+       rb->page_order = ilog2(nr_pages);
+       rb->nr_pages = 1;
+
+       ring_buffer_init(rb, watermark, flags);
+
+       return rb;
+
+fail_all_buf:
+       kfree(rb);
+
+fail:
+       return NULL;
+}
+
+#endif
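
Taken together, the exported half of this file gives record writers a three-step protocol: reserve space, copy, publish. A hedged sketch of emitting one record with it (the record layout and the event pointer are illustrative):

    struct perf_output_handle handle;
    struct {
            struct perf_event_header header;
            u64 payload;
    } rec;

    rec.header.type = PERF_RECORD_SAMPLE;  /* illustrative type */
    rec.header.misc = 0;
    rec.header.size = sizeof(rec);
    rec.payload     = 42;

    if (!perf_output_begin(&handle, event, rec.header.size)) {
            perf_output_put(&handle, rec);  /* wraps __output_copy() */
            perf_output_end(&handle);
    }
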
index 77981813a1e75d6c3c830dac5084bc37e47c1080..b30fd54eb985a85322765c99d64f06437c4f0499 100644 (file)
@@ -1255,19 +1255,29 @@ static int __kprobes in_kprobes_functions(unsigned long addr)
 /*
  * If we have a symbol_name argument, look it up and add the offset field
  * to it. This way, we can specify a relative address to a symbol.
+ * This returns an encoded error if it fails to look up the symbol or if
+ * the combination of parameters is invalid.
  */
 static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
 {
        kprobe_opcode_t *addr = p->addr;
+
+       if ((p->symbol_name && p->addr) ||
+           (!p->symbol_name && !p->addr))
+               goto invalid;
+
        if (p->symbol_name) {
-               if (addr)
-                       return NULL;
                kprobe_lookup_name(p->symbol_name, addr);
+               if (!addr)
+                       return ERR_PTR(-ENOENT);
        }
 
-       if (!addr)
-               return NULL;
-       return (kprobe_opcode_t *)(((char *)addr) + p->offset);
+       addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
+       if (addr)
+               return addr;
+
+invalid:
+       return ERR_PTR(-EINVAL);
 }
 
 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
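
Since kprobe_addr() now returns ERR_PTR() values, register_kprobe() callers see the distinct error codes directly. A hedged sketch (probe target and handler are illustrative):

    static int my_pre(struct kprobe *p, struct pt_regs *regs)
    {
            return 0;
    }

    static struct kprobe kp = {
            .symbol_name = "do_fork",  /* illustrative target */
            .pre_handler = my_pre,
    };

    int ret = register_kprobe(&kp);
    /*
     * ret == -ENOENT: symbol (or probed module section) not found;
     * ret == -EINVAL: both or neither of symbol_name/addr were set,
     *                 or the offset produced a NULL address.
     */
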
@@ -1311,8 +1321,8 @@ int __kprobes register_kprobe(struct kprobe *p)
        kprobe_opcode_t *addr;
 
        addr = kprobe_addr(p);
-       if (!addr)
-               return -EINVAL;
+       if (IS_ERR(addr))
+               return PTR_ERR(addr);
        p->addr = addr;
 
        ret = check_kprobe_rereg(p);
@@ -1335,6 +1345,8 @@ int __kprobes register_kprobe(struct kprobe *p)
         */
        probed_mod = __module_text_address((unsigned long) p->addr);
        if (probed_mod) {
+               /* Return -ENOENT on failure. */
+               ret = -ENOENT;
                /*
                 * We must hold a refcount of the probed module while updating
                 * its code to prohibit unexpected unloading.
@@ -1351,6 +1363,7 @@ int __kprobes register_kprobe(struct kprobe *p)
                        module_put(probed_mod);
                        goto fail_with_jump_label;
                }
+               /* ret will be updated by the following code */
        }
        preempt_enable();
        jump_label_unlock();
@@ -1399,7 +1412,7 @@ out:
 fail_with_jump_label:
        preempt_enable();
        jump_label_unlock();
-       return -EINVAL;
+       return ret;
 }
 EXPORT_SYMBOL_GPL(register_kprobe);
 
@@ -1686,8 +1699,8 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 
        if (kretprobe_blacklist_size) {
                addr = kprobe_addr(&rp->kp);
-               if (!addr)
-                       return -EINVAL;
+               if (IS_ERR(addr))
+                       return PTR_ERR(addr);
 
                for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
                        if (kretprobe_blacklist[i].addr == addr)
index c518b05fd062d07238f77562a3416d15022ca535..84b9e076812eba9ede91b10efa1221e1b7f8a72c 100644 (file)
@@ -2220,7 +2220,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
        if (task_cpu(p) != new_cpu) {
                p->se.nr_migrations++;
-               perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);
+               perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
        }
 
        __set_task_cpu(p, new_cpu);
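
Every software-event call site shrinks the same way once the nmi flag disappears from perf_sw_event(). The same pattern at another typical call site, a fault handler (sketch):

    /* before: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); */
    perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
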
index eb212f8f8bc801dab1ee7622936ba8caaec4369f..d20c6983aad903636d920737dfd8e9fd0be5b4d4 100644 (file)
@@ -26,12 +26,18 @@ void print_stack_trace(struct stack_trace *trace, int spaces)
 EXPORT_SYMBOL_GPL(print_stack_trace);
 
 /*
- * Architectures that do not implement save_stack_trace_tsk get this
- * weak alias and a once-per-bootup warning (whenever this facility
- * is utilized - for example by procfs):
+ * Architectures that do not implement save_stack_trace_tsk or
+ * save_stack_trace_regs get this weak alias and a once-per-bootup warning
+ * (whenever this facility is utilized - for example by procfs):
  */
 __weak void
 save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
        WARN_ONCE(1, KERN_INFO "save_stack_trace_tsk() not implemented yet.\n");
 }
+
+__weak void
+save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+       WARN_ONCE(1, KERN_INFO "save_stack_trace_regs() not implemented yet.\n");
+}
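
A hedged sketch of how a consumer would call the new weak hook, given a struct pt_regs *regs and the usual struct stack_trace setup:

    unsigned long entries[16];
    struct stack_trace trace = {
            .max_entries = ARRAY_SIZE(entries),
            .entries     = entries,
            .skip        = 0,
    };

    save_stack_trace_regs(regs, &trace);
    print_stack_trace(&trace, 0);
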
index 908038f57440e5eec44233351578105ff8c64e08..c3e4575e7829e1e807652ff2bc86fd0220d1a8a8 100644 (file)
@@ -32,7 +32,6 @@
 
 #include <trace/events/sched.h>
 
-#include <asm/ftrace.h>
 #include <asm/setup.h>
 
 #include "trace_output.h"
@@ -82,14 +81,14 @@ static int ftrace_disabled __read_mostly;
 
 static DEFINE_MUTEX(ftrace_lock);
 
-static struct ftrace_ops ftrace_list_end __read_mostly =
-{
+static struct ftrace_ops ftrace_list_end __read_mostly = {
        .func           = ftrace_stub,
 };
 
 static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
+static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 static struct ftrace_ops global_ops;
@@ -148,9 +147,11 @@ void clear_ftrace_function(void)
 {
        ftrace_trace_function = ftrace_stub;
        __ftrace_trace_function = ftrace_stub;
+       __ftrace_trace_function_delay = ftrace_stub;
        ftrace_pid_function = ftrace_stub;
 }
 
+#undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
 #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
 /*
  * For those archs that do not test ftrace_trace_stop in their
@@ -209,8 +210,13 @@ static void update_ftrace_function(void)
 
 #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
        ftrace_trace_function = func;
+#else
+#ifdef CONFIG_DYNAMIC_FTRACE
+       /* do not update till all functions have been modified */
+       __ftrace_trace_function_delay = func;
 #else
        __ftrace_trace_function = func;
+#endif
        ftrace_trace_function = ftrace_test_stop_func;
 #endif
 }
@@ -785,8 +791,7 @@ static void unregister_ftrace_profiler(void)
        unregister_ftrace_graph();
 }
 #else
-static struct ftrace_ops ftrace_profile_ops __read_mostly =
-{
+static struct ftrace_ops ftrace_profile_ops __read_mostly = {
        .func           = function_profile_call,
 };
 
@@ -806,19 +811,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
                     size_t cnt, loff_t *ppos)
 {
        unsigned long val;
-       char buf[64];           /* big enough to hold a number */
        int ret;
 
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
-
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
                return ret;
 
        val = !!val;
@@ -1182,8 +1178,14 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
        return NULL;
 }
 
+static void
+ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash);
+static void
+ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);
+
 static int
-ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
+ftrace_hash_move(struct ftrace_ops *ops, int enable,
+                struct ftrace_hash **dst, struct ftrace_hash *src)
 {
        struct ftrace_func_entry *entry;
        struct hlist_node *tp, *tn;
@@ -1193,8 +1195,15 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
        unsigned long key;
        int size = src->count;
        int bits = 0;
+       int ret;
        int i;
 
+       /*
+        * Remove the current set, update the hash and add
+        * them back.
+        */
+       ftrace_hash_rec_disable(ops, enable);
+
        /*
         * If the new source is empty, just free dst and assign it
         * the empty_hash.
@@ -1215,9 +1224,10 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
        if (bits > FTRACE_HASH_MAX_BITS)
                bits = FTRACE_HASH_MAX_BITS;
 
+       ret = -ENOMEM;
        new_hash = alloc_ftrace_hash(bits);
        if (!new_hash)
-               return -ENOMEM;
+               goto out;
 
        size = 1 << src->size_bits;
        for (i = 0; i < size; i++) {
@@ -1236,7 +1246,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
        rcu_assign_pointer(*dst, new_hash);
        free_ftrace_hash_rcu(old_hash);
 
-       return 0;
+       ret = 0;
+ out:
+       /*
+        * Enable regardless of ret:
+        *  On success, we enable the new hash.
+        *  On failure, we re-enable the original hash.
+        */
+       ftrace_hash_rec_enable(ops, enable);
+
+       return ret;
 }
 
 /*
@@ -1596,6 +1615,12 @@ static int __ftrace_modify_code(void *data)
 {
        int *command = data;
 
+       /*
+        * Do not call function tracer while we update the code.
+        * We are in stop machine, no worrying about races.
+        */
+       function_trace_stop++;
+
        if (*command & FTRACE_ENABLE_CALLS)
                ftrace_replace_code(1);
        else if (*command & FTRACE_DISABLE_CALLS)
@@ -1609,6 +1634,18 @@ static int __ftrace_modify_code(void *data)
        else if (*command & FTRACE_STOP_FUNC_RET)
                ftrace_disable_ftrace_graph_caller();
 
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+       /*
+        * For archs that call ftrace_test_stop_func(), we must
+        * wait till after we update all the function callers
+        * before we update the callback. This keeps different
+        * ops that record different functions from corrupting
+        * each other.
+        */
+       __ftrace_trace_function = __ftrace_trace_function_delay;
+#endif
+       function_trace_stop--;
+
        return 0;
 }
 
@@ -1744,10 +1781,36 @@ static cycle_t          ftrace_update_time;
 static unsigned long   ftrace_update_cnt;
 unsigned long          ftrace_update_tot_cnt;
 
+static int ops_traces_mod(struct ftrace_ops *ops)
+{
+       struct ftrace_hash *hash;
+
+       hash = ops->filter_hash;
+       return !!(!hash || !hash->count);
+}
+
 static int ftrace_update_code(struct module *mod)
 {
        struct dyn_ftrace *p;
        cycle_t start, stop;
+       unsigned long ref = 0;
+
+       /*
+        * When adding a module, we need to check if tracers are
+        * currently enabled and if they are set to trace all functions.
+        * If they are, we need to enable the module functions as well
+        * as update the reference counts for those function records.
+        */
+       if (mod) {
+               struct ftrace_ops *ops;
+
+               for (ops = ftrace_ops_list;
+                    ops != &ftrace_list_end; ops = ops->next) {
+                       if (ops->flags & FTRACE_OPS_FL_ENABLED &&
+                           ops_traces_mod(ops))
+                               ref++;
+               }
+       }
 
        start = ftrace_now(raw_smp_processor_id());
        ftrace_update_cnt = 0;
@@ -1760,7 +1823,7 @@ static int ftrace_update_code(struct module *mod)
 
                p = ftrace_new_addrs;
                ftrace_new_addrs = p->newlist;
-               p->flags = 0L;
+               p->flags = ref;
 
                /*
                 * Do the initial record conversion from mcount jump
@@ -1783,7 +1846,7 @@ static int ftrace_update_code(struct module *mod)
                 * conversion puts the module to the correct state, thus
                 * passing the ftrace_make_call check.
                 */
-               if (ftrace_start_up) {
+               if (ftrace_start_up && ref) {
                        int failed = __ftrace_replace_code(p, 1);
                        if (failed) {
                                ftrace_bug(failed, p->ip);
@@ -2407,10 +2470,9 @@ ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod)
  */
 
 static int
-ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
+ftrace_mod_callback(struct ftrace_hash *hash,
+                   char *func, char *cmd, char *param, int enable)
 {
-       struct ftrace_ops *ops = &global_ops;
-       struct ftrace_hash *hash;
        char *mod;
        int ret = -EINVAL;
 
@@ -2430,11 +2492,6 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
        if (!strlen(mod))
                return ret;
 
-       if (enable)
-               hash = ops->filter_hash;
-       else
-               hash = ops->notrace_hash;
-
        ret = ftrace_match_module_records(hash, func, mod);
        if (!ret)
                ret = -EINVAL;
@@ -2760,7 +2817,7 @@ static int ftrace_process_regex(struct ftrace_hash *hash,
        mutex_lock(&ftrace_cmd_mutex);
        list_for_each_entry(p, &ftrace_commands, list) {
                if (strcmp(p->name, command) == 0) {
-                       ret = p->func(func, command, next, enable);
+                       ret = p->func(hash, func, command, next, enable);
                        goto out_unlock;
                }
        }
@@ -2857,7 +2914,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
                ftrace_match_records(hash, buf, len);
 
        mutex_lock(&ftrace_lock);
-       ret = ftrace_hash_move(orig_hash, hash);
+       ret = ftrace_hash_move(ops, enable, orig_hash, hash);
+       if (!ret && (ops->flags & FTRACE_OPS_FL_ENABLED)
+           && ftrace_enabled)
+               ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+
        mutex_unlock(&ftrace_lock);
 
        mutex_unlock(&ftrace_regex_lock);
@@ -3040,18 +3101,12 @@ ftrace_regex_release(struct inode *inode, struct file *file)
                        orig_hash = &iter->ops->notrace_hash;
 
                mutex_lock(&ftrace_lock);
-               /*
-                * Remove the current set, update the hash and add
-                * them back.
-                */
-               ftrace_hash_rec_disable(iter->ops, filter_hash);
-               ret = ftrace_hash_move(orig_hash, iter->hash);
-               if (!ret) {
-                       ftrace_hash_rec_enable(iter->ops, filter_hash);
-                       if (iter->ops->flags & FTRACE_OPS_FL_ENABLED
-                           && ftrace_enabled)
-                               ftrace_run_update_code(FTRACE_ENABLE_CALLS);
-               }
+               ret = ftrace_hash_move(iter->ops, filter_hash,
+                                      orig_hash, iter->hash);
+               if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
+                   && ftrace_enabled)
+                       ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+
                mutex_unlock(&ftrace_lock);
        }
        free_ftrace_hash(iter->hash);
@@ -3330,7 +3385,7 @@ static int ftrace_process_locs(struct module *mod,
 {
        unsigned long *p;
        unsigned long addr;
-       unsigned long flags;
+       unsigned long flags = 0; /* Shut up gcc */
 
        mutex_lock(&ftrace_lock);
        p = start;
@@ -3348,12 +3403,18 @@ static int ftrace_process_locs(struct module *mod,
        }
 
        /*
-        * Disable interrupts to prevent interrupts from executing
-        * code that is being modified.
+        * We only need to disable interrupts on start up
+        * because we are modifying code that an interrupt
+        * may execute, and the modification is not atomic.
+        * But for modules, nothing runs the code we modify
+        * until we are finished with it, and there's no
+        * reason to cause large interrupt latencies while we do it.
         */
-       local_irq_save(flags);
+       if (!mod)
+               local_irq_save(flags);
        ftrace_update_code(mod);
-       local_irq_restore(flags);
+       if (!mod)
+               local_irq_restore(flags);
        mutex_unlock(&ftrace_lock);
 
        return 0;
index b0c7aa4079431fa630e24bc58a4cce3364b70abb..731201bf4acc6af1efd7617bdc533ca1efaacabf 100644 (file)
@@ -997,15 +997,21 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
                             unsigned nr_pages)
 {
        struct buffer_page *bpage, *tmp;
-       unsigned long addr;
        LIST_HEAD(pages);
        unsigned i;
 
        WARN_ON(!nr_pages);
 
        for (i = 0; i < nr_pages; i++) {
+               struct page *page;
+               /*
+                * The __GFP_NORETRY flag makes sure that the allocation
+                * fails gracefully, without invoking the oom-killer and
+                * destabilizing the system.
+                */
                bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
-                                   GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
+                                   GFP_KERNEL | __GFP_NORETRY,
+                                   cpu_to_node(cpu_buffer->cpu));
                if (!bpage)
                        goto free_pages;
 
@@ -1013,10 +1019,11 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
                list_add(&bpage->list, &pages);
 
-               addr = __get_free_page(GFP_KERNEL);
-               if (!addr)
+               page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
+                                       GFP_KERNEL | __GFP_NORETRY, 0);
+               if (!page)
                        goto free_pages;
-               bpage->page = (void *)addr;
+               bpage->page = page_address(page);
                rb_init_page(bpage->page);
        }
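Every allocation in these ring-buffer hunks follows the same idiom: allocate on the buffer's home node and opt out of the OOM retry loop so an over-large request fails cleanly. A standalone sketch of the idiom (function name illustrative):

    #include <linux/gfp.h>
    #include <linux/mm.h>
    #include <linux/topology.h>

    /* Sketch: node-local page allocation that fails gracefully. */
    static void *alloc_buffer_data_page(int cpu)
    {
            struct page *page;

            /* __GFP_NORETRY: return NULL rather than invoke the oom-killer */
            page = alloc_pages_node(cpu_to_node(cpu),
                                    GFP_KERNEL | __GFP_NORETRY, 0);
            if (!page)
                    return NULL;       /* caller unwinds what it built */

            return page_address(page);
    }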
 
@@ -1045,7 +1052,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
        struct buffer_page *bpage;
-       unsigned long addr;
+       struct page *page;
        int ret;
 
        cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
@@ -1067,10 +1074,10 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
        rb_check_bpage(cpu_buffer, bpage);
 
        cpu_buffer->reader_page = bpage;
-       addr = __get_free_page(GFP_KERNEL);
-       if (!addr)
+       page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
+       if (!page)
                goto fail_free_reader;
-       bpage->page = (void *)addr;
+       bpage->page = page_address(page);
        rb_init_page(bpage->page);
 
        INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
@@ -1314,7 +1321,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
        unsigned nr_pages, rm_pages, new_pages;
        struct buffer_page *bpage, *tmp;
        unsigned long buffer_size;
-       unsigned long addr;
        LIST_HEAD(pages);
        int i, cpu;
 
@@ -1375,16 +1381,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
 
        for_each_buffer_cpu(buffer, cpu) {
                for (i = 0; i < new_pages; i++) {
+                       struct page *page;
+                       /*
+                        * The __GFP_NORETRY flag makes sure that the
+                        * allocation fails gracefully, without invoking the
+                        * oom-killer and destabilizing the system.
+                        */
                        bpage = kzalloc_node(ALIGN(sizeof(*bpage),
                                                  cache_line_size()),
-                                           GFP_KERNEL, cpu_to_node(cpu));
+                                           GFP_KERNEL | __GFP_NORETRY,
+                                           cpu_to_node(cpu));
                        if (!bpage)
                                goto free_pages;
                        list_add(&bpage->list, &pages);
-                       addr = __get_free_page(GFP_KERNEL);
-                       if (!addr)
+                       page = alloc_pages_node(cpu_to_node(cpu),
+                                               GFP_KERNEL | __GFP_NORETRY, 0);
+                       if (!page)
                                goto free_pages;
-                       bpage->page = (void *)addr;
+                       bpage->page = page_address(page);
                        rb_init_page(bpage->page);
                }
        }
@@ -3730,16 +3744,17 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
  * Returns:
  *  The page allocated, or NULL on error.
  */
-void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
+void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
 {
        struct buffer_data_page *bpage;
-       unsigned long addr;
+       struct page *page;
 
-       addr = __get_free_page(GFP_KERNEL);
-       if (!addr)
+       page = alloc_pages_node(cpu_to_node(cpu),
+                               GFP_KERNEL | __GFP_NORETRY, 0);
+       if (!page)
                return NULL;
 
-       bpage = (void *)addr;
+       bpage = page_address(page);
 
        rb_init_page(bpage);
 
@@ -3978,20 +3993,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
                size_t cnt, loff_t *ppos)
 {
        unsigned long *p = filp->private_data;
-       char buf[64];
        unsigned long val;
        int ret;
 
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
-
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
                return ret;
 
        if (val)
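kstrtoul_from_user() collapses the copy_from_user()/NUL-terminate/strict_strtoul() boilerplate that every one of these write handlers used to carry; it bounds-checks, copies and parses in a single call. A minimal sketch of the resulting handler shape (names illustrative):

    /* Sketch: a debugfs write handler after the conversion. */
    static ssize_t example_write(struct file *filp, const char __user *ubuf,
                                 size_t cnt, loff_t *ppos)
    {
            unsigned long val;
            int ret;

            ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
            if (ret)
                    return ret;        /* -EFAULT or -EINVAL, as before */

            /* ... act on val ... */
            *ppos += cnt;
            return cnt;
    }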
index 302f8a6146352a998f2730923598346196e84856..a5457d577b98313b1ca8b1670ad32a2140001498 100644 (file)
@@ -106,7 +106,7 @@ static enum event_status read_page(int cpu)
        int inc;
        int i;
 
-       bpage = ring_buffer_alloc_read_page(buffer);
+       bpage = ring_buffer_alloc_read_page(buffer, cpu);
        if (!bpage)
                return EVENT_DROPPED;
 
index ee9c921d7f21e4d0578e35032607e9bc542e219f..e5df02c69b1d6d0d29c64cd9e2165ddfc6d9a3f6 100644 (file)
@@ -343,26 +343,27 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 static int trace_stop_count;
 static DEFINE_SPINLOCK(tracing_start_lock);
 
+static void wakeup_work_handler(struct work_struct *work)
+{
+       wake_up(&trace_wait);
+}
+
+static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
+
 /**
  * trace_wake_up - wake up tasks waiting for trace input
  *
- * Simply wakes up any task that is blocked on the trace_wait
- * queue. These is used with trace_poll for tasks polling the trace.
+ * Schedules a delayed work to wake up any task that is blocked on the
+ * trace_wait queue. This is used with trace_poll for tasks polling the
+ * trace.
  */
 void trace_wake_up(void)
 {
-       int cpu;
+       const unsigned long delay = msecs_to_jiffies(2);
 
        if (trace_flags & TRACE_ITER_BLOCK)
                return;
-       /*
-        * The runqueue_is_locked() can fail, but this is the best we
-        * have for now:
-        */
-       cpu = get_cpu();
-       if (!runqueue_is_locked(cpu))
-               wake_up(&trace_wait);
-       put_cpu();
+       schedule_delayed_work(&wakeup_work, delay);
 }
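Deferring the wake-up to a delayed work item avoids touching the runqueue from tracing context entirely, and the short delay coalesces bursts of trace_wake_up() calls into a single wake-up. The idiom in isolation:

    #include <linux/workqueue.h>
    #include <linux/wait.h>
    #include <linux/jiffies.h>

    /* Sketch: deferred, coalesced wake-up (names illustrative). */
    static DECLARE_WAIT_QUEUE_HEAD(example_wait);

    static void example_wakeup_fn(struct work_struct *work)
    {
            wake_up(&example_wait);    /* safe: runs in process context */
    }

    static DECLARE_DELAYED_WORK(example_wakeup, example_wakeup_fn);

    void example_poke_readers(void)
    {
            /* repeat calls while the work is pending collapse into one */
            schedule_delayed_work(&example_wakeup, msecs_to_jiffies(2));
    }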
 
 static int __init set_buf_size(char *str)
@@ -424,6 +425,7 @@ static const char *trace_options[] = {
        "graph-time",
        "record-cmd",
        "overwrite",
+       "disable_on_free",
        NULL
 };
 
@@ -1191,6 +1193,18 @@ void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
 
+void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+                                           struct ring_buffer_event *event,
+                                           unsigned long flags, int pc,
+                                           struct pt_regs *regs)
+{
+       ring_buffer_unlock_commit(buffer, event);
+
+       ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
+       ftrace_trace_userstack(buffer, flags, pc);
+}
+EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs);
+
 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
                                         struct ring_buffer_event *event)
 {
@@ -1234,30 +1248,103 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
 }
 
 #ifdef CONFIG_STACKTRACE
+
+#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+struct ftrace_stack {
+       unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
+};
+
+static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
+static DEFINE_PER_CPU(int, ftrace_stack_reserve);
+
 static void __ftrace_trace_stack(struct ring_buffer *buffer,
                                 unsigned long flags,
-                                int skip, int pc)
+                                int skip, int pc, struct pt_regs *regs)
 {
        struct ftrace_event_call *call = &event_kernel_stack;
        struct ring_buffer_event *event;
        struct stack_entry *entry;
        struct stack_trace trace;
+       int use_stack;
+       int size = FTRACE_STACK_ENTRIES;
+
+       trace.nr_entries        = 0;
+       trace.skip              = skip;
+
+       /*
+        * Since events can happen in NMIs there's no safe way to
+        * Since events can happen in NMIs there's no safe way to
+        * use the per-cpu ftrace_stack. We reserve it, and if an
+        * interrupt or NMI comes in, it will just have to use the
+        * default FTRACE_STACK_ENTRIES.
+       preempt_disable_notrace();
+
+       use_stack = ++__get_cpu_var(ftrace_stack_reserve);
+       /*
+        * We don't need any atomic variables, just a barrier.
+        * If an interrupt comes in, we don't care, because it would
+        * have exited and put the counter back to what we want.
+        * We just need a barrier to keep gcc from moving things
+        * around.
+        */
+       barrier();
+       if (use_stack == 1) {
+               trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
+               trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
+
+               if (regs)
+                       save_stack_trace_regs(regs, &trace);
+               else
+                       save_stack_trace(&trace);
+
+               if (trace.nr_entries > size)
+                       size = trace.nr_entries;
+       } else
+               /* From now on, use_stack is a boolean */
+               use_stack = 0;
+
+       size *= sizeof(unsigned long);
 
        event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
-                                         sizeof(*entry), flags, pc);
+                                         sizeof(*entry) + size, flags, pc);
        if (!event)
-               return;
-       entry   = ring_buffer_event_data(event);
-       memset(&entry->caller, 0, sizeof(entry->caller));
+               goto out;
+       entry = ring_buffer_event_data(event);
 
-       trace.nr_entries        = 0;
-       trace.max_entries       = FTRACE_STACK_ENTRIES;
-       trace.skip              = skip;
-       trace.entries           = entry->caller;
+       memset(&entry->caller, 0, size);
+
+       if (use_stack)
+               memcpy(&entry->caller, trace.entries,
+                      trace.nr_entries * sizeof(unsigned long));
+       else {
+               trace.max_entries       = FTRACE_STACK_ENTRIES;
+               trace.entries           = entry->caller;
+               if (regs)
+                       save_stack_trace_regs(regs, &trace);
+               else
+                       save_stack_trace(&trace);
+       }
+
+       entry->size = trace.nr_entries;
 
-       save_stack_trace(&trace);
        if (!filter_check_discard(call, entry, buffer, event))
                ring_buffer_unlock_commit(buffer, event);
+
+ out:
+       /* Again, don't let gcc optimize things here */
+       barrier();
+       __get_cpu_var(ftrace_stack_reserve)--;
+       preempt_enable_notrace();
+
+}
+
+void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
+                            int skip, int pc, struct pt_regs *regs)
+{
+       if (!(trace_flags & TRACE_ITER_STACKTRACE))
+               return;
+
+       __ftrace_trace_stack(buffer, flags, skip, pc, regs);
 }
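The reservation counter hands the large per-cpu stack to the first user on a CPU without any atomics: nested interrupt or NMI users see a count above one and fall back to the small buffer reserved inside the ring-buffer event itself. The idiom, reduced to its bones (names illustrative):

    /* Sketch: lock-free first-user reservation of a per-cpu resource. */
    static DEFINE_PER_CPU(int, resource_reserve);

    static void use_big_or_small(void)
    {
            int first;

            preempt_disable_notrace();
            first = (++__get_cpu_var(resource_reserve) == 1);
            barrier();     /* no atomics needed; just stop gcc reordering */

            if (first) {
                    /* exclusive owner of the big per-cpu buffer */
            } else {
                    /* nested in irq/NMI: use the small fallback */
            }

            barrier();
            __get_cpu_var(resource_reserve)--;
            preempt_enable_notrace();
    }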
 
 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
@@ -1266,13 +1353,13 @@ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
        if (!(trace_flags & TRACE_ITER_STACKTRACE))
                return;
 
-       __ftrace_trace_stack(buffer, flags, skip, pc);
+       __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
 }
 
 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
                   int pc)
 {
-       __ftrace_trace_stack(tr->buffer, flags, skip, pc);
+       __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL);
 }
 
 /**
@@ -1288,7 +1375,7 @@ void trace_dump_stack(void)
        local_save_flags(flags);
 
        /* skipping 3 traces, seems to get us at the caller of this function */
-       __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
+       __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL);
 }
 
 static DEFINE_PER_CPU(int, user_stack_count);
@@ -1536,7 +1623,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
 
        ftrace_enable_cpu();
 
-       return event ? ring_buffer_event_data(event) : NULL;
+       if (event) {
+               iter->ent_size = ring_buffer_event_length(event);
+               return ring_buffer_event_data(event);
+       }
+       iter->ent_size = 0;
+       return NULL;
 }
 
 static struct trace_entry *
@@ -2051,6 +2143,9 @@ void trace_default_header(struct seq_file *m)
 {
        struct trace_iterator *iter = m->private;
 
+       if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
+               return;
+
        if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
                /* print nothing if the buffers are empty */
                if (trace_empty(iter))
@@ -2701,20 +2796,11 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
                   size_t cnt, loff_t *ppos)
 {
        struct trace_array *tr = filp->private_data;
-       char buf[64];
        unsigned long val;
        int ret;
 
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
-
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
                return ret;
 
        val = !!val;
@@ -2767,7 +2853,7 @@ int tracer_init(struct tracer *t, struct trace_array *tr)
        return t->init(tr);
 }
 
-static int tracing_resize_ring_buffer(unsigned long size)
+static int __tracing_resize_ring_buffer(unsigned long size)
 {
        int ret;
 
@@ -2819,6 +2905,41 @@ static int tracing_resize_ring_buffer(unsigned long size)
        return ret;
 }
 
+static ssize_t tracing_resize_ring_buffer(unsigned long size)
+{
+       int cpu, ret = size;
+
+       mutex_lock(&trace_types_lock);
+
+       tracing_stop();
+
+       /* disable all cpu buffers */
+       for_each_tracing_cpu(cpu) {
+               if (global_trace.data[cpu])
+                       atomic_inc(&global_trace.data[cpu]->disabled);
+               if (max_tr.data[cpu])
+                       atomic_inc(&max_tr.data[cpu]->disabled);
+       }
+
+       if (size != global_trace.entries)
+               ret = __tracing_resize_ring_buffer(size);
+
+       if (ret < 0)
+               ret = -ENOMEM;
+
+       for_each_tracing_cpu(cpu) {
+               if (global_trace.data[cpu])
+                       atomic_dec(&global_trace.data[cpu]->disabled);
+               if (max_tr.data[cpu])
+                       atomic_dec(&max_tr.data[cpu]->disabled);
+       }
+
+       tracing_start();
+       mutex_unlock(&trace_types_lock);
+
+       return ret;
+}
+
 
 /**
  * tracing_update_buffers - used by tracing facility to expand ring buffers
@@ -2836,7 +2957,7 @@ int tracing_update_buffers(void)
 
        mutex_lock(&trace_types_lock);
        if (!ring_buffer_expanded)
-               ret = tracing_resize_ring_buffer(trace_buf_size);
+               ret = __tracing_resize_ring_buffer(trace_buf_size);
        mutex_unlock(&trace_types_lock);
 
        return ret;
@@ -2860,7 +2981,7 @@ static int tracing_set_tracer(const char *buf)
        mutex_lock(&trace_types_lock);
 
        if (!ring_buffer_expanded) {
-               ret = tracing_resize_ring_buffer(trace_buf_size);
+               ret = __tracing_resize_ring_buffer(trace_buf_size);
                if (ret < 0)
                        goto out;
                ret = 0;
@@ -2966,20 +3087,11 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
                      size_t cnt, loff_t *ppos)
 {
        unsigned long *ptr = filp->private_data;
-       char buf[64];
        unsigned long val;
        int ret;
 
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
-
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
                return ret;
 
        *ptr = val * 1000;
@@ -3434,67 +3546,54 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
                      size_t cnt, loff_t *ppos)
 {
        unsigned long val;
-       char buf[64];
-       int ret, cpu;
-
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
+       int ret;
 
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
                return ret;
 
        /* must have at least 1 entry */
        if (!val)
                return -EINVAL;
 
-       mutex_lock(&trace_types_lock);
-
-       tracing_stop();
-
-       /* disable all cpu buffers */
-       for_each_tracing_cpu(cpu) {
-               if (global_trace.data[cpu])
-                       atomic_inc(&global_trace.data[cpu]->disabled);
-               if (max_tr.data[cpu])
-                       atomic_inc(&max_tr.data[cpu]->disabled);
-       }
-
        /* value is in KB */
        val <<= 10;
 
-       if (val != global_trace.entries) {
-               ret = tracing_resize_ring_buffer(val);
-               if (ret < 0) {
-                       cnt = ret;
-                       goto out;
-               }
-       }
+       ret = tracing_resize_ring_buffer(val);
+       if (ret < 0)
+               return ret;
 
        *ppos += cnt;
 
-       /* If check pages failed, return ENOMEM */
-       if (tracing_disabled)
-               cnt = -ENOMEM;
- out:
-       for_each_tracing_cpu(cpu) {
-               if (global_trace.data[cpu])
-                       atomic_dec(&global_trace.data[cpu]->disabled);
-               if (max_tr.data[cpu])
-                       atomic_dec(&max_tr.data[cpu]->disabled);
-       }
+       return cnt;
+}
 
-       tracing_start();
-       mutex_unlock(&trace_types_lock);
+static ssize_t
+tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
+                         size_t cnt, loff_t *ppos)
+{
+       /*
+        * There is no need to read what the user has written; this function
+        * only exists so that "echo" into the file does not return an error.
+        */
+
+       *ppos += cnt;
 
        return cnt;
 }
 
+static int
+tracing_free_buffer_release(struct inode *inode, struct file *filp)
+{
+       /* disable tracing ? */
+       if (trace_flags & TRACE_ITER_STOP_ON_FREE)
+               tracing_off();
+       /* resize the ring buffer to 0 */
+       tracing_resize_ring_buffer(0);
+
+       return 0;
+}
+
 static int mark_printk(const char *fmt, ...)
 {
        int ret;
@@ -3640,6 +3739,11 @@ static const struct file_operations tracing_entries_fops = {
        .llseek         = generic_file_llseek,
 };
 
+static const struct file_operations tracing_free_buffer_fops = {
+       .write          = tracing_free_buffer_write,
+       .release        = tracing_free_buffer_release,
+};
+
 static const struct file_operations tracing_mark_fops = {
        .open           = tracing_open_generic,
        .write          = tracing_mark_write,
@@ -3696,7 +3800,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
                return 0;
 
        if (!info->spare)
-               info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
+               info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu);
        if (!info->spare)
                return -ENOMEM;
 
@@ -3853,7 +3957,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 
                ref->ref = 1;
                ref->buffer = info->tr->buffer;
-               ref->page = ring_buffer_alloc_read_page(ref->buffer);
+               ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu);
                if (!ref->page) {
                        kfree(ref);
                        break;
@@ -3862,8 +3966,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                r = ring_buffer_read_page(ref->buffer, &ref->page,
                                          len, info->cpu, 1);
                if (r < 0) {
-                       ring_buffer_free_read_page(ref->buffer,
-                                                  ref->page);
+                       ring_buffer_free_read_page(ref->buffer, ref->page);
                        kfree(ref);
                        break;
                }
@@ -4099,19 +4202,10 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
 {
        struct trace_option_dentry *topt = filp->private_data;
        unsigned long val;
-       char buf[64];
        int ret;
 
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
-
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
                return ret;
 
        if (val != 0 && val != 1)
@@ -4159,20 +4253,11 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
                         loff_t *ppos)
 {
        long index = (long)filp->private_data;
-       char buf[64];
        unsigned long val;
        int ret;
 
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
-
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
                return ret;
 
        if (val != 0 && val != 1)
@@ -4365,6 +4450,9 @@ static __init int tracer_init_debugfs(void)
        trace_create_file("buffer_size_kb", 0644, d_tracer,
                        &global_trace, &tracing_entries_fops);
 
+       trace_create_file("free_buffer", 0644, d_tracer,
+                       &global_trace, &tracing_free_buffer_fops);
+
        trace_create_file("trace_marker", 0220, d_tracer,
                        NULL, &tracing_mark_fops);
 
index 229f8591f61db792780fb7579a1061458a663b4c..3f381d0b20a823a3abe67bdda1073e60fd0b4360 100644 (file)
@@ -278,6 +278,29 @@ struct tracer {
 };
 
 
+/* Only current can touch trace_recursion */
+#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
+#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
+
+/* The ring buffer uses the 10 least-significant bits to count */
+#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
+
+/* for function tracing recursion */
+#define TRACE_INTERNAL_BIT             (1<<11)
+#define TRACE_GLOBAL_BIT               (1<<12)
+/*
+ * Abuse of the trace_recursion:
+ * we need a way to maintain state while tracing the function graph
+ * in irq context, because we may want to trace a particular function
+ * that was called in irq context even though irq tracing is off.
+ * Since this can only be modified by current, we can reuse
+ * trace_recursion.
+ */
+#define TRACE_IRQ_BIT                  (1<<13)
+
+#define trace_recursion_set(bit)       do { (current)->trace_recursion |= (bit); } while (0)
+#define trace_recursion_clear(bit)     do { (current)->trace_recursion &= ~(bit); } while (0)
+#define trace_recursion_test(bit)      ((current)->trace_recursion & (bit))
+
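Moving these macros above trace.h's other definitions lets any tracer carry per-task state in current->trace_recursion. A hedged usage sketch, mirroring how TRACE_IRQ_BIT is set in ftrace_graph_addr() and tested in ftrace_graph_ignore_irqs() in the hunks below:

    if (in_irq())
            trace_recursion_set(TRACE_IRQ_BIT);
    else
            trace_recursion_clear(TRACE_IRQ_BIT);

    /* later, when deciding whether to skip irq context: */
    if (trace_recursion_test(TRACE_IRQ_BIT))
            return 0;      /* user asked for this function: trace it */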
 #define TRACE_PIPE_ALL_CPU     -1
 
 int tracer_init(struct tracer *t, struct trace_array *tr);
@@ -389,6 +412,9 @@ void update_max_tr_single(struct trace_array *tr,
 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
                        int skip, int pc);
 
+void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
+                            int skip, int pc, struct pt_regs *regs);
+
 void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
                            int pc);
 
@@ -400,6 +426,12 @@ static inline void ftrace_trace_stack(struct ring_buffer *buffer,
 {
 }
 
+static inline void ftrace_trace_stack_regs(struct ring_buffer *buffer,
+                                          unsigned long flags, int skip,
+                                          int pc, struct pt_regs *regs)
+{
+}
+
 static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
                                          unsigned long flags, int pc)
 {
@@ -507,8 +539,18 @@ static inline int ftrace_graph_addr(unsigned long addr)
                return 1;
 
        for (i = 0; i < ftrace_graph_count; i++) {
-               if (addr == ftrace_graph_funcs[i])
+               if (addr == ftrace_graph_funcs[i]) {
+                       /*
+                        * If no irqs are to be traced, but a set_graph_function
+                        * is set, and called by an interrupt handler, we still
+                        * want to trace it.
+                        */
+                       if (in_irq())
+                               trace_recursion_set(TRACE_IRQ_BIT);
+                       else
+                               trace_recursion_clear(TRACE_IRQ_BIT);
                        return 1;
+               }
        }
 
        return 0;
@@ -609,6 +651,7 @@ enum trace_iterator_flags {
        TRACE_ITER_GRAPH_TIME           = 0x80000,
        TRACE_ITER_RECORD_CMD           = 0x100000,
        TRACE_ITER_OVERWRITE            = 0x200000,
+       TRACE_ITER_STOP_ON_FREE         = 0x400000,
 };
 
 /*
@@ -677,6 +720,7 @@ struct event_subsystem {
        struct dentry           *entry;
        struct event_filter     *filter;
        int                     nr_events;
+       int                     ref_count;
 };
 
 #define FILTER_PRED_INVALID    ((unsigned short)-1)
@@ -784,19 +828,4 @@ extern const char *__stop___trace_bprintk_fmt[];
        FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
 #include "trace_entries.h"
 
-/* Only current can touch trace_recursion */
-#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
-#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
-
-/* Ring buffer has the 10 LSB bits to count */
-#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
-
-/* for function tracing recursion */
-#define TRACE_INTERNAL_BIT             (1<<11)
-#define TRACE_GLOBAL_BIT               (1<<12)
-
-#define trace_recursion_set(bit)       do { (current)->trace_recursion |= (bit); } while (0)
-#define trace_recursion_clear(bit)     do { (current)->trace_recursion &= ~(bit); } while (0)
-#define trace_recursion_test(bit)      ((current)->trace_recursion & (bit))
-
 #endif /* _LINUX_KERNEL_TRACE_H */
index e32744c84d9497bd041a2e612d82f1f859406af9..93365907f219e71c446c0cf7d3a742d6eb83880a 100644 (file)
@@ -161,7 +161,8 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
        TRACE_STACK,
 
        F_STRUCT(
-               __array(        unsigned long,  caller, FTRACE_STACK_ENTRIES    )
+               __field(        int,            size    )
+               __dynamic_array(unsigned long,  caller  )
        ),
 
        F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
index 686ec399f2a83a007ddacfdbd07fd7cced24783f..581876f9f3872e9103a0110893396652599a1d6a 100644 (file)
@@ -244,6 +244,35 @@ static void ftrace_clear_events(void)
        mutex_unlock(&event_mutex);
 }
 
+static void __put_system(struct event_subsystem *system)
+{
+       struct event_filter *filter = system->filter;
+
+       WARN_ON_ONCE(system->ref_count == 0);
+       if (--system->ref_count)
+               return;
+
+       if (filter) {
+               kfree(filter->filter_string);
+               kfree(filter);
+       }
+       kfree(system->name);
+       kfree(system);
+}
+
+static void __get_system(struct event_subsystem *system)
+{
+       WARN_ON_ONCE(system->ref_count == 0);
+       system->ref_count++;
+}
+
+static void put_system(struct event_subsystem *system)
+{
+       mutex_lock(&event_mutex);
+       __put_system(system);
+       mutex_unlock(&event_mutex);
+}
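The double-underscore/plain split is the usual kernel layering: __get_system()/__put_system() expect the caller to hold event_mutex, while put_system() takes it itself. An open file on "enable" or "filter" therefore keeps the subsystem (and its name) alive even after all of its events are gone. An illustrative pairing:

    /* Sketch: the reference the open file holds. */
    mutex_lock(&event_mutex);
    __get_system(system);        /* taken at open time                 */
    mutex_unlock(&event_mutex);

    /* ... file in use; the subsystem cannot be freed meanwhile ... */

    put_system(system);          /* at release: locks, drops, may free */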
+
 /*
  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
  */
@@ -486,20 +515,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
                   loff_t *ppos)
 {
        struct ftrace_event_call *call = filp->private_data;
-       char buf[64];
        unsigned long val;
        int ret;
 
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
-
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
                return ret;
 
        ret = tracing_update_buffers();
@@ -528,7 +548,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                   loff_t *ppos)
 {
        const char set_to_char[4] = { '?', '0', '1', 'X' };
-       const char *system = filp->private_data;
+       struct event_subsystem *system = filp->private_data;
        struct ftrace_event_call *call;
        char buf[2];
        int set = 0;
@@ -539,7 +559,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                if (!call->name || !call->class || !call->class->reg)
                        continue;
 
-               if (system && strcmp(call->class->system, system) != 0)
+               if (system && strcmp(call->class->system, system->name) != 0)
                        continue;
 
                /*
@@ -569,21 +589,13 @@ static ssize_t
 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
                    loff_t *ppos)
 {
-       const char *system = filp->private_data;
+       struct event_subsystem *system = filp->private_data;
+       const char *name = NULL;
        unsigned long val;
-       char buf[64];
        ssize_t ret;
 
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
-
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+       if (ret)
                return ret;
 
        ret = tracing_update_buffers();
@@ -593,7 +605,14 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
        if (val != 0 && val != 1)
                return -EINVAL;
 
-       ret = __ftrace_set_clr_event(NULL, system, NULL, val);
+       /*
+        * Opening of "enable" adds a ref count to system,
+        * so the name is safe to use.
+        */
+       if (system)
+               name = system->name;
+
+       ret = __ftrace_set_clr_event(NULL, name, NULL, val);
        if (ret)
                goto out;
 
@@ -826,6 +845,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
        return cnt;
 }
 
+static LIST_HEAD(event_subsystems);
+
+static int subsystem_open(struct inode *inode, struct file *filp)
+{
+       struct event_subsystem *system = NULL;
+       int ret;
+
+       if (!inode->i_private)
+               goto skip_search;
+
+       /* Make sure the system still exists */
+       mutex_lock(&event_mutex);
+       list_for_each_entry(system, &event_subsystems, list) {
+               if (system == inode->i_private) {
+                       /* Don't open systems with no events */
+                       if (!system->nr_events) {
+                               system = NULL;
+                               break;
+                       }
+                       __get_system(system);
+                       break;
+               }
+       }
+       mutex_unlock(&event_mutex);
+
+       if (system != inode->i_private)
+               return -ENODEV;
+
+ skip_search:
+       ret = tracing_open_generic(inode, filp);
+       if (ret < 0 && system)
+               put_system(system);
+
+       return ret;
+}
+
+static int subsystem_release(struct inode *inode, struct file *file)
+{
+       struct event_subsystem *system = inode->i_private;
+
+       if (system)
+               put_system(system);
+
+       return 0;
+}
+
 static ssize_t
 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
                      loff_t *ppos)
@@ -963,17 +1028,19 @@ static const struct file_operations ftrace_event_filter_fops = {
 };
 
 static const struct file_operations ftrace_subsystem_filter_fops = {
-       .open = tracing_open_generic,
+       .open = subsystem_open,
        .read = subsystem_filter_read,
        .write = subsystem_filter_write,
        .llseek = default_llseek,
+       .release = subsystem_release,
 };
 
 static const struct file_operations ftrace_system_enable_fops = {
-       .open = tracing_open_generic,
+       .open = subsystem_open,
        .read = system_enable_read,
        .write = system_enable_write,
        .llseek = default_llseek,
+       .release = subsystem_release,
 };
 
 static const struct file_operations ftrace_show_header_fops = {
@@ -1002,8 +1069,6 @@ static struct dentry *event_trace_events_dir(void)
        return d_events;
 }
 
-static LIST_HEAD(event_subsystems);
-
 static struct dentry *
 event_subsystem_dir(const char *name, struct dentry *d_events)
 {
@@ -1013,6 +1078,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
        /* First see if we did not already create this dir */
        list_for_each_entry(system, &event_subsystems, list) {
                if (strcmp(system->name, name) == 0) {
+                       __get_system(system);
                        system->nr_events++;
                        return system->entry;
                }
@@ -1035,6 +1101,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
        }
 
        system->nr_events = 1;
+       system->ref_count = 1;
        system->name = kstrdup(name, GFP_KERNEL);
        if (!system->name) {
                debugfs_remove(system->entry);
@@ -1062,8 +1129,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
                           "'%s/filter' entry\n", name);
        }
 
-       trace_create_file("enable", 0644, system->entry,
-                         (void *)system->name,
+       trace_create_file("enable", 0644, system->entry, system,
                          &ftrace_system_enable_fops);
 
        return system->entry;
@@ -1184,16 +1250,9 @@ static void remove_subsystem_dir(const char *name)
        list_for_each_entry(system, &event_subsystems, list) {
                if (strcmp(system->name, name) == 0) {
                        if (!--system->nr_events) {
-                               struct event_filter *filter = system->filter;
-
                                debugfs_remove_recursive(system->entry);
                                list_del(&system->list);
-                               if (filter) {
-                                       kfree(filter->filter_string);
-                                       kfree(filter);
-                               }
-                               kfree(system->name);
-                               kfree(system);
+                               __put_system(system);
                        }
                        break;
                }
index 8008ddcfbf20add080624f6714a5b38424b3789e..256764ecccd66fca5045cf0a53bb9d04d8e7f106 100644 (file)
@@ -1886,6 +1886,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
 
        mutex_lock(&event_mutex);
 
+       /* Make sure the system still has events */
+       if (!system->nr_events) {
+               err = -ENODEV;
+               goto out_unlock;
+       }
+
        if (!strcmp(strstrip(filter_string), "0")) {
                filter_free_subsystem_preds(system);
                remove_filter_string(system->filter);
index 8d0e1cc4e9747281989e2b8f4f19f5fba5a8194c..c7b0c6a7db0986b2a54e1b892367bc7be2ffb058 100644 (file)
@@ -324,7 +324,8 @@ ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
 }
 
 static int
-ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
+ftrace_trace_onoff_callback(struct ftrace_hash *hash,
+                           char *glob, char *cmd, char *param, int enable)
 {
        struct ftrace_probe_ops *ops;
        void *count = (void *)-1;
index 962cdb24ed817b380a4ea753e4f7f501b937b4cb..a7d2a4c653d8d893f51652c8cd599bf780359f45 100644 (file)
@@ -74,6 +74,20 @@ static struct tracer_flags tracer_flags = {
 
 static struct trace_array *graph_array;
 
+/*
+ * The DURATION column is also used to display IRQ signs; the
+ * following values are used by print_graph_irq and others to
+ * fill in space in the DURATION column.
+ */
+enum {
+       DURATION_FILL_FULL  = -1,
+       DURATION_FILL_START = -2,
+       DURATION_FILL_END   = -3,
+};
+
+static enum print_line_t
+print_graph_duration(unsigned long long duration, struct trace_seq *s,
+                    u32 flags);
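With print_graph_overhead() folded into print_graph_duration() below, one function owns the entire DURATION column, and the negative sentinels select padding instead of a number. Illustrative call sites (the real ones appear in the following hunks):

    /* Sketch: sentinel "durations" fill the column with spaces. */
    print_graph_duration(DURATION_FILL_FULL, s, flags);  /* whole column */
    print_graph_duration(DURATION_FILL_START, s, flags); /* leading pad  */
    print_graph_duration(DURATION_FILL_END, s, flags);   /* closing " |" */
    print_graph_duration(duration, s, flags);            /* real value   */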
 
 /* Add a function return address to the trace stack on thread info.*/
 int
@@ -213,7 +227,7 @@ int __trace_graph_entry(struct trace_array *tr,
 
 static inline int ftrace_graph_ignore_irqs(void)
 {
-       if (!ftrace_graph_skip_irqs)
+       if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
                return 0;
 
        return in_irq();
@@ -577,32 +591,6 @@ get_return_for_leaf(struct trace_iterator *iter,
        return next;
 }
 
-/* Signal a overhead of time execution to the output */
-static int
-print_graph_overhead(unsigned long long duration, struct trace_seq *s,
-                    u32 flags)
-{
-       /* If duration disappear, we don't need anything */
-       if (!(flags & TRACE_GRAPH_PRINT_DURATION))
-               return 1;
-
-       /* Non nested entry or return */
-       if (duration == -1)
-               return trace_seq_printf(s, "  ");
-
-       if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
-               /* Duration exceeded 100 msecs */
-               if (duration > 100000ULL)
-                       return trace_seq_printf(s, "! ");
-
-               /* Duration exceeded 10 msecs */
-               if (duration > 10000ULL)
-                       return trace_seq_printf(s, "+ ");
-       }
-
-       return trace_seq_printf(s, "  ");
-}
-
 static int print_graph_abs_time(u64 t, struct trace_seq *s)
 {
        unsigned long usecs_rem;
@@ -625,34 +613,36 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
                addr >= (unsigned long)__irqentry_text_end)
                return TRACE_TYPE_UNHANDLED;
 
-       /* Absolute time */
-       if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
-               ret = print_graph_abs_time(iter->ts, s);
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-       }
+       if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
+               /* Absolute time */
+               if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
+                       ret = print_graph_abs_time(iter->ts, s);
+                       if (!ret)
+                               return TRACE_TYPE_PARTIAL_LINE;
+               }
 
-       /* Cpu */
-       if (flags & TRACE_GRAPH_PRINT_CPU) {
-               ret = print_graph_cpu(s, cpu);
-               if (ret == TRACE_TYPE_PARTIAL_LINE)
-                       return TRACE_TYPE_PARTIAL_LINE;
-       }
+               /* Cpu */
+               if (flags & TRACE_GRAPH_PRINT_CPU) {
+                       ret = print_graph_cpu(s, cpu);
+                       if (ret == TRACE_TYPE_PARTIAL_LINE)
+                               return TRACE_TYPE_PARTIAL_LINE;
+               }
 
-       /* Proc */
-       if (flags & TRACE_GRAPH_PRINT_PROC) {
-               ret = print_graph_proc(s, pid);
-               if (ret == TRACE_TYPE_PARTIAL_LINE)
-                       return TRACE_TYPE_PARTIAL_LINE;
-               ret = trace_seq_printf(s, " | ");
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
+               /* Proc */
+               if (flags & TRACE_GRAPH_PRINT_PROC) {
+                       ret = print_graph_proc(s, pid);
+                       if (ret == TRACE_TYPE_PARTIAL_LINE)
+                               return TRACE_TYPE_PARTIAL_LINE;
+                       ret = trace_seq_printf(s, " | ");
+                       if (!ret)
+                               return TRACE_TYPE_PARTIAL_LINE;
+               }
        }
 
        /* No overhead */
-       ret = print_graph_overhead(-1, s, flags);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
+       ret = print_graph_duration(DURATION_FILL_START, s, flags);
+       if (ret != TRACE_TYPE_HANDLED)
+               return ret;
 
        if (type == TRACE_GRAPH_ENT)
                ret = trace_seq_printf(s, "==========>");
@@ -662,9 +652,10 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
 
-       /* Don't close the duration column if haven't one */
-       if (flags & TRACE_GRAPH_PRINT_DURATION)
-               trace_seq_printf(s, " |");
+       ret = print_graph_duration(DURATION_FILL_END, s, flags);
+       if (ret != TRACE_TYPE_HANDLED)
+               return ret;
+
        ret = trace_seq_printf(s, "\n");
 
        if (!ret)
@@ -716,9 +707,49 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
 }
 
 static enum print_line_t
-print_graph_duration(unsigned long long duration, struct trace_seq *s)
+print_graph_duration(unsigned long long duration, struct trace_seq *s,
+                    u32 flags)
 {
-       int ret;
+       int ret = -1;
+
+       if (!(flags & TRACE_GRAPH_PRINT_DURATION) ||
+           !(trace_flags & TRACE_ITER_CONTEXT_INFO))
+               return TRACE_TYPE_HANDLED;
+
+       /* No real data, just filling the column with spaces */
+       switch (duration) {
+       case DURATION_FILL_FULL:
+               ret = trace_seq_printf(s, "              |  ");
+               return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
+       case DURATION_FILL_START:
+               ret = trace_seq_printf(s, "  ");
+               return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
+       case DURATION_FILL_END:
+               ret = trace_seq_printf(s, " |");
+               return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
+       }
+
+       /* Signal an overhead of execution time to the output */
+       if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
+               /* Duration exceeded 100 msecs */
+               if (duration > 100000ULL)
+                       ret = trace_seq_printf(s, "! ");
+               /* Duration exceeded 10 msecs */
+               else if (duration > 10000ULL)
+                       ret = trace_seq_printf(s, "+ ");
+       }
+
+       /*
+        * The -1 means we either did not exceed the duration thresholds
+        * or we don't want to print out the overhead. Either way we need
+        * to fill out the space.
+        */
+       if (ret == -1)
+               ret = trace_seq_printf(s, "  ");
+
+       /* Catch here any failure that happened above */
+       if (!ret)
+               return TRACE_TYPE_PARTIAL_LINE;
 
        ret = trace_print_graph_duration(duration, s);
        if (ret != TRACE_TYPE_HANDLED)
@@ -767,18 +798,11 @@ print_graph_entry_leaf(struct trace_iterator *iter,
                        cpu_data->enter_funcs[call->depth] = 0;
        }
 
-       /* Overhead */
-       ret = print_graph_overhead(duration, s, flags);
-       if (!ret)
+       /* Overhead and duration */
+       ret = print_graph_duration(duration, s, flags);
+       if (ret == TRACE_TYPE_PARTIAL_LINE)
                return TRACE_TYPE_PARTIAL_LINE;
 
-       /* Duration */
-       if (flags & TRACE_GRAPH_PRINT_DURATION) {
-               ret = print_graph_duration(duration, s);
-               if (ret == TRACE_TYPE_PARTIAL_LINE)
-                       return TRACE_TYPE_PARTIAL_LINE;
-       }
-
        /* Function */
        for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
                ret = trace_seq_printf(s, " ");
@@ -815,17 +839,10 @@ print_graph_entry_nested(struct trace_iterator *iter,
                        cpu_data->enter_funcs[call->depth] = call->func;
        }
 
-       /* No overhead */
-       ret = print_graph_overhead(-1, s, flags);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
        /* No time */
-       if (flags & TRACE_GRAPH_PRINT_DURATION) {
-               ret = trace_seq_printf(s, "            |  ");
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-       }
+       ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
+       if (ret != TRACE_TYPE_HANDLED)
+               return ret;
 
        /* Function */
        for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -865,6 +882,9 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
                        return TRACE_TYPE_PARTIAL_LINE;
        }
 
+       if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
+               return 0;
+
        /* Absolute time */
        if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
                ret = print_graph_abs_time(iter->ts, s);
@@ -1078,18 +1098,11 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
        if (print_graph_prologue(iter, s, 0, 0, flags))
                return TRACE_TYPE_PARTIAL_LINE;
 
-       /* Overhead */
-       ret = print_graph_overhead(duration, s, flags);
-       if (!ret)
+       /* Overhead and duration */
+       ret = print_graph_duration(duration, s, flags);
+       if (ret == TRACE_TYPE_PARTIAL_LINE)
                return TRACE_TYPE_PARTIAL_LINE;
 
-       /* Duration */
-       if (flags & TRACE_GRAPH_PRINT_DURATION) {
-               ret = print_graph_duration(duration, s);
-               if (ret == TRACE_TYPE_PARTIAL_LINE)
-                       return TRACE_TYPE_PARTIAL_LINE;
-       }
-
        /* Closing brace */
        for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
                ret = trace_seq_printf(s, " ");
@@ -1146,17 +1159,10 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
        if (print_graph_prologue(iter, s, 0, 0, flags))
                return TRACE_TYPE_PARTIAL_LINE;
 
-       /* No overhead */
-       ret = print_graph_overhead(-1, s, flags);
-       if (!ret)
-               return TRACE_TYPE_PARTIAL_LINE;
-
        /* No time */
-       if (flags & TRACE_GRAPH_PRINT_DURATION) {
-               ret = trace_seq_printf(s, "            |  ");
-               if (!ret)
-                       return TRACE_TYPE_PARTIAL_LINE;
-       }
+       ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
+       if (ret != TRACE_TYPE_HANDLED)
+               return ret;
 
        /* Indentation */
        if (depth > 0)
@@ -1207,7 +1213,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
 
 
 enum print_line_t
-__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
+print_graph_function_flags(struct trace_iterator *iter, u32 flags)
 {
        struct ftrace_graph_ent_entry *field;
        struct fgraph_data *data = iter->private;
@@ -1270,18 +1276,7 @@ __print_graph_function_flags(struct trace_iterator *iter, u32 flags)
 static enum print_line_t
 print_graph_function(struct trace_iterator *iter)
 {
-       return __print_graph_function_flags(iter, tracer_flags.val);
-}
-
-enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
-                                            u32 flags)
-{
-       if (trace_flags & TRACE_ITER_LATENCY_FMT)
-               flags |= TRACE_GRAPH_PRINT_DURATION;
-       else
-               flags |= TRACE_GRAPH_PRINT_ABS_TIME;
-
-       return __print_graph_function_flags(iter, flags);
+       return print_graph_function_flags(iter, tracer_flags.val);
 }
 
 static enum print_line_t
@@ -1309,8 +1304,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
        seq_printf(s, "#%.*s / _----=> need-resched    \n", size, spaces);
        seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces);
        seq_printf(s, "#%.*s|| / _--=> preempt-depth   \n", size, spaces);
-       seq_printf(s, "#%.*s||| / _-=> lock-depth      \n", size, spaces);
-       seq_printf(s, "#%.*s|||| /                     \n", size, spaces);
+       seq_printf(s, "#%.*s||| /                      \n", size, spaces);
 }
 
 static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
@@ -1329,7 +1323,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
        if (flags & TRACE_GRAPH_PRINT_PROC)
                seq_printf(s, "  TASK/PID       ");
        if (lat)
-               seq_printf(s, "|||||");
+               seq_printf(s, "||||");
        if (flags & TRACE_GRAPH_PRINT_DURATION)
                seq_printf(s, "  DURATION   ");
        seq_printf(s, "               FUNCTION CALLS\n");
@@ -1343,7 +1337,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
        if (flags & TRACE_GRAPH_PRINT_PROC)
                seq_printf(s, "   |    |        ");
        if (lat)
-               seq_printf(s, "|||||");
+               seq_printf(s, "||||");
        if (flags & TRACE_GRAPH_PRINT_DURATION)
                seq_printf(s, "   |   |      ");
        seq_printf(s, "               |   |   |   |\n");
@@ -1358,15 +1352,16 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags)
 {
        struct trace_iterator *iter = s->private;
 
+       if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
+               return;
+
        if (trace_flags & TRACE_ITER_LATENCY_FMT) {
                /* print nothing if the buffers are empty */
                if (trace_empty(iter))
                        return;
 
                print_trace_header(s, iter);
-               flags |= TRACE_GRAPH_PRINT_DURATION;
-       } else
-               flags |= TRACE_GRAPH_PRINT_ABS_TIME;
+       }
 
        __print_graph_headers_flags(s, flags);
 }
index c77424be284d503c241a36d290af3a4c6e0ef98b..667aa8cc0cfcbcc1d3d4a960d03d8e9e15d1d3fc 100644 (file)
@@ -226,7 +226,9 @@ static void irqsoff_trace_close(struct trace_iterator *iter)
 }
 
 #define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \
-                           TRACE_GRAPH_PRINT_PROC)
+                           TRACE_GRAPH_PRINT_PROC | \
+                           TRACE_GRAPH_PRINT_ABS_TIME | \
+                           TRACE_GRAPH_PRINT_DURATION)
 
 static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
 {
index 27d13b36b8be9606deac3e1f42774ce7f3312c22..5fb3697bf0e5e982882d208d2321c67dd0edf5c8 100644 (file)
@@ -343,6 +343,14 @@ DEFINE_BASIC_FETCH_FUNCS(deref)
 DEFINE_FETCH_deref(string)
 DEFINE_FETCH_deref(string_size)
 
+static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
+{
+       if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
+               update_deref_fetch_param(data->orig.data);
+       else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
+               update_symbol_cache(data->orig.data);
+}
+
 static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
 {
        if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
@@ -376,6 +384,19 @@ DEFINE_BASIC_FETCH_FUNCS(bitfield)
 #define fetch_bitfield_string NULL
 #define fetch_bitfield_string_size NULL
 
+static __kprobes void
+update_bitfield_fetch_param(struct bitfield_fetch_param *data)
+{
+       /*
+        * Don't check the bitfield itself, because this must be the
+        * last fetch function.
+        */
+       if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
+               update_deref_fetch_param(data->orig.data);
+       else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
+               update_symbol_cache(data->orig.data);
+}
+
 static __kprobes void
 free_bitfield_fetch_param(struct bitfield_fetch_param *data)
 {
@@ -389,6 +410,7 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data)
                free_symbol_cache(data->orig.data);
        kfree(data);
 }
+
 /* Default (unsigned long) fetch type */
 #define __DEFAULT_FETCH_TYPE(t) u##t
 #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
@@ -536,6 +558,7 @@ struct probe_arg {
 /* Flags for trace_probe */
 #define TP_FLAG_TRACE  1
 #define TP_FLAG_PROFILE        2
+#define TP_FLAG_REGISTERED 4
 
 struct trace_probe {
        struct list_head        list;
@@ -555,16 +578,49 @@ struct trace_probe {
        (sizeof(struct probe_arg) * (n)))
 
 
-static __kprobes int probe_is_return(struct trace_probe *tp)
+static __kprobes int trace_probe_is_return(struct trace_probe *tp)
 {
        return tp->rp.handler != NULL;
 }
 
-static __kprobes const char *probe_symbol(struct trace_probe *tp)
+static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
 {
        return tp->symbol ? tp->symbol : "unknown";
 }
 
+static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
+{
+       return tp->rp.kp.offset;
+}
+
+static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
+{
+       return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
+}
+
+static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
+{
+       return !!(tp->flags & TP_FLAG_REGISTERED);
+}
+
+static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
+{
+       return !!(kprobe_gone(&tp->rp.kp));
+}
+
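+/* e.g. a symbol "btrfs:btrfs_sync_file" is within the module "btrfs" */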
+static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
+                                               struct module *mod)
+{
+       int len = strlen(mod->name);
+       const char *name = trace_probe_symbol(tp);
+       return strncmp(mod->name, name, len) == 0 && name[len] == ':';
+}
+
+static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
+{
+       return !!strchr(trace_probe_symbol(tp), ':');
+}
+
 static int register_probe_event(struct trace_probe *tp);
 static void unregister_probe_event(struct trace_probe *tp);
 
@@ -646,6 +702,16 @@ error:
        return ERR_PTR(ret);
 }
 
+static void update_probe_arg(struct probe_arg *arg)
+{
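+       /* Re-resolve cached symbol addresses, e.g. after a module reload */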
+       if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
+               update_bitfield_fetch_param(arg->fetch.data);
+       else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
+               update_deref_fetch_param(arg->fetch.data);
+       else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
+               update_symbol_cache(arg->fetch.data);
+}
+
 static void free_probe_arg(struct probe_arg *arg)
 {
        if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
@@ -671,7 +737,7 @@ static void free_trace_probe(struct trace_probe *tp)
        kfree(tp);
 }
 
-static struct trace_probe *find_probe_event(const char *event,
+static struct trace_probe *find_trace_probe(const char *event,
                                            const char *group)
 {
        struct trace_probe *tp;
@@ -683,13 +749,96 @@ static struct trace_probe *find_probe_event(const char *event,
        return NULL;
 }
 
+/* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
+static int enable_trace_probe(struct trace_probe *tp, int flag)
+{
+       int ret = 0;
+
+       tp->flags |= flag;
+       if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) &&
+           !trace_probe_has_gone(tp)) {
+               if (trace_probe_is_return(tp))
+                       ret = enable_kretprobe(&tp->rp);
+               else
+                       ret = enable_kprobe(&tp->rp.kp);
+       }
+
+       return ret;
+}
+
+/* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
+static void disable_trace_probe(struct trace_probe *tp, int flag)
+{
+       tp->flags &= ~flag;
+       if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
+               if (trace_probe_is_return(tp))
+                       disable_kretprobe(&tp->rp);
+               else
+                       disable_kprobe(&tp->rp.kp);
+       }
+}
+
+/* Internal register function - just handle k*probes and flags */
+static int __register_trace_probe(struct trace_probe *tp)
+{
+       int i, ret;
+
+       if (trace_probe_is_registered(tp))
+               return -EINVAL;
+
+       for (i = 0; i < tp->nr_args; i++)
+               update_probe_arg(&tp->args[i]);
+
+       /* Set/clear disabled flag according to tp->flag */
+       if (trace_probe_is_enabled(tp))
+               tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
+       else
+               tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
+
+       if (trace_probe_is_return(tp))
+               ret = register_kretprobe(&tp->rp);
+       else
+               ret = register_kprobe(&tp->rp.kp);
+
+       if (ret == 0)
+               tp->flags |= TP_FLAG_REGISTERED;
+       else {
+               pr_warning("Could not insert probe at %s+%lu: %d\n",
+                          trace_probe_symbol(tp), trace_probe_offset(tp), ret);
+               if (ret == -ENOENT && trace_probe_is_on_module(tp)) {
+                       pr_warning("This probe might be able to register after"
+                                  "target module is loaded. Continue.\n");
+                       ret = 0;
+               } else if (ret == -EILSEQ) {
+                       pr_warning("Probing address(0x%p) is not an "
+                                  "instruction boundary.\n",
+                                  tp->rp.kp.addr);
+                       ret = -EINVAL;
+               }
+       }
+
+       return ret;
+}
+
+/* Internal unregister function - just handle k*probes and flags */
+static void __unregister_trace_probe(struct trace_probe *tp)
+{
+       if (trace_probe_is_registered(tp)) {
+               if (trace_probe_is_return(tp))
+                       unregister_kretprobe(&tp->rp);
+               else
+                       unregister_kprobe(&tp->rp.kp);
+               tp->flags &= ~TP_FLAG_REGISTERED;
+               /* Cleanup kprobe for reuse */
+               if (tp->rp.kp.symbol_name)
+                       tp->rp.kp.addr = NULL;
+       }
+}
+
 /* Unregister a trace_probe and probe_event: call with probe_lock held */
 static void unregister_trace_probe(struct trace_probe *tp)
 {
-       if (probe_is_return(tp))
-               unregister_kretprobe(&tp->rp);
-       else
-               unregister_kprobe(&tp->rp.kp);
+       __unregister_trace_probe(tp);
        list_del(&tp->list);
        unregister_probe_event(tp);
 }
@@ -702,41 +851,65 @@ static int register_trace_probe(struct trace_probe *tp)
 
        mutex_lock(&probe_lock);
 
-       /* register as an event */
-       old_tp = find_probe_event(tp->call.name, tp->call.class->system);
+       /* Delete any old event with the same name */
+       old_tp = find_trace_probe(tp->call.name, tp->call.class->system);
        if (old_tp) {
-               /* delete old event */
                unregister_trace_probe(old_tp);
                free_trace_probe(old_tp);
        }
+
+       /* Register new event */
        ret = register_probe_event(tp);
        if (ret) {
                pr_warning("Failed to register probe event(%d)\n", ret);
                goto end;
        }
 
-       tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
-       if (probe_is_return(tp))
-               ret = register_kretprobe(&tp->rp);
-       else
-               ret = register_kprobe(&tp->rp.kp);
-
-       if (ret) {
-               pr_warning("Could not insert probe(%d)\n", ret);
-               if (ret == -EILSEQ) {
-                       pr_warning("Probing address(0x%p) is not an "
-                                  "instruction boundary.\n",
-                                  tp->rp.kp.addr);
-                       ret = -EINVAL;
-               }
+       /* Register k*probe */
+       ret = __register_trace_probe(tp);
+       if (ret < 0)
                unregister_probe_event(tp);
-       else
+       else
                list_add_tail(&tp->list, &probe_list);
+
 end:
        mutex_unlock(&probe_lock);
        return ret;
 }
 
+/* Module notifier callback, checking probe events on the coming module */
+static int trace_probe_module_callback(struct notifier_block *nb,
+                                      unsigned long val, void *data)
+{
+       struct module *mod = data;
+       struct trace_probe *tp;
+       int ret;
+
+       if (val != MODULE_STATE_COMING)
+               return NOTIFY_DONE;
+
+       /* Update probes on coming module */
+       mutex_lock(&probe_lock);
+       list_for_each_entry(tp, &probe_list, list) {
+               if (trace_probe_within_module(tp, mod)) {
+                       __unregister_trace_probe(tp);
+                       ret = __register_trace_probe(tp);
+                       if (ret)
+                               pr_warning("Failed to re-register probe %s on"
+                                          "%s: %d\n",
+                                          tp->call.name, mod->name, ret);
+               }
+       }
+       mutex_unlock(&probe_lock);
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block trace_probe_module_nb = {
+       .notifier_call = trace_probe_module_callback,
+       .priority = 1   /* Invoked after kprobe module callback */
+};
+
 /* Split symbol and offset. */
 static int split_symbol_offset(char *symbol, unsigned long *offset)
 {
@@ -962,8 +1135,8 @@ static int create_trace_probe(int argc, char **argv)
 {
        /*
         * Argument syntax:
-        *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
-        *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
+        *  - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
+        *  - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
         * Fetch args:
         *  $retval     : fetch return value
         *  $stack      : fetch stack address
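
As an illustrative sketch of the extended syntax above (the module and
symbol names are hypothetical), a probe on a not-yet-loaded module can
be set through the kprobe_events interface:

	echo 'p:myprobe btrfs:btrfs_sync_file' > /sys/kernel/debug/tracing/kprobe_events
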
@@ -1025,7 +1198,7 @@ static int create_trace_probe(int argc, char **argv)
                        return -EINVAL;
                }
                mutex_lock(&probe_lock);
-               tp = find_probe_event(event, group);
+               tp = find_trace_probe(event, group);
                if (!tp) {
                        mutex_unlock(&probe_lock);
                        pr_info("Event %s/%s doesn't exist.\n", group, event);
@@ -1144,7 +1317,7 @@ error:
        return ret;
 }
 
-static void cleanup_all_probes(void)
+static void release_all_trace_probes(void)
 {
        struct trace_probe *tp;
 
@@ -1158,7 +1331,6 @@ static void cleanup_all_probes(void)
        mutex_unlock(&probe_lock);
 }
 
-
 /* Probes listing interfaces */
 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
 {
@@ -1181,15 +1353,16 @@ static int probes_seq_show(struct seq_file *m, void *v)
        struct trace_probe *tp = v;
        int i;
 
-       seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
+       seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p');
        seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
 
        if (!tp->symbol)
                seq_printf(m, " 0x%p", tp->rp.kp.addr);
        else if (tp->rp.kp.offset)
-               seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
+               seq_printf(m, " %s+%u", trace_probe_symbol(tp),
+                          tp->rp.kp.offset);
        else
-               seq_printf(m, " %s", probe_symbol(tp));
+               seq_printf(m, " %s", trace_probe_symbol(tp));
 
        for (i = 0; i < tp->nr_args; i++)
                seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
@@ -1209,7 +1382,7 @@ static int probes_open(struct inode *inode, struct file *file)
 {
        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC))
-               cleanup_all_probes();
+               release_all_trace_probes();
 
        return seq_open(file, &probes_seq_op);
 }
@@ -1397,7 +1570,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
        store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
        if (!filter_current_check_discard(buffer, call, entry, event))
-               trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+               trace_nowake_buffer_unlock_commit_regs(buffer, event,
+                                                      irq_flags, pc, regs);
 }
 
 /* Kretprobe handler */
@@ -1429,7 +1603,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
        store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 
        if (!filter_current_check_discard(buffer, call, entry, event))
-               trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
+               trace_nowake_buffer_unlock_commit_regs(buffer, event,
+                                                      irq_flags, pc, regs);
 }
 
 /* Event entry printers */
@@ -1511,30 +1686,6 @@ partial:
        return TRACE_TYPE_PARTIAL_LINE;
 }
 
-static int probe_event_enable(struct ftrace_event_call *call)
-{
-       struct trace_probe *tp = (struct trace_probe *)call->data;
-
-       tp->flags |= TP_FLAG_TRACE;
-       if (probe_is_return(tp))
-               return enable_kretprobe(&tp->rp);
-       else
-               return enable_kprobe(&tp->rp.kp);
-}
-
-static void probe_event_disable(struct ftrace_event_call *call)
-{
-       struct trace_probe *tp = (struct trace_probe *)call->data;
-
-       tp->flags &= ~TP_FLAG_TRACE;
-       if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
-               if (probe_is_return(tp))
-                       disable_kretprobe(&tp->rp);
-               else
-                       disable_kprobe(&tp->rp.kp);
-       }
-}
-
 #undef DEFINE_FIELD
 #define DEFINE_FIELD(type, item, name, is_signed)                      \
        do {                                                            \
@@ -1596,7 +1747,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
 
        const char *fmt, *arg;
 
-       if (!probe_is_return(tp)) {
+       if (!trace_probe_is_return(tp)) {
                fmt = "(%lx)";
                arg = "REC->" FIELD_STRING_IP;
        } else {
@@ -1713,49 +1864,25 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
        head = this_cpu_ptr(call->perf_events);
        perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
 }
-
-static int probe_perf_enable(struct ftrace_event_call *call)
-{
-       struct trace_probe *tp = (struct trace_probe *)call->data;
-
-       tp->flags |= TP_FLAG_PROFILE;
-
-       if (probe_is_return(tp))
-               return enable_kretprobe(&tp->rp);
-       else
-               return enable_kprobe(&tp->rp.kp);
-}
-
-static void probe_perf_disable(struct ftrace_event_call *call)
-{
-       struct trace_probe *tp = (struct trace_probe *)call->data;
-
-       tp->flags &= ~TP_FLAG_PROFILE;
-
-       if (!(tp->flags & TP_FLAG_TRACE)) {
-               if (probe_is_return(tp))
-                       disable_kretprobe(&tp->rp);
-               else
-                       disable_kprobe(&tp->rp.kp);
-       }
-}
 #endif /* CONFIG_PERF_EVENTS */
 
 static __kprobes
 int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
 {
+       struct trace_probe *tp = (struct trace_probe *)event->data;
+
        switch (type) {
        case TRACE_REG_REGISTER:
-               return probe_event_enable(event);
+               return enable_trace_probe(tp, TP_FLAG_TRACE);
        case TRACE_REG_UNREGISTER:
-               probe_event_disable(event);
+               disable_trace_probe(tp, TP_FLAG_TRACE);
                return 0;
 
 #ifdef CONFIG_PERF_EVENTS
        case TRACE_REG_PERF_REGISTER:
-               return probe_perf_enable(event);
+               return enable_trace_probe(tp, TP_FLAG_PROFILE);
        case TRACE_REG_PERF_UNREGISTER:
-               probe_perf_disable(event);
+               disable_trace_probe(tp, TP_FLAG_PROFILE);
                return 0;
 #endif
        }
@@ -1805,7 +1932,7 @@ static int register_probe_event(struct trace_probe *tp)
 
        /* Initialize ftrace_event_call */
        INIT_LIST_HEAD(&call->class->fields);
-       if (probe_is_return(tp)) {
+       if (trace_probe_is_return(tp)) {
                call->event.funcs = &kretprobe_funcs;
                call->class->define_fields = kretprobe_event_define_fields;
        } else {
@@ -1844,6 +1971,9 @@ static __init int init_kprobe_trace(void)
        struct dentry *d_tracer;
        struct dentry *entry;
 
+       if (register_module_notifier(&trace_probe_module_nb))
+               return -EINVAL;
+
        d_tracer = tracing_init_dentry();
        if (!d_tracer)
                return 0;
@@ -1897,12 +2027,12 @@ static __init int kprobe_trace_self_tests_init(void)
                warn++;
        } else {
                /* Enable trace point */
-               tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
+               tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
                if (WARN_ON_ONCE(tp == NULL)) {
                        pr_warning("error on getting new probe.\n");
                        warn++;
                } else
-                       probe_event_enable(&tp->call);
+                       enable_trace_probe(tp, TP_FLAG_TRACE);
        }
 
        ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
@@ -1912,12 +2042,12 @@ static __init int kprobe_trace_self_tests_init(void)
                warn++;
        } else {
                /* Enable trace point */
-               tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
+               tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
                if (WARN_ON_ONCE(tp == NULL)) {
                        pr_warning("error on getting new probe.\n");
                        warn++;
                } else
-                       probe_event_enable(&tp->call);
+                       enable_trace_probe(tp, TP_FLAG_TRACE);
        }
 
        if (warn)
@@ -1938,7 +2068,7 @@ static __init int kprobe_trace_self_tests_init(void)
        }
 
 end:
-       cleanup_all_probes();
+       release_all_trace_probes();
        if (warn)
                pr_cont("NG: Some tests are failed. Please check them.\n");
        else
index e37de492a9e18c7d41b629c8267ce0f78f03fff9..51999309a6cf5d71347da29c3406e45f1b1e98d0 100644 (file)
@@ -1107,19 +1107,20 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 {
        struct stack_entry *field;
        struct trace_seq *s = &iter->seq;
-       int i;
+       unsigned long *p;
+       unsigned long *end;
 
        trace_assign_type(field, iter->ent);
+       end = (unsigned long *)((long)iter->ent + iter->ent_size);
 
        if (!trace_seq_puts(s, "<stack trace>\n"))
                goto partial;
-       for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-               if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
-                       break;
+
+       for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {
                if (!trace_seq_puts(s, " => "))
                        goto partial;
 
-               if (!seq_print_ip_sym(s, field->caller[i], flags))
+               if (!seq_print_ip_sym(s, *p, flags))
                        goto partial;
                if (!trace_seq_puts(s, "\n"))
                        goto partial;
index f029dd4fd2cac18eb309a9aab6b13009bd124305..e4a70c0c71b64d43cc11420f8399b62fc81074c4 100644 (file)
@@ -227,7 +227,9 @@ static void wakeup_trace_close(struct trace_iterator *iter)
                graph_trace_close(iter);
 }
 
-#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)
+#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \
+                           TRACE_GRAPH_PRINT_ABS_TIME | \
+                           TRACE_GRAPH_PRINT_DURATION)
 
 static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
 {
index b0b53b8e4c25ffbe23e4c8a1f40fa19ba4326682..77575b386d9743cd337962698d10bdd499d7a8c6 100644 (file)
@@ -156,20 +156,11 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
 {
        long *ptr = filp->private_data;
        unsigned long val, flags;
-       char buf[64];
        int ret;
        int cpu;
 
-       if (count >= sizeof(buf))
-               return -EINVAL;
-
-       if (copy_from_user(&buf, ubuf, count))
-               return -EFAULT;
-
-       buf[count] = 0;
-
-       ret = strict_strtoul(buf, 10, &val);
-       if (ret < 0)
+       ret = kstrtoul_from_user(ubuf, count, 10, &val);
+       if (ret)
                return ret;
 
        local_irq_save(flags);
index 3d0c56ad47929c91795ac7db81d97acbb1e28cf9..36491cd5b7d4a595a2c3a432ecce769caafb24c7 100644 (file)
@@ -200,6 +200,7 @@ static int is_softlockup(unsigned long touch_ts)
 }
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
+
 static struct perf_event_attr wd_hw_attr = {
        .type           = PERF_TYPE_HARDWARE,
        .config         = PERF_COUNT_HW_CPU_CYCLES,
@@ -209,7 +210,7 @@ static struct perf_event_attr wd_hw_attr = {
 };
 
 /* Callback function for perf event subsystem */
-static void watchdog_overflow_callback(struct perf_event *event, int nmi,
+static void watchdog_overflow_callback(struct perf_event *event,
                 struct perf_sample_data *data,
                 struct pt_regs *regs)
 {
@@ -368,10 +369,11 @@ static int watchdog_nmi_enable(int cpu)
        if (event != NULL)
                goto out_enable;
 
-       /* Try to register using hardware perf events */
        wd_attr = &wd_hw_attr;
        wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
-       event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback);
+
+       /* Try to register using hardware perf events */
+       event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
        if (!IS_ERR(event)) {
                printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
                goto out_save;
index 063653955f9fd7690ba5f588541c2dd288500958..ef7f32291852fec2b4e93f6d75053a1907b71b0f 100644 (file)
@@ -41,7 +41,7 @@ module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
 MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
                        " write operations on the kernel symbol");
 
-static void sample_hbp_handler(struct perf_event *bp, int nmi,
+static void sample_hbp_handler(struct perf_event *bp,
                               struct perf_sample_data *data,
                               struct pt_regs *regs)
 {
@@ -60,7 +60,7 @@ static int __init hw_break_module_init(void)
        attr.bp_len = HW_BREAKPOINT_LEN_4;
        attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 
-       sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
+       sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL);
        if (IS_ERR((void __force *)sample_hbp)) {
                ret = PTR_ERR((void __force *)sample_hbp);
                goto fail;
index 6f5a498608b292241e93dc9c498e8ce5f6a683cc..85c5f026930d336bc6b2d2658087134e11caa328 100644 (file)
@@ -66,6 +66,12 @@ OPTIONS
        used. This interface starts by centering on the line with the most
        samples; TAB/UNTAB cycles through the lines with more samples.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+       be provided as a comma-separated list with no space: 0,1. Ranges of
+       CPUs are specified with -: 0-2. Default is to report samples on all
+       CPUs.
+
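
As a hypothetical invocation of the option added above (the CPU list is
assumed, not part of the original patch):

	perf annotate -c 0,1,3-5
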
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
index 02bafce4b341cfc63e399d63b5007b70fbe27d6e..2780d9ce48bfe22b2fbaad38f39d7f2384c2d131 100644 (file)
@@ -34,9 +34,11 @@ OPTIONS
        Specify vmlinux path which has debuginfo (Dwarf binary).
 
 -m::
---module=MODNAME::
+--module=MODNAME|PATH::
        Specify module name in which perf-probe searches probe points
-       or lines.
+       or lines. If a path to a module file is passed, perf-probe
+       treats it as an offline module (this means you can add a probe
+       to a module which has not been loaded yet).
 
 -s::
 --source=PATH::
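
Regarding the -m/--module change above, a hypothetical offline use (the
module path is assumed):

	perf probe -m /lib/modules/`uname -r`/kernel/fs/btrfs/btrfs.ko --add btrfs_sync_file
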
index 8ba03d6e5398d8387b11f9caf183bed81a0eb5a2..04253c07d19a8620f185d13950816d514755440b 100644 (file)
@@ -80,15 +80,24 @@ OPTIONS
 --dump-raw-trace::
         Dump raw trace in ASCII.
 
--g [type,min]::
+-g [type,min,order]::
 --call-graph::
-        Display call chains using type and min percent threshold.
+        Display call chains using type, min percent threshold and order.
        type can be either:
        - flat: single column, linear exposure of call chains.
        - graph: use a graph tree, displaying absolute overhead rates.
        - fractal: like graph, but displays relative rates. Each branch of
                 the tree is considered as a new profiled object. +
-       Default: fractal,0.5.
+
+       order can be either:
+       - callee: callee-based call graph.
+       - caller: inverted caller-based call graph.
+
+       Default: fractal,0.5,callee.
+
+-G::
+--inverted::
+        Alias for an inverted caller-based call graph.
 
 --pretty=<key>::
         Pretty printing style.  key: normal, raw
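
As hypothetical invocations of the new order field and the -G alias
described above (the profiled workload is assumed):

	perf report -g fractal,0.5,caller
	perf report -G
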
@@ -119,6 +128,12 @@ OPTIONS
 --symfs=<directory>::
         Look for files with symbols relative to this directory.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+       be provided as a comma-separated list with no space: 0,1. Ranges of
+       CPUs are specified with -: 0-2. Default is to report samples on all
+       CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1]
index 86c87e214b11d295ed07ce6fa5005f906484d8f7..db017867d9e8ff363f19eb3744c1195ae1415d46 100644 (file)
@@ -115,10 +115,10 @@ OPTIONS
 -f::
 --fields::
         Comma separated list of fields to print. Options are:
-        comm, tid, pid, time, cpu, event, trace, sym. Field
-        list can be prepended with the type, trace, sw or hw,
+        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr.
+        Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
-        e.g., -f sw:comm,tid,time,sym  and -f trace:time,cpu,trace
+        e.g., -f sw:comm,tid,time,ip,sym  and -f trace:time,cpu,trace
 
                perf script -f <fields>
 
@@ -132,17 +132,17 @@ OPTIONS
        The arguments are processed in the order received. A later usage can
        reset a prior request. e.g.:
     
-               -f trace: -f comm,tid,time,sym
+               -f trace: -f comm,tid,time,ip,sym
     
        The first -f suppresses trace events (field list is ""), but then the
-       second invocation sets the fields to comm,tid,time,sym. In this case a
+       second invocation sets the fields to comm,tid,time,ip,sym. In this case a
        warning is given to the user:
     
                "Overriding previous field request for all events."
     
        Alternatively, consider the order:
     
-               -f comm,tid,time,sym -f trace:
+               -f comm,tid,time,ip,sym -f trace:
     
        The first -f sets the fields for all events and the second -f
        suppresses trace events. The user is given a warning message about
@@ -182,6 +182,12 @@ OPTIONS
 --hide-call-graph::
         When printing symbols do not display call chain.
 
+-c::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+       be provided as a comma-separated list with no space: 0,1. Ranges of
+       CPUs are specified with -: 0-2. Default is to report samples on all
+       CPUs.
+
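
As a hypothetical example combining the new ip/sym/dso fields with the
CPU filter added above (not part of the original patch):

	perf script -c 0,1 -f comm,tid,time,ip,sym,dso
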
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
index 940257b5774ec209d09e3b2d4fbee647ff86dd71..d0861bbd1d944675359cb0a3e53538e8a3c92bab 100644 (file)
@@ -279,6 +279,7 @@ LIB_H += util/thread.h
 LIB_H += util/thread_map.h
 LIB_H += util/trace-event.h
 LIB_H += util/probe-finder.h
+LIB_H += util/dwarf-aux.h
 LIB_H += util/probe-event.h
 LIB_H += util/pstack.h
 LIB_H += util/cpumap.h
@@ -435,6 +436,7 @@ else
        BASIC_CFLAGS += -DDWARF_SUPPORT
        EXTLIBS += -lelf -ldw
        LIB_OBJS += $(OUTPUT)util/probe-finder.o
+       LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
 endif # PERF_HAVE_DWARF_REGS
 endif # NO_DWARF
 
index 7b139e1e7e869f9c11070012ac21bf13534d1d50..555aefd7fe0146eada9b45dfd366f68a6285f854 100644 (file)
@@ -28,6 +28,8 @@
 #include "util/hist.h"
 #include "util/session.h"
 
+#include <linux/bitmap.h>
+
 static char            const *input_name = "perf.data";
 
 static bool            force, use_tui, use_stdio;
@@ -38,6 +40,9 @@ static bool           print_line;
 
 static const char *sym_hist_filter;
 
+static const char      *cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
 static int perf_evlist__add_sample(struct perf_evlist *evlist,
                                   struct perf_sample *sample,
                                   struct perf_evsel *evsel,
@@ -90,6 +95,9 @@ static int process_sample_event(union perf_event *event,
                return -1;
        }
 
+       if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+               return 0;
+
        if (!al.filtered &&
            perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
                pr_warning("problem incrementing symbol count, "
@@ -177,6 +185,12 @@ static int __cmd_annotate(void)
        if (session == NULL)
                return -ENOMEM;
 
+       if (cpu_list) {
+               ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+               if (ret)
+                       goto out_delete;
+       }
+
        ret = perf_session__process_events(session, &event_ops);
        if (ret)
                goto out_delete;
@@ -252,6 +266,7 @@ static const struct option options[] = {
                    "print matching source lines (may be slow)"),
        OPT_BOOLEAN('P', "full-paths", &full_paths,
                    "Don't shorten the displayed pathnames"),
+       OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
        OPT_END()
 };
 
index 2c0e64d0b4aa6107800dd9736d50da050fa84429..5f2a5c7046dfd6d07b973a454ddc28332786b4ec 100644 (file)
@@ -242,7 +242,8 @@ static const struct option options[] = {
        OPT_STRING('s', "source", &symbol_conf.source_prefix,
                   "directory", "path to kernel source"),
        OPT_STRING('m', "module", &params.target_module,
-                  "modname", "target module name"),
+                  "modname|path",
+                  "target module name (for online) or path (for offline)"),
 #endif
        OPT__DRY_RUN(&probe_event_dry_run),
        OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
index 8e2c85798185c58068783f2b392e00f305f492e1..80dc5b790e4790c22b7c4f23fcbae6f45b757acb 100644 (file)
@@ -740,7 +740,7 @@ static bool force, append_file;
 const struct option record_options[] = {
        OPT_CALLBACK('e', "event", &evsel_list, "event",
                     "event selector. use 'perf list' to list available events",
-                    parse_events),
+                    parse_events_option),
        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
                     "event filter", parse_filter),
        OPT_INTEGER('p', "pid", &target_pid,
index 287a173523a7fa4d39c0c6635cbbaf94a0ac181a..f854efda76869412210ede431f4844a5e386cd34 100644 (file)
@@ -33,6 +33,8 @@
 #include "util/sort.h"
 #include "util/hist.h"
 
+#include <linux/bitmap.h>
+
 static char            const *input_name = "perf.data";
 
 static bool            force, use_tui, use_stdio;
@@ -45,9 +47,13 @@ static struct perf_read_values       show_threads_values;
 static const char      default_pretty_printing_style[] = "normal";
 static const char      *pretty_printing_style = default_pretty_printing_style;
 
-static char            callchain_default_opt[] = "fractal,0.5";
+static char            callchain_default_opt[] = "fractal,0.5,callee";
+static bool            inverted_callchain;
 static symbol_filter_t annotate_init;
 
+static const char      *cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+
 static int perf_session__add_hist_entry(struct perf_session *session,
                                        struct addr_location *al,
                                        struct perf_sample *sample,
@@ -116,6 +122,9 @@ static int process_sample_event(union perf_event *event,
        if (al.filtered || (hide_unresolved && al.sym == NULL))
                return 0;
 
+       if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+               return 0;
+
        if (al.map != NULL)
                al.map->dso->hit = 1;
 
@@ -262,6 +271,12 @@ static int __cmd_report(void)
        if (session == NULL)
                return -ENOMEM;
 
+       if (cpu_list) {
+               ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+               if (ret)
+                       goto out_delete;
+       }
+
        if (show_threads)
                perf_read_values_init(&show_threads_values);
 
@@ -386,13 +401,29 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
        if (!tok)
                goto setup;
 
-       tok2 = strtok(NULL, ",");
        callchain_param.min_percent = strtod(tok, &endptr);
        if (tok == endptr)
                return -1;
 
-       if (tok2)
+       /* get the print limit */
+       tok2 = strtok(NULL, ",");
+       if (!tok2)
+               goto setup;
+
+       if (tok2[0] != 'c') {
                callchain_param.print_limit = strtod(tok2, &endptr);
+               tok2 = strtok(NULL, ",");
+               if (!tok2)
+                       goto setup;
+       }
+
+       /* get the call chain order */
+       if (!strcmp(tok2, "caller"))
+               callchain_param.order = ORDER_CALLER;
+       else if (!strcmp(tok2, "callee"))
+               callchain_param.order = ORDER_CALLEE;
+       else
+               return -1;
 setup:
        if (callchain_register_param(&callchain_param) < 0) {
                fprintf(stderr, "Can't register callchain params\n");
@@ -436,9 +467,10 @@ static const struct option options[] = {
                   "regex filter to identify parent, see: '--sort parent'"),
        OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
                    "Only display entries with parent-match"),
-       OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent",
-                    "Display callchains using output_type (graph, flat, fractal, or none) and min percent threshold. "
-                    "Default: fractal,0.5", &parse_callchain_opt, callchain_default_opt),
+       OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent,call_order",
+                    "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
+                    "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
+       OPT_BOOLEAN('G', "inverted", &inverted_callchain, "alias for inverted call graph"),
        OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
                   "only consider symbols in these dsos"),
        OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
@@ -455,6 +487,7 @@ static const struct option options[] = {
                    "Only display entries resolved to a symbol"),
        OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
                    "Look for files with symbols relative to this directory"),
+       OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
        OPT_END()
 };
 
@@ -467,6 +500,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
        else if (use_tui)
                use_browser = 1;
 
+       if (inverted_callchain)
+               callchain_param.order = ORDER_CALLER;
+
        if (strcmp(input_name, "-") != 0)
                setup_browser(true);
        else
@@ -504,7 +540,14 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
        if (parent_pattern != default_parent_pattern) {
                if (sort_dimension__add("parent") < 0)
                        return -1;
-               sort_parent.elide = 1;
+
+               /*
+                * Only show the parent fields if we explicitly
+                * sort that way. If we only use parent machinery
+                * for filtering, we don't want it.
+                */
+               if (!strstr(sort_order, "parent"))
+                       sort_parent.elide = 1;
        } else
                symbol_conf.exclude_other = false;
 
index 22747de7234b548cd1fadac1c53d9884849df935..09024ec2ab2e97a478271c67dc29f40c5c242211 100644 (file)
@@ -13,6 +13,7 @@
 #include "util/util.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include <linux/bitmap.h>
 
 static char const              *script_name;
 static char const              *generate_script_lang;
@@ -21,6 +22,8 @@ static u64                    last_timestamp;
 static u64                     nr_unordered;
 extern const struct option     record_options[];
 static bool                    no_callchain;
+static const char              *cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 
 enum perf_output_field {
        PERF_OUTPUT_COMM            = 1U << 0,
@@ -30,7 +33,10 @@ enum perf_output_field {
        PERF_OUTPUT_CPU             = 1U << 4,
        PERF_OUTPUT_EVNAME          = 1U << 5,
        PERF_OUTPUT_TRACE           = 1U << 6,
-       PERF_OUTPUT_SYM             = 1U << 7,
+       PERF_OUTPUT_IP              = 1U << 7,
+       PERF_OUTPUT_SYM             = 1U << 8,
+       PERF_OUTPUT_DSO             = 1U << 9,
+       PERF_OUTPUT_ADDR            = 1U << 10,
 };
 
 struct output_option {
@@ -44,7 +50,10 @@ struct output_option {
        {.str = "cpu",   .field = PERF_OUTPUT_CPU},
        {.str = "event", .field = PERF_OUTPUT_EVNAME},
        {.str = "trace", .field = PERF_OUTPUT_TRACE},
+       {.str = "ip",    .field = PERF_OUTPUT_IP},
        {.str = "sym",   .field = PERF_OUTPUT_SYM},
+       {.str = "dso",   .field = PERF_OUTPUT_DSO},
+       {.str = "addr",  .field = PERF_OUTPUT_ADDR},
 };
 
 /* default set to maintain compatibility with current format */
@@ -60,7 +69,8 @@ static struct {
 
                .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
                              PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
-                             PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM,
+                             PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+                                 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO,
 
                .invalid_fields = PERF_OUTPUT_TRACE,
        },
@@ -70,7 +80,8 @@ static struct {
 
                .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
                              PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
-                             PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM,
+                             PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+                                 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO,
 
                .invalid_fields = PERF_OUTPUT_TRACE,
        },
@@ -88,7 +99,8 @@ static struct {
 
                .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
                              PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
-                             PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM,
+                             PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+                                 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO,
 
                .invalid_fields = PERF_OUTPUT_TRACE,
        },
@@ -157,9 +169,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
                !perf_session__has_traces(session, "record -R"))
                return -EINVAL;
 
-       if (PRINT_FIELD(SYM)) {
+       if (PRINT_FIELD(IP)) {
                if (perf_event_attr__check_stype(attr, PERF_SAMPLE_IP, "IP",
-                                          PERF_OUTPUT_SYM))
+                                          PERF_OUTPUT_IP))
                        return -EINVAL;
 
                if (!no_callchain &&
@@ -167,6 +179,24 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
                        symbol_conf.use_callchain = false;
        }
 
+       if (PRINT_FIELD(ADDR) &&
+               perf_event_attr__check_stype(attr, PERF_SAMPLE_ADDR, "ADDR",
+                                      PERF_OUTPUT_ADDR))
+               return -EINVAL;
+
+       if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
+               pr_err("Display of symbols requested but neither sample IP nor "
+                          "sample address\nis selected. Hence, no addresses to convert "
+                      "to symbols.\n");
+               return -EINVAL;
+       }
+       if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
+               pr_err("Display of DSO requested but neither sample IP nor "
+                          "sample address\nis selected. Hence, no addresses to convert "
+                      "to DSO.\n");
+               return -EINVAL;
+       }
+
        if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
                perf_event_attr__check_stype(attr, PERF_SAMPLE_TID, "TID",
                                       PERF_OUTPUT_TID|PERF_OUTPUT_PID))
@@ -230,7 +260,7 @@ static void print_sample_start(struct perf_sample *sample,
        if (PRINT_FIELD(COMM)) {
                if (latency_format)
                        printf("%8.8s ", thread->comm);
-               else if (PRINT_FIELD(SYM) && symbol_conf.use_callchain)
+               else if (PRINT_FIELD(IP) && symbol_conf.use_callchain)
                        printf("%s ", thread->comm);
                else
                        printf("%16s ", thread->comm);
@@ -271,6 +301,63 @@ static void print_sample_start(struct perf_sample *sample,
        }
 }
 
+static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
+{
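+       /* these events record a resolvable data address in sample->addr */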
+       if ((attr->type == PERF_TYPE_SOFTWARE) &&
+           ((attr->config == PERF_COUNT_SW_PAGE_FAULTS) ||
+            (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MIN) ||
+            (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)))
+               return true;
+
+       return false;
+}
+
+static void print_sample_addr(union perf_event *event,
+                         struct perf_sample *sample,
+                         struct perf_session *session,
+                         struct thread *thread,
+                         struct perf_event_attr *attr)
+{
+       struct addr_location al;
+       u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+       const char *symname, *dsoname;
+
+       printf("%16" PRIx64, sample->addr);
+
+       if (!sample_addr_correlates_sym(attr))
+               return;
+
+       thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
+                             event->ip.pid, sample->addr, &al);
+       if (!al.map)
+               thread__find_addr_map(thread, session, cpumode, MAP__VARIABLE,
+                                     event->ip.pid, sample->addr, &al);
+
+       al.cpu = sample->cpu;
+       al.sym = NULL;
+
+       if (al.map)
+               al.sym = map__find_symbol(al.map, al.addr, NULL);
+
+       if (PRINT_FIELD(SYM)) {
+               if (al.sym && al.sym->name)
+                       symname = al.sym->name;
+               else
+                       symname = "";
+
+               printf(" %16s", symname);
+       }
+
+       if (PRINT_FIELD(DSO)) {
+               if (al.map && al.map->dso && al.map->dso->name)
+                       dsoname = al.map->dso->name;
+               else
+                       dsoname = "";
+
+               printf(" (%s)", dsoname);
+       }
+}
+
-static void process_event(union perf_event *event __unused,
+static void process_event(union perf_event *event,
                          struct perf_sample *sample,
                          struct perf_evsel *evsel,
@@ -288,12 +375,16 @@ static void process_event(union perf_event *event __unused,
                print_trace_event(sample->cpu, sample->raw_data,
                                  sample->raw_size);
 
-       if (PRINT_FIELD(SYM)) {
+       if (PRINT_FIELD(ADDR))
+               print_sample_addr(event, sample, session, thread, attr);
+
+       if (PRINT_FIELD(IP)) {
                if (!symbol_conf.use_callchain)
                        printf(" ");
                else
                        printf("\n");
-               perf_session__print_symbols(event, sample, session);
+               perf_session__print_ip(event, sample, session,
+                                             PRINT_FIELD(SYM), PRINT_FIELD(DSO));
        }
 
        printf("\n");
@@ -365,6 +456,10 @@ static int process_sample_event(union perf_event *event,
                last_timestamp = sample->time;
                return 0;
        }
+
+       if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+               return 0;
+
        scripting_ops->process_event(event, sample, evsel, session, thread);
 
        session->hists.stats.total_period += sample->period;
@@ -985,8 +1080,9 @@ static const struct option options[] = {
        OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
                    "Look for files with symbols relative to this directory"),
        OPT_CALLBACK('f', "fields", NULL, "str",
-                    "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,sym",
+                    "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
                     parse_output_fields),
+       OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 
        OPT_END()
 };
@@ -1167,6 +1263,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
        if (session == NULL)
                return -ENOMEM;
 
+       if (cpu_list) {
+               if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap))
+                       return -1;
+       }
+
        if (!no_callchain)
                symbol_conf.use_callchain = true;
        else
index a9f06715e44d021567131317310bc6096547e666..1ad04ce29c3479828c8a91b2a13efbaaf871a3f2 100644 (file)
@@ -61,6 +61,8 @@
 #include <locale.h>
 
 #define DEFAULT_SEPARATOR      " "
+#define CNTR_NOT_SUPPORTED     "<not supported>"
+#define CNTR_NOT_COUNTED       "<not counted>"
 
 static struct perf_event_attr default_attrs[] = {
 
@@ -448,6 +450,7 @@ static int run_perf_stat(int argc __used, const char **argv)
                                if (verbose)
                                        ui__warning("%s event is not supported by the kernel.\n",
                                                    event_name(counter));
+                               counter->supported = false;
                                continue;
                        }
 
@@ -466,6 +469,7 @@ static int run_perf_stat(int argc __used, const char **argv)
                        die("Not all events could be opened.\n");
                        return -1;
                }
+               counter->supported = true;
        }
 
        if (perf_evlist__set_filters(evsel_list)) {
@@ -513,7 +517,10 @@ static void print_noise_pct(double total, double avg)
        if (avg)
                pct = 100.0*total/avg;
 
-       fprintf(stderr, "  ( +-%6.2f%% )", pct);
+       if (csv_output)
+               fprintf(stderr, "%s%.2f%%", csv_sep, pct);
+       else
+               fprintf(stderr, "  ( +-%6.2f%% )", pct);
 }
 
 static void print_noise(struct perf_evsel *evsel, double avg)
@@ -861,7 +868,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
        if (scaled == -1) {
                fprintf(stderr, "%*s%s%*s",
                        csv_output ? 0 : 18,
-                       "<not counted>",
+                       counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
                        csv_sep,
                        csv_output ? 0 : -24,
                        event_name(counter));
@@ -878,13 +885,13 @@ static void print_counter_aggr(struct perf_evsel *counter)
        else
                abs_printout(-1, counter, avg);
 
+       print_noise(counter, avg);
+
        if (csv_output) {
                fputc('\n', stderr);
                return;
        }
 
-       print_noise(counter, avg);
-
        if (scaled) {
                double avg_enabled, avg_running;
 
@@ -914,7 +921,8 @@ static void print_counter(struct perf_evsel *counter)
                                csv_output ? 0 : -4,
                                evsel_list->cpus->map[cpu], csv_sep,
                                csv_output ? 0 : 18,
-                               "<not counted>", csv_sep,
+                               counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+                               csv_sep,
                                csv_output ? 0 : -24,
                                event_name(counter));
 
@@ -1024,7 +1032,7 @@ static int stat__set_big_num(const struct option *opt __used,
 static const struct option options[] = {
        OPT_CALLBACK('e', "event", &evsel_list, "event",
                     "event selector. use 'perf list' to list available events",
-                    parse_events),
+                    parse_events_option),
        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
                     "event filter", parse_filter),
        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
index 2da9162262b05a189fb969440f8816d90e55b99a..55f4c76f282114e3edb7bc5ce6852000f73c329b 100644 (file)
@@ -12,6 +12,7 @@
 #include "util/parse-events.h"
 #include "util/symbol.h"
 #include "util/thread_map.h"
+#include "../../include/linux/hw_breakpoint.h"
 
 static long page_size;
 
@@ -245,8 +246,8 @@ static int trace_event__id(const char *evname)
        int err = -1, fd;
 
        if (asprintf(&filename,
-                    "/sys/kernel/debug/tracing/events/syscalls/%s/id",
-                    evname) < 0)
+                    "%s/syscalls/%s/id",
+                    debugfs_path, evname) < 0)
                return -1;
 
        fd = open(filename, O_RDONLY);
@@ -600,6 +601,246 @@ out_free_threads:
 #undef nsyscalls
 }
 
+/* Note: "cond" must be parenthesized; callers pass expressions such as "1 == x". */
+#define TEST_ASSERT_VAL(text, cond) \
+do { \
+       if (!(cond)) { \
+               pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
+               return -1; \
+       } \
+} while (0)
+
+static int test__checkevent_tracepoint(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = list_entry(evlist->entries.next,
+                                             struct perf_evsel, node);
+
+       TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+       TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
+       TEST_ASSERT_VAL("wrong sample_type",
+               (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU) ==
+               evsel->attr.sample_type);
+       TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period);
+       return 0;
+}
+
+static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel;
+
+       TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
+
+       list_for_each_entry(evsel, &evlist->entries, node) {
+               TEST_ASSERT_VAL("wrong type",
+                       PERF_TYPE_TRACEPOINT == evsel->attr.type);
+               TEST_ASSERT_VAL("wrong sample_type",
+                       (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU)
+                       == evsel->attr.sample_type);
+               TEST_ASSERT_VAL("wrong sample_period",
+                       1 == evsel->attr.sample_period);
+       }
+       return 0;
+}
+
+static int test__checkevent_raw(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = list_entry(evlist->entries.next,
+                                             struct perf_evsel, node);
+
+       TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+       TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+       TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+       return 0;
+}
+
+static int test__checkevent_numeric(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = list_entry(evlist->entries.next,
+                                             struct perf_evsel, node);
+
+       TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+       TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type);
+       TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+       return 0;
+}
+
+static int test__checkevent_symbolic_name(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = list_entry(evlist->entries.next,
+                                             struct perf_evsel, node);
+
+       TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+       TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+       TEST_ASSERT_VAL("wrong config",
+                       PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+       return 0;
+}
+
+static int test__checkevent_symbolic_alias(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = list_entry(evlist->entries.next,
+                                             struct perf_evsel, node);
+
+       TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+       TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type);
+       TEST_ASSERT_VAL("wrong config",
+                       PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config);
+       return 0;
+}
+
+static int test__checkevent_genhw(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = list_entry(evlist->entries.next,
+                                             struct perf_evsel, node);
+
+       TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+       TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type);
+       TEST_ASSERT_VAL("wrong config", (1 << 16) == evsel->attr.config);
+       return 0;
+}
+
+static int test__checkevent_breakpoint(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = list_entry(evlist->entries.next,
+                                             struct perf_evsel, node);
+
+       TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+       TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type);
+       TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+       TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) ==
+                                        evsel->attr.bp_type);
+       TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_4 ==
+                                       evsel->attr.bp_len);
+       return 0;
+}
+
+static int test__checkevent_breakpoint_x(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = list_entry(evlist->entries.next,
+                                             struct perf_evsel, node);
+
+       TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+       TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type);
+       TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+       TEST_ASSERT_VAL("wrong bp_type",
+                       HW_BREAKPOINT_X == evsel->attr.bp_type);
+       TEST_ASSERT_VAL("wrong bp_len", sizeof(long) == evsel->attr.bp_len);
+       return 0;
+}
+
+static int test__checkevent_breakpoint_r(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = list_entry(evlist->entries.next,
+                                             struct perf_evsel, node);
+
+       TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+       TEST_ASSERT_VAL("wrong type",
+                       PERF_TYPE_BREAKPOINT == evsel->attr.type);
+       TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+       TEST_ASSERT_VAL("wrong bp_type",
+                       HW_BREAKPOINT_R == evsel->attr.bp_type);
+       TEST_ASSERT_VAL("wrong bp_len",
+                       HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len);
+       return 0;
+}
+
+static int test__checkevent_breakpoint_w(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = list_entry(evlist->entries.next,
+                                             struct perf_evsel, node);
+
+       TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+       TEST_ASSERT_VAL("wrong type",
+                       PERF_TYPE_BREAKPOINT == evsel->attr.type);
+       TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+       TEST_ASSERT_VAL("wrong bp_type",
+                       HW_BREAKPOINT_W == evsel->attr.bp_type);
+       TEST_ASSERT_VAL("wrong bp_len",
+                       HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len);
+       return 0;
+}
+
+static struct test__event_st {
+       const char *name;
+       __u32 type;
+       int (*check)(struct perf_evlist *evlist);
+} test__events[] = {
+       {
+               .name  = "syscalls:sys_enter_open",
+               .check = test__checkevent_tracepoint,
+       },
+       {
+               .name  = "syscalls:*",
+               .check = test__checkevent_tracepoint_multi,
+       },
+       {
+               .name  = "r1",
+               .check = test__checkevent_raw,
+       },
+       {
+               .name  = "1:1",
+               .check = test__checkevent_numeric,
+       },
+       {
+               .name  = "instructions",
+               .check = test__checkevent_symbolic_name,
+       },
+       {
+               .name  = "faults",
+               .check = test__checkevent_symbolic_alias,
+       },
+       {
+               .name  = "L1-dcache-load-miss",
+               .check = test__checkevent_genhw,
+       },
+       {
+               .name  = "mem:0",
+               .check = test__checkevent_breakpoint,
+       },
+       {
+               .name  = "mem:0:x",
+               .check = test__checkevent_breakpoint_x,
+       },
+       {
+               .name  = "mem:0:r",
+               .check = test__checkevent_breakpoint_r,
+       },
+       {
+               .name  = "mem:0:w",
+               .check = test__checkevent_breakpoint_w,
+       },
+};
+
+#define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st))
+
+static int test__parse_events(void)
+{
+       struct perf_evlist *evlist;
+       u_int i;
+       int ret = 0;
+
+       for (i = 0; i < TEST__EVENTS_CNT; i++) {
+               struct test__event_st *e = &test__events[i];
+
+               evlist = perf_evlist__new(NULL, NULL);
+               if (evlist == NULL)
+                       break;
+
+               ret = parse_events(evlist, e->name, 0);
+               if (ret) {
+                       pr_debug("failed to parse event '%s', err %d\n",
+                                e->name, ret);
+                       break;
+               }
+
+               ret = e->check(evlist);
+               if (ret)
+                       break;
+
+               perf_evlist__delete(evlist);
+       }
+
+       return ret;
+}
 static struct test {
        const char *desc;
        int (*func)(void);
@@ -620,6 +861,10 @@ static struct test {
                .desc = "read samples using the mmap interface",
                .func = test__basic_mmap,
        },
+       {
+               .desc = "parse events tests",
+               .func = test__parse_events,
+       },
        {
                .func = NULL,
        },
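
[Editor's note: the new self-tests follow a simple pattern -- each test__events[] entry pairs an event string with a check callback that inspects the resulting evlist. A minimal, hedged sketch of how a further case could be added; the ":k" modifier check, its function name, and the asserted attr bits are illustrative assumptions, not part of this series:

        static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist)
        {
                struct perf_evsel *evsel = list_entry(evlist->entries.next,
                                                      struct perf_evsel, node);

                TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
                TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
                /* ":k" is expected to exclude user-space counting */
                TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
                return 0;
        }

        /* ...plus a matching entry appended to test__events[]: */
        {
                .name  = "instructions:k",
                .check = test__checkevent_symbolic_name_modifier,
        },
]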
index f2f3f4937aa245142eda2082274b7b200e41db4e..a43433f083001e373e1f51ee22ed335dd330b368 100644 (file)
@@ -990,7 +990,7 @@ static const char * const top_usage[] = {
 static const struct option options[] = {
        OPT_CALLBACK('e', "event", &top.evlist, "event",
                     "event selector. use 'perf list' to list available events",
-                    parse_events),
+                    parse_events_option),
        OPT_INTEGER('c', "count", &default_interval,
                    "event period to sample"),
        OPT_INTEGER('p', "pid", &top.target_pid,
index 1a79df9f739f8425ce2991dcc0cc7a2eeac982dc..9b4ff16cac96dd924ec37c7fb4ab3226ded1f377 100644 (file)
@@ -14,6 +14,11 @@ enum chain_mode {
        CHAIN_GRAPH_REL
 };
 
+enum chain_order {
+       ORDER_CALLER,
+       ORDER_CALLEE
+};
+
 struct callchain_node {
        struct callchain_node   *parent;
        struct list_head        siblings;
@@ -41,6 +46,7 @@ struct callchain_param {
        u32                     print_limit;
        double                  min_percent;
        sort_chain_func_t       sort;
+       enum chain_order        order;
 };
 
 struct callchain_list {
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
new file mode 100644 (file)
index 0000000..fddf40f
--- /dev/null
@@ -0,0 +1,663 @@
+/*
+ * dwarf-aux.c : libdw auxiliary interfaces
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <stdbool.h>
+#include "util.h"
+#include "debug.h"
+#include "dwarf-aux.h"
+
+/**
+ * cu_find_realpath - Find the realpath of the target file
+ * @cu_die: A DIE (DWARF information entry) of a CU (compilation unit)
+ * @fname:  The tail filename of the target file
+ *
+ * Find the real (full) path of @fname in @cu_die.
+ */
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
+{
+       Dwarf_Files *files;
+       size_t nfiles, i;
+       const char *src = NULL;
+       int ret;
+
+       if (!fname)
+               return NULL;
+
+       ret = dwarf_getsrcfiles(cu_die, &files, &nfiles);
+       if (ret != 0)
+               return NULL;
+
+       for (i = 0; i < nfiles; i++) {
+               src = dwarf_filesrc(files, i, NULL, NULL);
+               if (strtailcmp(src, fname) == 0)
+                       break;
+       }
+       if (i == nfiles)
+               return NULL;
+       return src;
+}
+
+/**
+ * cu_get_comp_dir - Get the path of compilation directory
+ * @cu_die: a CU DIE
+ *
+ * Get the path of the compilation directory of the given @cu_die.
+ * Since this depends on DW_AT_comp_dir, which older gcc does not
+ * embed, this returns NULL in that case.
+ */
+const char *cu_get_comp_dir(Dwarf_Die *cu_die)
+{
+       Dwarf_Attribute attr;
+       if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL)
+               return NULL;
+       return dwarf_formstring(&attr);
+}
+
+/**
+ * cu_find_lineinfo - Get a line number and file name for given address
+ * @cu_die: a CU DIE
+ * @addr: An address
+ * @fname: a pointer which returns the file name string
+ * @lineno: a pointer which returns the line number
+ *
+ * Find a line number and file name for @addr in @cu_die.
+ */
+int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr,
+                   const char **fname, int *lineno)
+{
+       Dwarf_Line *line;
+       Dwarf_Addr laddr;
+
+       line = dwarf_getsrc_die(cu_die, (Dwarf_Addr)addr);
+       if (line && dwarf_lineaddr(line, &laddr) == 0 &&
+           addr == (unsigned long)laddr && dwarf_lineno(line, lineno) == 0) {
+               *fname = dwarf_linesrc(line, NULL, NULL);
+               if (!*fname)
+                       /* line number is useless without filename */
+                       *lineno = 0;
+       }
+
+       return *lineno ?: -ENOENT;
+}
+
+/**
+ * die_compare_name - Compare diename and tname
+ * @dw_die: a DIE
+ * @tname: a string of target name
+ *
+ * Compare the name of @dw_die and @tname. Return false if @dw_die has no name.
+ */
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
+{
+       const char *name;
+       name = dwarf_diename(dw_die);
+       return name ? (strcmp(tname, name) == 0) : false;
+}
+
+/**
+ * die_get_call_lineno - Get callsite line number of inline-function instance
+ * @in_die: a DIE of an inlined function instance
+ *
+ * Get the call-site line number of @in_die, i.e. the line from which
+ * the inline function is called.
+ */
+int die_get_call_lineno(Dwarf_Die *in_die)
+{
+       Dwarf_Attribute attr;
+       Dwarf_Word ret;
+
+       if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
+               return -ENOENT;
+
+       dwarf_formudata(&attr, &ret);
+       return (int)ret;
+}
+
+/**
+ * die_get_type - Get type DIE
+ * @vr_die: a DIE of a variable
+ * @die_mem: where to store a type DIE
+ *
+ * Get a DIE of the type of the given variable (@vr_die), and store
+ * it in @die_mem. Returns NULL if it fails to get a type DIE.
+ */
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+       Dwarf_Attribute attr;
+
+       if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
+           dwarf_formref_die(&attr, die_mem))
+               return die_mem;
+       else
+               return NULL;
+}
+
+/* Get a type die, but skip qualifiers */
+static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+       int tag;
+
+       do {
+               vr_die = die_get_type(vr_die, die_mem);
+               if (!vr_die)
+                       break;
+               tag = dwarf_tag(vr_die);
+       } while (tag == DW_TAG_const_type ||
+                tag == DW_TAG_restrict_type ||
+                tag == DW_TAG_volatile_type ||
+                tag == DW_TAG_shared_type);
+
+       return vr_die;
+}
+
+/**
+ * die_get_real_type - Get a type die, but skip qualifiers and typedef
+ * @vr_die: a DIE of a variable
+ * @die_mem: where to store a type DIE
+ *
+ * Get a DIE of the type of the given variable (@vr_die), and store
+ * it in @die_mem. Returns NULL if it fails to get a type DIE.
+ * If the type is a qualifier (e.g. const) or a typedef, this skips it
+ * and tries to find the real type (a structure or a basic type, e.g. int).
+ */
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+       do {
+               vr_die = __die_get_real_type(vr_die, die_mem);
+       } while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
+
+       return vr_die;
+}
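
[Editor's note: a hedged sketch of the difference between the two lookups; var_die is an assumed variable DIE whose declared type is, say, "const volatile unsigned long":

        Dwarf_Die type_mem, *type;

        /* die_get_type() would return the outermost DW_TAG_const_type DIE;
         * die_get_real_type() peels const/volatile qualifiers and typedefs
         * down to the underlying DW_TAG_base_type. */
        type = die_get_real_type(&var_die, &type_mem);
        if (type && dwarf_tag(type) == DW_TAG_base_type)
                pr_debug("base type: %s\n", dwarf_diename(type));
]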
+
+/* Get attribute and translate it as a udata */
+static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+                             Dwarf_Word *result)
+{
+       Dwarf_Attribute attr;
+
+       if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
+           dwarf_formudata(&attr, result) != 0)
+               return -ENOENT;
+
+       return 0;
+}
+
+/**
+ * die_is_signed_type - Check whether a type DIE is signed or not
+ * @tp_die: a DIE of a type
+ *
+ * Get the encoding of @tp_die and return true if the encoding
+ * is signed.
+ */
+bool die_is_signed_type(Dwarf_Die *tp_die)
+{
+       Dwarf_Word ret;
+
+       if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret))
+               return false;
+
+       return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
+               ret == DW_ATE_signed_fixed);
+}
+
+/**
+ * die_get_data_member_location - Get the data-member offset
+ * @mb_die: a DIE of a member of a data structure
+ * @offs: The offset of the member in the data structure
+ *
+ * Get the offset of @mb_die within the data structure that contains it,
+ * and store the resulting offset in @offs. If any error occurs, this
+ * returns a negative errno.
+ */
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
+{
+       Dwarf_Attribute attr;
+       Dwarf_Op *expr;
+       size_t nexpr;
+       int ret;
+
+       if (dwarf_attr(mb_die, DW_AT_data_member_location, &attr) == NULL)
+               return -ENOENT;
+
+       if (dwarf_formudata(&attr, offs) != 0) {
+               /* DW_AT_data_member_location should be DW_OP_plus_uconst */
+               ret = dwarf_getlocation(&attr, &expr, &nexpr);
+               if (ret < 0 || nexpr == 0)
+                       return -ENOENT;
+
+               if (expr[0].atom != DW_OP_plus_uconst || nexpr != 1) {
+                       pr_debug("Unable to get offset: unexpected OP %x (%zd)\n",
+                                expr[0].atom, nexpr);
+                       return -ENOTSUP;
+               }
+               *offs = (Dwarf_Word)expr[0].number;
+       }
+       return 0;
+}
+
+/**
+ * die_find_child - Generic DIE search function in DIE tree
+ * @rt_die: a root DIE
+ * @callback: a callback function
+ * @data: a user data passed to the callback function
+ * @die_mem: a buffer for result DIE
+ *
+ * Trace the DIE tree from @rt_die and call @callback for each child DIE.
+ * If @callback returns DIE_FIND_CB_END, this stores the DIE into
+ * @die_mem and returns it. If @callback returns DIE_FIND_CB_CONTINUE,
+ * this continues to trace the tree. Optionally, @callback can return
+ * DIE_FIND_CB_CHILD or DIE_FIND_CB_SIBLING, which mean trace only
+ * the children or only the siblings, respectively.
+ * Returns NULL if @callback can't find any appropriate DIE.
+ */
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+                         int (*callback)(Dwarf_Die *, void *),
+                         void *data, Dwarf_Die *die_mem)
+{
+       Dwarf_Die child_die;
+       int ret;
+
+       ret = dwarf_child(rt_die, die_mem);
+       if (ret != 0)
+               return NULL;
+
+       do {
+               ret = callback(die_mem, data);
+               if (ret == DIE_FIND_CB_END)
+                       return die_mem;
+
+               if ((ret & DIE_FIND_CB_CHILD) &&
+                   die_find_child(die_mem, callback, data, &child_die)) {
+                       memcpy(die_mem, &child_die, sizeof(Dwarf_Die));
+                       return die_mem;
+               }
+       } while ((ret & DIE_FIND_CB_SIBLING) &&
+                dwarf_siblingof(die_mem, die_mem) == 0);
+
+       return NULL;
+}
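
[Editor's note: to make the callback contract concrete, a hedged sketch of a caller-supplied callback that stops at the first child DIE carrying the tag passed via @data; DIE_FIND_CB_CONTINUE (== DIE_FIND_CB_CHILD | DIE_FIND_CB_SIBLING) keeps the whole subtree in play. The callback name and usage context are illustrative:

        static int __die_find_tag_cb(Dwarf_Die *die_mem, void *data)
        {
                int *tag = data;

                if (dwarf_tag(die_mem) == *tag)
                        return DIE_FIND_CB_END;         /* found: stop here */

                return DIE_FIND_CB_CONTINUE;            /* descend and advance */
        }

        /* usage (sp_die assumed to be a subprogram DIE): */
        int tag = DW_TAG_lexical_block;
        Dwarf_Die die_mem, *block;

        block = die_find_child(sp_die, __die_find_tag_cb, &tag, &die_mem);
]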
+
+struct __addr_die_search_param {
+       Dwarf_Addr      addr;
+       Dwarf_Die       *die_mem;
+};
+
+/* die_find callback for non-inlined function search */
+static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
+{
+       struct __addr_die_search_param *ad = data;
+
+       if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+           dwarf_haspc(fn_die, ad->addr)) {
+               memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+               return DWARF_CB_ABORT;
+       }
+       return DWARF_CB_OK;
+}
+
+/**
+ * die_find_realfunc - Search a non-inlined function at given address
+ * @cu_die: a CU DIE which includes @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search a non-inlined function DIE which includes @addr. Stores the
+ * DIE in @die_mem and returns it if found. Returns NULL on failure.
+ */
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+                                   Dwarf_Die *die_mem)
+{
+       struct __addr_die_search_param ad;
+       ad.addr = addr;
+       ad.die_mem = die_mem;
+       /* dwarf_getscopes can't find subprogram. */
+       if (!dwarf_getfuncs(cu_die, __die_search_func_cb, &ad, 0))
+               return NULL;
+       else
+               return die_mem;
+}
+
+/* die_find callback for inline function search */
+static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data)
+{
+       Dwarf_Addr *addr = data;
+
+       if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine &&
+           dwarf_haspc(die_mem, *addr))
+               return DIE_FIND_CB_END;
+
+       return DIE_FIND_CB_CONTINUE;
+}
+
+/**
+ * die_find_inlinefunc - Search an inlined function at given address
+ * @sp_die: a subprogram DIE which includes @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search an inlined function DIE which includes @addr. Stores the
+ * DIE in @die_mem and returns it if found. Returns NULL on failure.
+ * If several inlined functions are expanded recursively, this traces
+ * them and returns the deepest one.
+ */
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+                              Dwarf_Die *die_mem)
+{
+       Dwarf_Die tmp_die;
+
+       sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr, &tmp_die);
+       if (!sp_die)
+               return NULL;
+
+       /* Inlined functions can be nested. Trace until the search fails */
+       while (sp_die) {
+               memcpy(die_mem, sp_die, sizeof(Dwarf_Die));
+               sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr,
+                                       &tmp_die);
+       }
+
+       return die_mem;
+}
+
+/* Line walker internal parameters */
+struct __line_walk_param {
+       const char *fname;
+       line_walk_callback_t callback;
+       void *data;
+       int retval;
+};
+
+static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
+{
+       struct __line_walk_param *lw = data;
+       Dwarf_Addr addr;
+       int lineno;
+
+       if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
+               lineno = die_get_call_lineno(in_die);
+               if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
+                       lw->retval = lw->callback(lw->fname, lineno, addr,
+                                                 lw->data);
+                       if (lw->retval != 0)
+                               return DIE_FIND_CB_END;
+               }
+       }
+       return DIE_FIND_CB_SIBLING;
+}
+
+/* Walk on lines of blocks included in given DIE */
+static int __die_walk_funclines(Dwarf_Die *sp_die,
+                               line_walk_callback_t callback, void *data)
+{
+       struct __line_walk_param lw = {
+               .callback = callback,
+               .data = data,
+               .retval = 0,
+       };
+       Dwarf_Die die_mem;
+       Dwarf_Addr addr;
+       int lineno;
+
+       /* Handle function declaration line */
+       lw.fname = dwarf_decl_file(sp_die);
+       if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
+           dwarf_entrypc(sp_die, &addr) == 0) {
+               lw.retval = callback(lw.fname, lineno, addr, data);
+               if (lw.retval != 0)
+                       goto done;
+       }
+       die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem);
+done:
+       return lw.retval;
+}
+
+static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
+{
+       struct __line_walk_param *lw = data;
+
+       lw->retval = __die_walk_funclines(sp_die, lw->callback, lw->data);
+       if (lw->retval != 0)
+               return DWARF_CB_ABORT;
+
+       return DWARF_CB_OK;
+}
+
+/**
+ * die_walk_lines - Walk on lines inside given DIE
+ * @rt_die: a root DIE (CU or subprogram)
+ * @callback: callback routine
+ * @data: user data
+ *
+ * Walk all lines inside the given @rt_die and call @callback on each line.
+ * If @rt_die is a function, walk only the lines inside the function,
+ * otherwise @rt_die must be a CU DIE.
+ * Note that this walks not only the DWARF line list, but also function
+ * entries and inline call-sites.
+ */
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
+{
+       Dwarf_Lines *lines;
+       Dwarf_Line *line;
+       Dwarf_Addr addr;
+       const char *fname;
+       int lineno, ret = 0;
+       Dwarf_Die die_mem, *cu_die;
+       size_t nlines, i;
+
+       /* Get the CU die */
+       if (dwarf_tag(rt_die) == DW_TAG_subprogram)
+               cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
+       else
+               cu_die = rt_die;
+       if (!cu_die) {
+               pr_debug2("Failed to get CU from subprogram\n");
+               return -EINVAL;
+       }
+
+       /* Get lines list in the CU */
+       if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
+               pr_debug2("Failed to get source lines on this CU.\n");
+               return -ENOENT;
+       }
+       pr_debug2("Get %zd lines from this CU\n", nlines);
+
+       /* Walk the lines in the lines list */
+       for (i = 0; i < nlines; i++) {
+               line = dwarf_onesrcline(lines, i);
+               if (line == NULL ||
+                   dwarf_lineno(line, &lineno) != 0 ||
+                   dwarf_lineaddr(line, &addr) != 0) {
+                       pr_debug2("Failed to get line info. "
+                                 "Possible error in debuginfo.\n");
+                       continue;
+               }
+               /* Filter lines based on address */
+               if (rt_die != cu_die)
+                       /*
+                        * Address filtering
+                        * The line is included in given function, and
+                        * no inline block includes it.
+                        */
+                       if (!dwarf_haspc(rt_die, addr) ||
+                           die_find_inlinefunc(rt_die, addr, &die_mem))
+                               continue;
+               /* Get source line */
+               fname = dwarf_linesrc(line, NULL, NULL);
+
+               ret = callback(fname, lineno, addr, data);
+               if (ret != 0)
+                       return ret;
+       }
+
+       /*
+        * The DWARF line list doesn't include function declarations and
+        * inlined subroutines. We have to check the functions list or the
+        * given function.
+        */
+       if (rt_die != cu_die)
+               ret = __die_walk_funclines(rt_die, callback, data);
+       else {
+               struct __line_walk_param param = {
+                       .callback = callback,
+                       .data = data,
+                       .retval = 0,
+               };
+               dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
+               ret = param.retval;
+       }
+
+       return ret;
+}
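
[Editor's note: a hedged sketch of a line_walk_callback_t as the walker expects it; returning non-zero from the callback aborts the walk, and that value becomes die_walk_lines()'s return value. The callback name is illustrative:

        static int dump_line_cb(const char *fname, int lineno, Dwarf_Addr addr,
                                void *data __used)
        {
                pr_debug("%s:%d at %#jx\n", fname, lineno, (uintmax_t)addr);
                return 0;       /* non-zero would stop the walk */
        }

        /* usage (cu_die assumed to be a CU DIE): */
        die_walk_lines(&cu_die, dump_line_cb, NULL);
]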
+
+struct __find_variable_param {
+       const char *name;
+       Dwarf_Addr addr;
+};
+
+static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
+{
+       struct __find_variable_param *fvp = data;
+       int tag;
+
+       tag = dwarf_tag(die_mem);
+       if ((tag == DW_TAG_formal_parameter ||
+            tag == DW_TAG_variable) &&
+           die_compare_name(die_mem, fvp->name))
+               return DIE_FIND_CB_END;
+
+       if (dwarf_haspc(die_mem, fvp->addr))
+               return DIE_FIND_CB_CONTINUE;
+       else
+               return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_variable_at - Find a given name variable at given address
+ * @sp_die: a function DIE
+ * @name: variable name
+ * @addr: address
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a variable DIE called @name at @addr in @sp_die.
+ */
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+                               Dwarf_Addr addr, Dwarf_Die *die_mem)
+{
+       struct __find_variable_param fvp = { .name = name, .addr = addr};
+
+       return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
+                             die_mem);
+}
+
+static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
+{
+       const char *name = data;
+
+       if ((dwarf_tag(die_mem) == DW_TAG_member) &&
+           die_compare_name(die_mem, name))
+               return DIE_FIND_CB_END;
+
+       return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_member - Find a given name member in a data structure
+ * @st_die: a data structure type DIE
+ * @name: member name
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a member DIE called @name in @st_die.
+ */
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+                          Dwarf_Die *die_mem)
+{
+       return die_find_child(st_die, __die_find_member_cb, (void *)name,
+                             die_mem);
+}
+
+/**
+ * die_get_typename - Get the type name of given variable DIE
+ * @vr_die: a variable DIE
+ * @buf: a buffer for the result type name
+ * @len: the max length of @buf
+ *
+ * Get the type name of @vr_die and store it in @buf. Returns the actual
+ * length of the type name on success, -E2BIG if @len is not long enough,
+ * and -ENOENT if the type name can't be found.
+ * Note that the result stores the typedef name if possible, and stores
+ * "(function_type)" if the type is a function pointer.
+ */
+int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
+{
+       Dwarf_Die type;
+       int tag, ret, ret2;
+       const char *tmp = "";
+
+       if (__die_get_real_type(vr_die, &type) == NULL)
+               return -ENOENT;
+
+       tag = dwarf_tag(&type);
+       if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
+               tmp = "*";
+       else if (tag == DW_TAG_subroutine_type) {
+               /* Function pointer */
+               ret = snprintf(buf, len, "(function_type)");
+               return (ret >= len) ? -E2BIG : ret;
+       } else {
+               if (!dwarf_diename(&type))
+                       return -ENOENT;
+               if (tag == DW_TAG_union_type)
+                       tmp = "union ";
+               else if (tag == DW_TAG_structure_type)
+                       tmp = "struct ";
+               /* Write a base name */
+               ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
+               return (ret >= len) ? -E2BIG : ret;
+       }
+       ret = die_get_typename(&type, buf, len);
+       if (ret > 0) {
+               ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
+               ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+       }
+       return ret;
+}
+
+/**
+ * die_get_varname - Get the name and type of given variable DIE
+ * @vr_die: a variable DIE
+ * @buf: a buffer for type and variable name
+ * @len: the max-length of @buf
+ *
+ * Get the name and type of @vr_die and store them in @buf as "type\tname".
+ */
+int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
+{
+       int ret, ret2;
+
+       ret = die_get_typename(vr_die, buf, len);
+       if (ret < 0) {
+               pr_debug("Failed to get type, make it unknown.\n");
+               ret = snprintf(buf, len, "(unknown_type)");
+       }
+       if (ret > 0) {
+               ret2 = snprintf(buf + ret, len - ret, "\t%s",
+                               dwarf_diename(vr_die));
+               ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+       }
+       return ret;
+}
+
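
[Editor's note: putting the last two helpers together, a hedged usage sketch (var_die is an assumed variable DIE). For a variable declared as "struct task_struct *curr", die_get_typename() yields "struct task_struct*" and die_get_varname() appends the variable name after a tab:

        char buf[128];

        if (die_get_varname(&var_die, buf, sizeof(buf)) > 0)
                pr_debug("var: %s\n", buf);     /* e.g. "struct task_struct*\tcurr" */
]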
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
new file mode 100644 (file)
index 0000000..bc3b211
--- /dev/null
@@ -0,0 +1,100 @@
+#ifndef _DWARF_AUX_H
+#define _DWARF_AUX_H
+/*
+ * dwarf-aux.h : libdw auxiliary interfaces
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <elfutils/version.h>
+
+/* Find the realpath of the target file */
+extern const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
+
+/* Get DW_AT_comp_dir (should be NULL with older gcc) */
+extern const char *cu_get_comp_dir(Dwarf_Die *cu_die);
+
+/* Get a line number and file name for given address */
+extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
+                           const char **fname, int *lineno);
+
+/* Compare diename and tname */
+extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
+
+/* Get callsite line number of inline-function instance */
+extern int die_get_call_lineno(Dwarf_Die *in_die);
+
+/* Get type die */
+extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+
+/* Get a type die, but skip qualifiers and typedef */
+extern Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+
+/* Check whether the DIE is signed or not */
+extern bool die_is_signed_type(Dwarf_Die *tp_die);
+
+/* Get data_member_location offset */
+extern int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
+
+/* Return values for die_find_child() callbacks */
+enum {
+       DIE_FIND_CB_END = 0,            /* End of Search */
+       DIE_FIND_CB_CHILD = 1,          /* Search only children */
+       DIE_FIND_CB_SIBLING = 2,        /* Search only siblings */
+       DIE_FIND_CB_CONTINUE = 3,       /* Search children and siblings */
+};
+
+/* Search child DIEs */
+extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+                                int (*callback)(Dwarf_Die *, void *),
+                                void *data, Dwarf_Die *die_mem);
+
+/* Search a non-inlined function including given address */
+extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+                                   Dwarf_Die *die_mem);
+
+/* Search an inlined function including given address */
+extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+                                     Dwarf_Die *die_mem);
+
+/* Walker on lines (Note: line numbers will not be sorted) */
+typedef int (* line_walk_callback_t) (const char *fname, int lineno,
+                                     Dwarf_Addr addr, void *data);
+
+/*
+ * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on
+ * the lines inside the subprogram, otherwise the DIE must be a CU DIE.
+ */
+extern int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback,
+                         void *data);
+
+/* Find a variable called 'name' at given address */
+extern Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+                                      Dwarf_Addr addr, Dwarf_Die *die_mem);
+
+/* Find a member called 'name' */
+extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+                                 Dwarf_Die *die_mem);
+
+/* Get the type name of given variable DIE */
+extern int die_get_typename(Dwarf_Die *vr_die, char *buf, int len);
+
+/* Get the name and type of given variable DIE, stored as "type\tname" */
+extern int die_get_varname(Dwarf_Die *vr_die, char *buf, int len);
+#endif
index 0239eb87b2321ea9e4fe208605020e83b1d7c718..a03a36b7908a595e1ed91400bbab289cf3778e38 100644 (file)
@@ -377,6 +377,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
                array++;
        }
 
+       data->addr = 0;
        if (type & PERF_SAMPLE_ADDR) {
                data->addr = *array;
                array++;
index 7e9366e4490b532bce4facc7b92e38a83c9c68e4..e9a31554e2658cf1ac50a0d476f428c691ed5482 100644 (file)
@@ -61,6 +61,7 @@ struct perf_evsel {
                off_t           id_offset;
        };
        struct cgroup_sel       *cgrp;
+       bool                    supported;
 };
 
 struct cpu_map;
index afb0849fe530e5f4a06937457bb98644acf681df..cb2959a3fb43e12f6e7de76c6a7fc6a04221f8b7 100644 (file)
@@ -877,9 +877,12 @@ int perf_session__read_header(struct perf_session *session, int fd)
                struct perf_evsel *evsel;
                off_t tmp;
 
-               if (perf_header__getbuffer64(header, fd, &f_attr, sizeof(f_attr)))
+               if (readn(fd, &f_attr, sizeof(f_attr)) <= 0)
                        goto out_errno;
 
+               if (header->needs_swap)
+                       perf_event__attr_swap(&f_attr.attr);
+
                tmp = lseek(fd, 0, SEEK_CUR);
                evsel = perf_evsel__new(&f_attr.attr, i);
 
index 627a02e03c57ab381d97cf2be9756997851d752d..677e1da6bb3eae6d688cd7e1afc51070e55810ea 100644 (file)
@@ -14,7 +14,8 @@ enum hist_filter {
 
 struct callchain_param callchain_param = {
        .mode   = CHAIN_GRAPH_REL,
-       .min_percent = 0.5
+       .min_percent = 0.5,
+       .order  = ORDER_CALLEE
 };
 
 u16 hists__col_len(struct hists *self, enum hist_column col)
@@ -846,6 +847,9 @@ print_entries:
        for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
+               if (h->filtered)
+                       continue;
+
                if (show_displacement) {
                        if (h->pair != NULL)
                                displacement = ((long)h->pair->position -
index 41982c373faf2a075f9bbba9bedcd62cc402e0b8..4ea7e19f52515d61a1bd02ccac9e4942bad60dfa 100644 (file)
@@ -86,22 +86,24 @@ static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
 
 #define MAX_ALIASES 8
 
-static const char *hw_cache[][MAX_ALIASES] = {
+static const char *hw_cache[PERF_COUNT_HW_CACHE_MAX][MAX_ALIASES] = {
  { "L1-dcache",        "l1-d",         "l1d",          "L1-data",              },
  { "L1-icache",        "l1-i",         "l1i",          "L1-instruction",       },
- { "LLC",      "L2"                                                    },
+ { "LLC",      "L2",                                                   },
  { "dTLB",     "d-tlb",        "Data-TLB",                             },
  { "iTLB",     "i-tlb",        "Instruction-TLB",                      },
  { "branch",   "branches",     "bpu",          "btb",          "bpc",  },
+ { "node",                                                             },
 };
 
-static const char *hw_cache_op[][MAX_ALIASES] = {
+static const char *hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][MAX_ALIASES] = {
  { "load",     "loads",        "read",                                 },
  { "store",    "stores",       "write",                                },
  { "prefetch", "prefetches",   "speculative-read", "speculative-load", },
 };
 
-static const char *hw_cache_result[][MAX_ALIASES] = {
+static const char *hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
+                                 [MAX_ALIASES] = {
  { "refs",     "Reference",    "ops",          "access",               },
  { "misses",   "miss",                                                 },
 };
@@ -124,6 +126,7 @@ static unsigned long hw_cache_stat[C(MAX)] = {
  [C(DTLB)]     = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
  [C(ITLB)]     = (CACHE_READ),
  [C(BPU)]      = (CACHE_READ),
+ [C(NODE)]     = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 };
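
[Editor's note: with the "node" row added to hw_cache[] and C(NODE) marked as read/write/prefetch-capable in hw_cache_stat, generic cache-event strings assembled from hw_cache[], hw_cache_op[] and hw_cache_result[] now parse for NUMA-node accesses, e.g. (illustrative command):

        perf stat -e node-loads -e node-load-misses -- ./workload
]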
 
 #define for_each_subsystem(sys_dir, sys_dirent, sys_next)             \
@@ -393,7 +396,7 @@ parse_generic_hw_event(const char **str, struct perf_event_attr *attr)
                                                PERF_COUNT_HW_CACHE_OP_MAX);
                        if (cache_op >= 0) {
                                if (!is_cache_op_valid(cache_type, cache_op))
-                                       return 0;
+                                       return EVT_FAILED;
                                continue;
                        }
                }
@@ -475,7 +478,7 @@ parse_single_tracepoint_event(char *sys_name,
 /* sys + ':' + event + ':' + flags*/
 #define MAX_EVOPT_LEN  (MAX_EVENT_LENGTH * 2 + 2 + 128)
 static enum event_result
-parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,
+parse_multiple_tracepoint_event(struct perf_evlist *evlist, char *sys_name,
                                const char *evt_exp, char *flags)
 {
        char evt_path[MAXPATHLEN];
@@ -509,7 +512,7 @@ parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,
                if (len < 0)
                        return EVT_FAILED;
 
-               if (parse_events(opt, event_opt, 0))
+               if (parse_events(evlist, event_opt, 0))
                        return EVT_FAILED;
        }
 
@@ -517,7 +520,7 @@ parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,
 }
 
 static enum event_result
-parse_tracepoint_event(const struct option *opt, const char **strp,
+parse_tracepoint_event(struct perf_evlist *evlist, const char **strp,
                       struct perf_event_attr *attr)
 {
        const char *evt_name;
@@ -557,8 +560,8 @@ parse_tracepoint_event(const struct option *opt, const char **strp,
                return EVT_FAILED;
        if (strpbrk(evt_name, "*?")) {
                *strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */
-               return parse_multiple_tracepoint_event(opt, sys_name, evt_name,
-                                                      flags);
+               return parse_multiple_tracepoint_event(evlist, sys_name,
+                                                      evt_name, flags);
        } else {
                return parse_single_tracepoint_event(sys_name, evt_name,
                                                     evt_length, attr, strp);
@@ -778,12 +781,12 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
  * Symbolic names are (almost) exactly matched.
  */
 static enum event_result
-parse_event_symbols(const struct option *opt, const char **str,
+parse_event_symbols(struct perf_evlist *evlist, const char **str,
                    struct perf_event_attr *attr)
 {
        enum event_result ret;
 
-       ret = parse_tracepoint_event(opt, str, attr);
+       ret = parse_tracepoint_event(evlist, str, attr);
        if (ret != EVT_FAILED)
                goto modifier;
 
@@ -822,9 +825,8 @@ modifier:
        return ret;
 }
 
-int parse_events(const struct option *opt, const char *str, int unset __used)
+int parse_events(struct perf_evlist *evlist , const char *str, int unset __used)
 {
-       struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
        struct perf_event_attr attr;
        enum event_result ret;
        const char *ostr;
@@ -832,7 +834,7 @@ int parse_events(const struct option *opt, const char *str, int unset __used)
        for (;;) {
                ostr = str;
                memset(&attr, 0, sizeof(attr));
-               ret = parse_event_symbols(opt, &str, &attr);
+               ret = parse_event_symbols(evlist, &str, &attr);
                if (ret == EVT_FAILED)
                        return -1;
 
@@ -863,6 +865,13 @@ int parse_events(const struct option *opt, const char *str, int unset __used)
        return 0;
 }
 
+int parse_events_option(const struct option *opt, const char *str,
+                       int unset __used)
+{
+       struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
+       return parse_events(evlist, str, unset);
+}
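
[Editor's note: after this de-opt split, code that is not option parsing can fill an evlist directly, exactly as the new tests above do. A minimal hedged sketch; the event string is illustrative:

        struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);

        if (evlist && parse_events(evlist, "cycles:k", 0) == 0)
                pr_debug("parsed %d event(s)\n", evlist->nr_entries);
]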
+
 int parse_filter(const struct option *opt, const char *str,
                 int unset __used)
 {
index 746d3fcbfc2a4ee1b6f3e031286ee5b03a30aa6f..2f8e375e038dd9adbe45368cbc79a1d22801826f 100644 (file)
@@ -8,6 +8,7 @@
 
 struct list_head;
 struct perf_evsel;
+struct perf_evlist;
 
 struct option;
 
@@ -24,7 +25,10 @@ const char *event_type(int type);
 const char *event_name(struct perf_evsel *event);
 extern const char *__event_name(int type, u64 config);
 
-extern int parse_events(const struct option *opt, const char *str, int unset);
+extern int parse_events_option(const struct option *opt, const char *str,
+                              int unset);
+extern int parse_events(struct perf_evlist *evlist, const char *str,
+                       int unset);
 extern int parse_filter(const struct option *opt, const char *str, int unset);
 
 #define EVENTS_HELP_MAX (128*1024)
index f0223166e76165cd1e25d86cade2d7bb8337c9c6..b82d54fa2c566d420a32e2e675c873a6ce7c7720 100644 (file)
@@ -117,6 +117,10 @@ static struct map *kernel_get_module_map(const char *module)
        struct rb_node *nd;
        struct map_groups *grp = &machine.kmaps;
 
+       /* A file path -- this is an offline module */
+       if (module && strchr(module, '/'))
+               return machine__new_module(&machine, 0, module);
+
        if (!module)
                module = "kernel";
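
[Editor's note: since a module argument containing a '/' is now treated as a file path, probes can be defined against a module image that is not loaded, e.g. (hypothetical path and symbol):

        perf probe -m ./fs/ext4/ext4.ko --add ext4_sync_file
]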
 
@@ -170,16 +174,24 @@ const char *kernel_get_module_path(const char *module)
 }
 
 #ifdef DWARF_SUPPORT
-static int open_vmlinux(const char *module)
+/* Open new debuginfo of given module */
+static struct debuginfo *open_debuginfo(const char *module)
 {
-       const char *path = kernel_get_module_path(module);
-       if (!path) {
-               pr_err("Failed to find path of %s module.\n",
-                      module ?: "kernel");
-               return -ENOENT;
+       const char *path;
+
+       /* A file path -- this is an offline module */
+       if (module && strchr(module, '/'))
+               path = module;
+       else {
+               path = kernel_get_module_path(module);
+
+               if (!path) {
+                       pr_err("Failed to find path of %s module.\n",
+                              module ?: "kernel");
+                       return NULL;
+               }
        }
-       pr_debug("Try to open %s\n", path);
-       return open(path, O_RDONLY);
+       return debuginfo__new(path);
 }
 
 /*
@@ -193,13 +205,24 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
        struct map *map;
        u64 addr;
        int ret = -ENOENT;
+       struct debuginfo *dinfo;
 
        sym = __find_kernel_function_by_name(tp->symbol, &map);
        if (sym) {
                addr = map->unmap_ip(map, sym->start + tp->offset);
                pr_debug("try to find %s+%ld@%" PRIx64 "\n", tp->symbol,
                         tp->offset, addr);
-               ret = find_perf_probe_point((unsigned long)addr, pp);
+
+               dinfo = debuginfo__new_online_kernel(addr);
+               if (dinfo) {
+                       ret = debuginfo__find_probe_point(dinfo,
+                                                (unsigned long)addr, pp);
+                       debuginfo__delete(dinfo);
+               } else {
+                       pr_debug("Failed to open debuginfo at 0x%" PRIx64 "\n",
+                                addr);
+                       ret = -ENOENT;
+               }
        }
        if (ret <= 0) {
                pr_debug("Failed to find corresponding probes from "
@@ -214,30 +237,70 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
        return 0;
 }
 
+static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
+                                           int ntevs, const char *module)
+{
+       int i, ret = 0;
+       char *tmp;
+
+       if (!module)
+               return 0;
+
+       tmp = strrchr(module, '/');
+       if (tmp) {
+               /* This is a module path -- get the module name */
+               module = strdup(tmp + 1);
+               if (!module)
+                       return -ENOMEM;
+               tmp = strchr(module, '.');
+               if (tmp)
+                       *tmp = '\0';
+               tmp = (char *)module;   /* For free() */
+       }
+
+       for (i = 0; i < ntevs; i++) {
+               tevs[i].point.module = strdup(module);
+               if (!tevs[i].point.module) {
+                       ret = -ENOMEM;
+                       break;
+               }
+       }
+
+       if (tmp)
+               free(tmp);
+
+       return ret;
+}
+
 /* Try to find perf_probe_event with debuginfo */
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
-                                          struct probe_trace_event **tevs,
-                                          int max_tevs, const char *module)
+                                         struct probe_trace_event **tevs,
+                                         int max_tevs, const char *module)
 {
        bool need_dwarf = perf_probe_event_need_dwarf(pev);
-       int fd, ntevs;
+       struct debuginfo *dinfo = open_debuginfo(module);
+       int ntevs, ret = 0;
 
-       fd = open_vmlinux(module);
-       if (fd < 0) {
+       if (!dinfo) {
                if (need_dwarf) {
                        pr_warning("Failed to open debuginfo file.\n");
-                       return fd;
+                       return -ENOENT;
                }
-               pr_debug("Could not open vmlinux. Try to use symbols.\n");
+               pr_debug("Could not open debuginfo. Try to use symbols.\n");
                return 0;
        }
 
-       /* Searching trace events corresponding to probe event */
-       ntevs = find_probe_trace_events(fd, pev, tevs, max_tevs);
+       /* Searching trace events corresponding to a probe event */
+       ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs);
+
+       debuginfo__delete(dinfo);
 
        if (ntevs > 0) {        /* Succeeded to find trace events */
                pr_debug("find %d probe_trace_events.\n", ntevs);
-               return ntevs;
+               if (module)
+                       ret = add_module_to_probe_trace_events(*tevs, ntevs,
+                                                              module);
+               return ret < 0 ? ret : ntevs;
        }
 
        if (ntevs == 0) {       /* No error but failed to find probe point. */
@@ -371,8 +434,9 @@ int show_line_range(struct line_range *lr, const char *module)
 {
        int l = 1;
        struct line_node *ln;
+       struct debuginfo *dinfo;
        FILE *fp;
-       int fd, ret;
+       int ret;
        char *tmp;
 
        /* Search a line range */
@@ -380,13 +444,14 @@ int show_line_range(struct line_range *lr, const char *module)
        if (ret < 0)
                return ret;
 
-       fd = open_vmlinux(module);
-       if (fd < 0) {
+       dinfo = open_debuginfo(module);
+       if (!dinfo) {
                pr_warning("Failed to open debuginfo file.\n");
-               return fd;
+               return -ENOENT;
        }
 
-       ret = find_line_range(fd, lr);
+       ret = debuginfo__find_line_range(dinfo, lr);
+       debuginfo__delete(dinfo);
        if (ret == 0) {
                pr_warning("Specified source line is not found.\n");
                return -ENOENT;
@@ -448,7 +513,8 @@ end:
        return ret;
 }
 
-static int show_available_vars_at(int fd, struct perf_probe_event *pev,
+static int show_available_vars_at(struct debuginfo *dinfo,
+                                 struct perf_probe_event *pev,
                                  int max_vls, struct strfilter *_filter,
                                  bool externs)
 {
@@ -463,7 +529,8 @@ static int show_available_vars_at(int fd, struct perf_probe_event *pev,
                return -EINVAL;
        pr_debug("Searching variables at %s\n", buf);
 
-       ret = find_available_vars_at(fd, pev, &vls, max_vls, externs);
+       ret = debuginfo__find_available_vars_at(dinfo, pev, &vls,
+                                               max_vls, externs);
        if (ret <= 0) {
                pr_err("Failed to find variables at %s (%d)\n", buf, ret);
                goto end;
@@ -504,24 +571,26 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
                        int max_vls, const char *module,
                        struct strfilter *_filter, bool externs)
 {
-       int i, fd, ret = 0;
+       int i, ret = 0;
+       struct debuginfo *dinfo;
 
        ret = init_vmlinux();
        if (ret < 0)
                return ret;
 
+       dinfo = open_debuginfo(module);
+       if (!dinfo) {
+               pr_warning("Failed to open debuginfo file.\n");
+               return -ENOENT;
+       }
+
        setup_pager();
 
-       for (i = 0; i < npevs && ret >= 0; i++) {
-               fd = open_vmlinux(module);
-               if (fd < 0) {
-                       pr_warning("Failed to open debug information file.\n");
-                       ret = fd;
-                       break;
-               }
-               ret = show_available_vars_at(fd, &pevs[i], max_vls, _filter,
+       for (i = 0; i < npevs && ret >= 0; i++)
+               ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter,
                                             externs);
-       }
+
+       debuginfo__delete(dinfo);
        return ret;
 }
 
@@ -990,7 +1059,7 @@ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
 
 /* Parse probe_events event into struct probe_point */
 static int parse_probe_trace_command(const char *cmd,
-                                       struct probe_trace_event *tev)
+                                    struct probe_trace_event *tev)
 {
        struct probe_trace_point *tp = &tev->point;
        char pr;
@@ -1023,8 +1092,14 @@ static int parse_probe_trace_command(const char *cmd,
 
        tp->retprobe = (pr == 'r');
 
-       /* Scan function name and offset */
-       ret = sscanf(argv[1], "%a[^+]+%lu", (float *)(void *)&tp->symbol,
+       /* Scan module name(if there), function name and offset */
+       p = strchr(argv[1], ':');
+       if (p) {
+               tp->module = strndup(argv[1], p - argv[1]);
+               p++;
+       } else
+               p = argv[1];
+       ret = sscanf(p, "%a[^+]+%lu", (float *)(void *)&tp->symbol,
                     &tp->offset);
        if (ret == 1)
                tp->offset = 0;
@@ -1269,9 +1344,10 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
        if (buf == NULL)
                return NULL;
 
-       len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s+%lu",
+       len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s%s%s+%lu",
                         tp->retprobe ? 'r' : 'p',
                         tev->group, tev->event,
+                        tp->module ?: "", tp->module ? ":" : "",
                         tp->symbol, tp->offset);
        if (len <= 0)
                goto error;
@@ -1378,6 +1454,8 @@ static void clear_probe_trace_event(struct probe_trace_event *tev)
                free(tev->group);
        if (tev->point.symbol)
                free(tev->point.symbol);
+       if (tev->point.module)
+               free(tev->point.module);
        for (i = 0; i < tev->nargs; i++) {
                if (tev->args[i].name)
                        free(tev->args[i].name);
@@ -1729,7 +1807,7 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
        /* Convert perf_probe_event with debuginfo */
        ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module);
        if (ret != 0)
-               return ret;
+               return ret;     /* Found in debuginfo or got an error */
 
        /* Allocate trace event buffer */
        tev = *tevs = zalloc(sizeof(struct probe_trace_event));
@@ -1742,6 +1820,11 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
                ret = -ENOMEM;
                goto error;
        }
+       tev->point.module = strdup(module);
+       if (tev->point.module == NULL) {
+               ret = -ENOMEM;
+               goto error;
+       }
        tev->point.offset = pev->point.offset;
        tev->point.retprobe = pev->point.retprobe;
        tev->nargs = pev->nargs;
index 3434fc9d79d5a11c1b67a594f06feda2d8100ea0..a7dee835f49c45698d72406864a0daa29649315e 100644 (file)
@@ -10,6 +10,7 @@ extern bool probe_event_dry_run;
 /* kprobe-tracer tracing point */
 struct probe_trace_point {
        char            *symbol;        /* Base symbol */
+       char            *module;        /* Module name */
        unsigned long   offset;         /* Offset from symbol */
        bool            retprobe;       /* Return probe flag */
 };
index 3b9d0b800d5c85c332357220f9c5bd1ed8de4ffc..3e44a3e36519cb0f6d1ee708d88b90f467d38032 100644 (file)
 /* Kprobe tracer basic type is up to u64 */
 #define MAX_BASIC_TYPE_BITS    64
 
-/*
- * Compare the tail of two strings.
- * Return 0 if whole of either string is same as another's tail part.
- */
-static int strtailcmp(const char *s1, const char *s2)
-{
-       int i1 = strlen(s1);
-       int i2 = strlen(s2);
-       while (--i1 >= 0 && --i2 >= 0) {
-               if (s1[i1] != s2[i2])
-                       return s1[i1] - s2[i2];
-       }
-       return 0;
-}
-
 /* Line number list operations */
 
 /* Add a line to line number list */
@@ -131,29 +116,37 @@ static const Dwfl_Callbacks offline_callbacks = {
 };
 
 /* Get a Dwarf from offline image */
-static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias)
+static int debuginfo__init_offline_dwarf(struct debuginfo *self,
+                                        const char *path)
 {
        Dwfl_Module *mod;
-       Dwarf *dbg = NULL;
+       int fd;
 
-       if (!dwflp)
-               return NULL;
+       fd = open(path, O_RDONLY);
+       if (fd < 0)
+               return fd;
 
-       *dwflp = dwfl_begin(&offline_callbacks);
-       if (!*dwflp)
-               return NULL;
+       self->dwfl = dwfl_begin(&offline_callbacks);
+       if (!self->dwfl)
+               goto error;
 
-       mod = dwfl_report_offline(*dwflp, "", "", fd);
+       mod = dwfl_report_offline(self->dwfl, "", "", fd);
        if (!mod)
                goto error;
 
-       dbg = dwfl_module_getdwarf(mod, bias);
-       if (!dbg) {
+       self->dbg = dwfl_module_getdwarf(mod, &self->bias);
+       if (!self->dbg)
+               goto error;
+
+       return 0;
 error:
-               dwfl_end(*dwflp);
-               *dwflp = NULL;
-       }
-       return dbg;
+       if (self->dwfl)
+               dwfl_end(self->dwfl);
+       else
+               close(fd);
+       memset(self, 0, sizeof(*self));
+
+       return -ENOENT;
 }
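
[Editor's note: the new debuginfo object bundles the three values the old helpers threaded through separately. A definition consistent with the fields used here (self->dbg, self->dwfl, self->bias) would be the following sketch; the real declaration lives in probe-finder.h, which is not part of this excerpt:

        struct debuginfo {
                Dwarf           *dbg;   /* libdw handle */
                Dwfl            *dwfl;  /* dwfl session owning dbg */
                Dwarf_Addr      bias;   /* load bias of the image */
        };
]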
 
 #if _ELFUTILS_PREREQ(0, 148)
@@ -189,597 +182,81 @@ static const Dwfl_Callbacks kernel_callbacks = {
 };
 
 /* Get a Dwarf from live kernel image */
-static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp,
-                                         Dwarf_Addr *bias)
+static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self,
+                                              Dwarf_Addr addr)
 {
-       Dwarf *dbg;
-
-       if (!dwflp)
-               return NULL;
-
-       *dwflp = dwfl_begin(&kernel_callbacks);
-       if (!*dwflp)
-               return NULL;
+       self->dwfl = dwfl_begin(&kernel_callbacks);
+       if (!self->dwfl)
+               return -EINVAL;
 
        /* Load the kernel dwarves: Don't care the result here */
-       dwfl_linux_kernel_report_kernel(*dwflp);
-       dwfl_linux_kernel_report_modules(*dwflp);
+       dwfl_linux_kernel_report_kernel(self->dwfl);
+       dwfl_linux_kernel_report_modules(self->dwfl);
 
-       dbg = dwfl_addrdwarf(*dwflp, addr, bias);
+       self->dbg = dwfl_addrdwarf(self->dwfl, addr, &self->bias);
        /* Here, check whether we could get a real dwarf */
-       if (!dbg) {
+       if (!self->dbg) {
                pr_debug("Failed to find kernel dwarf at %lx\n",
                         (unsigned long)addr);
-               dwfl_end(*dwflp);
-               *dwflp = NULL;
+               dwfl_end(self->dwfl);
+               memset(self, 0, sizeof(*self));
+               return -ENOENT;
        }
-       return dbg;
+
+       return 0;
 }
 #else
 /* With older elfutils, this just support kernel module... */
-static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr __used, Dwfl **dwflp,
-                                         Dwarf_Addr *bias)
+static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self,
+                                              Dwarf_Addr addr __used)
 {
-       int fd;
        const char *path = kernel_get_module_path("kernel");
 
        if (!path) {
                pr_err("Failed to find vmlinux path\n");
-               return NULL;
+               return -ENOENT;
        }
 
        pr_debug2("Use file %s for debuginfo\n", path);
-       fd = open(path, O_RDONLY);
-       if (fd < 0)
-               return NULL;
-
-       return dwfl_init_offline_dwarf(fd, dwflp, bias);
+       return debuginfo__init_offline_dwarf(self, path);
 }
 #endif
 
-/* Dwarf wrappers */
-
-/* Find the realpath of the target file. */
-static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
-{
-       Dwarf_Files *files;
-       size_t nfiles, i;
-       const char *src = NULL;
-       int ret;
-
-       if (!fname)
-               return NULL;
-
-       ret = dwarf_getsrcfiles(cu_die, &files, &nfiles);
-       if (ret != 0)
-               return NULL;
-
-       for (i = 0; i < nfiles; i++) {
-               src = dwarf_filesrc(files, i, NULL, NULL);
-               if (strtailcmp(src, fname) == 0)
-                       break;
-       }
-       if (i == nfiles)
-               return NULL;
-       return src;
-}
-
-/* Get DW_AT_comp_dir (should be NULL with older gcc) */
-static const char *cu_get_comp_dir(Dwarf_Die *cu_die)
-{
-       Dwarf_Attribute attr;
-       if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL)
-               return NULL;
-       return dwarf_formstring(&attr);
-}
-
-/* Get a line number and file name for given address */
-static int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
-                           const char **fname, int *lineno)
-{
-       Dwarf_Line *line;
-       Dwarf_Addr laddr;
-
-       line = dwarf_getsrc_die(cudie, (Dwarf_Addr)addr);
-       if (line && dwarf_lineaddr(line, &laddr) == 0 &&
-           addr == (unsigned long)laddr && dwarf_lineno(line, lineno) == 0) {
-               *fname = dwarf_linesrc(line, NULL, NULL);
-               if (!*fname)
-                       /* line number is useless without filename */
-                       *lineno = 0;
-       }
-
-       return *lineno ?: -ENOENT;
-}
-
-/* Compare diename and tname */
-static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
-{
-       const char *name;
-       name = dwarf_diename(dw_die);
-       return name ? (strcmp(tname, name) == 0) : false;
-}
-
-/* Get callsite line number of inline-function instance */
-static int die_get_call_lineno(Dwarf_Die *in_die)
-{
-       Dwarf_Attribute attr;
-       Dwarf_Word ret;
-
-       if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
-               return -ENOENT;
-
-       dwarf_formudata(&attr, &ret);
-       return (int)ret;
-}
-
-/* Get type die */
-static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
-{
-       Dwarf_Attribute attr;
-
-       if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
-           dwarf_formref_die(&attr, die_mem))
-               return die_mem;
-       else
-               return NULL;
-}
-
-/* Get a type die, but skip qualifiers */
-static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
-{
-       int tag;
-
-       do {
-               vr_die = die_get_type(vr_die, die_mem);
-               if (!vr_die)
-                       break;
-               tag = dwarf_tag(vr_die);
-       } while (tag == DW_TAG_const_type ||
-                tag == DW_TAG_restrict_type ||
-                tag == DW_TAG_volatile_type ||
-                tag == DW_TAG_shared_type);
-
-       return vr_die;
-}
-
-/* Get a type die, but skip qualifiers and typedef */
-static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
-{
-       do {
-               vr_die = __die_get_real_type(vr_die, die_mem);
-       } while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
-
-       return vr_die;
-}
-
-static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
-                             Dwarf_Word *result)
-{
-       Dwarf_Attribute attr;
-
-       if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
-           dwarf_formudata(&attr, result) != 0)
-               return -ENOENT;
-
-       return 0;
-}
-
-static bool die_is_signed_type(Dwarf_Die *tp_die)
-{
-       Dwarf_Word ret;
-
-       if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret))
-               return false;
-
-       return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
-               ret == DW_ATE_signed_fixed);
-}
-
-static int die_get_byte_size(Dwarf_Die *tp_die)
-{
-       Dwarf_Word ret;
-
-       if (die_get_attr_udata(tp_die, DW_AT_byte_size, &ret))
-               return 0;
-
-       return (int)ret;
-}
-
-static int die_get_bit_size(Dwarf_Die *tp_die)
-{
-       Dwarf_Word ret;
-
-       if (die_get_attr_udata(tp_die, DW_AT_bit_size, &ret))
-               return 0;
-
-       return (int)ret;
-}
-
-static int die_get_bit_offset(Dwarf_Die *tp_die)
-{
-       Dwarf_Word ret;
-
-       if (die_get_attr_udata(tp_die, DW_AT_bit_offset, &ret))
-               return 0;
-
-       return (int)ret;
-}
-
-/* Get data_member_location offset */
-static int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
-{
-       Dwarf_Attribute attr;
-       Dwarf_Op *expr;
-       size_t nexpr;
-       int ret;
-
-       if (dwarf_attr(mb_die, DW_AT_data_member_location, &attr) == NULL)
-               return -ENOENT;
-
-       if (dwarf_formudata(&attr, offs) != 0) {
-               /* DW_AT_data_member_location should be DW_OP_plus_uconst */
-               ret = dwarf_getlocation(&attr, &expr, &nexpr);
-               if (ret < 0 || nexpr == 0)
-                       return -ENOENT;
-
-               if (expr[0].atom != DW_OP_plus_uconst || nexpr != 1) {
-                       pr_debug("Unable to get offset:Unexpected OP %x (%zd)\n",
-                                expr[0].atom, nexpr);
-                       return -ENOTSUP;
-               }
-               *offs = (Dwarf_Word)expr[0].number;
-       }
-       return 0;
-}
-
-/* Return values for die_find callbacks */
-enum {
-       DIE_FIND_CB_FOUND = 0,          /* End of Search */
-       DIE_FIND_CB_CHILD = 1,          /* Search only children */
-       DIE_FIND_CB_SIBLING = 2,        /* Search only siblings */
-       DIE_FIND_CB_CONTINUE = 3,       /* Search children and siblings */
-};
-
-/* Search a child die */
-static Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
-                                int (*callback)(Dwarf_Die *, void *),
-                                void *data, Dwarf_Die *die_mem)
+struct debuginfo *debuginfo__new(const char *path)
 {
-       Dwarf_Die child_die;
-       int ret;
-
-       ret = dwarf_child(rt_die, die_mem);
-       if (ret != 0)
+       struct debuginfo *self = zalloc(sizeof(struct debuginfo));
+       if (!self)
                return NULL;
 
-       do {
-               ret = callback(die_mem, data);
-               if (ret == DIE_FIND_CB_FOUND)
-                       return die_mem;
-
-               if ((ret & DIE_FIND_CB_CHILD) &&
-                   die_find_child(die_mem, callback, data, &child_die)) {
-                       memcpy(die_mem, &child_die, sizeof(Dwarf_Die));
-                       return die_mem;
-               }
-       } while ((ret & DIE_FIND_CB_SIBLING) &&
-                dwarf_siblingof(die_mem, die_mem) == 0);
-
-       return NULL;
-}
-
-struct __addr_die_search_param {
-       Dwarf_Addr      addr;
-       Dwarf_Die       *die_mem;
-};
-
-static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
-{
-       struct __addr_die_search_param *ad = data;
-
-       if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
-           dwarf_haspc(fn_die, ad->addr)) {
-               memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
-               return DWARF_CB_ABORT;
+       if (debuginfo__init_offline_dwarf(self, path) < 0) {
+               free(self);
+               self = NULL;
        }
-       return DWARF_CB_OK;
-}
-
-/* Search a real subprogram including this line, */
-static Dwarf_Die *die_find_real_subprogram(Dwarf_Die *cu_die, Dwarf_Addr addr,
-                                          Dwarf_Die *die_mem)
-{
-       struct __addr_die_search_param ad;
-       ad.addr = addr;
-       ad.die_mem = die_mem;
-       /* dwarf_getscopes can't find subprogram. */
-       if (!dwarf_getfuncs(cu_die, __die_search_func_cb, &ad, 0))
-               return NULL;
-       else
-               return die_mem;
-}
-
-/* die_find callback for inline function search */
-static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data)
-{
-       Dwarf_Addr *addr = data;
-
-       if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine &&
-           dwarf_haspc(die_mem, *addr))
-               return DIE_FIND_CB_FOUND;
 
-       return DIE_FIND_CB_CONTINUE;
+       return self;
 }
 
-/* Similar to dwarf_getfuncs, but returns inlined_subroutine if exists. */
-static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
-                                     Dwarf_Die *die_mem)
+struct debuginfo *debuginfo__new_online_kernel(unsigned long addr)
 {
-       Dwarf_Die tmp_die;
-
-       sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr, &tmp_die);
-       if (!sp_die)
+       struct debuginfo *self = zalloc(sizeof(struct debuginfo));
+       if (!self)
                return NULL;
 
-       /* Inlined function could be recursive. Trace it until fail */
-       while (sp_die) {
-               memcpy(die_mem, sp_die, sizeof(Dwarf_Die));
-               sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr,
-                                       &tmp_die);
-       }
-
-       return die_mem;
-}
-
-/* Walker on lines (Note: line number will not be sorted) */
-typedef int (* line_walk_handler_t) (const char *fname, int lineno,
-                                    Dwarf_Addr addr, void *data);
-
-struct __line_walk_param {
-       const char *fname;
-       line_walk_handler_t handler;
-       void *data;
-       int retval;
-};
-
-static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
-{
-       struct __line_walk_param *lw = data;
-       Dwarf_Addr addr;
-       int lineno;
-
-       if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
-               lineno = die_get_call_lineno(in_die);
-               if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
-                       lw->retval = lw->handler(lw->fname, lineno, addr,
-                                                lw->data);
-                       if (lw->retval != 0)
-                               return DIE_FIND_CB_FOUND;
-               }
-       }
-       return DIE_FIND_CB_SIBLING;
-}
-
-/* Walk on lines of blocks included in given DIE */
-static int __die_walk_funclines(Dwarf_Die *sp_die,
-                               line_walk_handler_t handler, void *data)
-{
-       struct __line_walk_param lw = {
-               .handler = handler,
-               .data = data,
-               .retval = 0,
-       };
-       Dwarf_Die die_mem;
-       Dwarf_Addr addr;
-       int lineno;
-
-       /* Handle function declaration line */
-       lw.fname = dwarf_decl_file(sp_die);
-       if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
-           dwarf_entrypc(sp_die, &addr) == 0) {
-               lw.retval = handler(lw.fname, lineno, addr, data);
-               if (lw.retval != 0)
-                       goto done;
-       }
-       die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem);
-done:
-       return lw.retval;
-}
-
-static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
-{
-       struct __line_walk_param *lw = data;
-
-       lw->retval = __die_walk_funclines(sp_die, lw->handler, lw->data);
-       if (lw->retval != 0)
-               return DWARF_CB_ABORT;
-
-       return DWARF_CB_OK;
-}
-
-/*
- * Walk on lines inside given PDIE. If the PDIE is subprogram, walk only on
- * the lines inside the subprogram, otherwise PDIE must be a CU DIE.
- */
-static int die_walk_lines(Dwarf_Die *pdie, line_walk_handler_t handler,
-                         void *data)
-{
-       Dwarf_Lines *lines;
-       Dwarf_Line *line;
-       Dwarf_Addr addr;
-       const char *fname;
-       int lineno, ret = 0;
-       Dwarf_Die die_mem, *cu_die;
-       size_t nlines, i;
-
-       /* Get the CU die */
-       if (dwarf_tag(pdie) == DW_TAG_subprogram)
-               cu_die = dwarf_diecu(pdie, &die_mem, NULL, NULL);
-       else
-               cu_die = pdie;
-       if (!cu_die) {
-               pr_debug2("Failed to get CU from subprogram\n");
-               return -EINVAL;
-       }
-
-       /* Get lines list in the CU */
-       if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
-               pr_debug2("Failed to get source lines on this CU.\n");
-               return -ENOENT;
-       }
-       pr_debug2("Get %zd lines from this CU\n", nlines);
-
-       /* Walk on the lines on lines list */
-       for (i = 0; i < nlines; i++) {
-               line = dwarf_onesrcline(lines, i);
-               if (line == NULL ||
-                   dwarf_lineno(line, &lineno) != 0 ||
-                   dwarf_lineaddr(line, &addr) != 0) {
-                       pr_debug2("Failed to get line info. "
-                                 "Possible error in debuginfo.\n");
-                       continue;
-               }
-               /* Filter lines based on address */
-               if (pdie != cu_die)
-                       /*
-                        * Address filtering
-                        * The line is included in given function, and
-                        * no inline block includes it.
-                        */
-                       if (!dwarf_haspc(pdie, addr) ||
-                           die_find_inlinefunc(pdie, addr, &die_mem))
-                               continue;
-               /* Get source line */
-               fname = dwarf_linesrc(line, NULL, NULL);
-
-               ret = handler(fname, lineno, addr, data);
-               if (ret != 0)
-                       return ret;
-       }
-
-       /*
-        * Dwarf lines doesn't include function declarations and inlined
-        * subroutines. We have to check functions list or given function.
-        */
-       if (pdie != cu_die)
-               ret = __die_walk_funclines(pdie, handler, data);
-       else {
-               struct __line_walk_param param = {
-                       .handler = handler,
-                       .data = data,
-                       .retval = 0,
-               };
-               dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
-               ret = param.retval;
+       if (debuginfo__init_online_kernel_dwarf(self, (Dwarf_Addr)addr) < 0) {
+               free(self);
+               self = NULL;
        }
 
-       return ret;
-}
-
-struct __find_variable_param {
-       const char *name;
-       Dwarf_Addr addr;
-};
-
-static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
-{
-       struct __find_variable_param *fvp = data;
-       int tag;
-
-       tag = dwarf_tag(die_mem);
-       if ((tag == DW_TAG_formal_parameter ||
-            tag == DW_TAG_variable) &&
-           die_compare_name(die_mem, fvp->name))
-               return DIE_FIND_CB_FOUND;
-
-       if (dwarf_haspc(die_mem, fvp->addr))
-               return DIE_FIND_CB_CONTINUE;
-       else
-               return DIE_FIND_CB_SIBLING;
-}
-
-/* Find a variable called 'name' at given address */
-static Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
-                                      Dwarf_Addr addr, Dwarf_Die *die_mem)
-{
-       struct __find_variable_param fvp = { .name = name, .addr = addr};
-
-       return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
-                             die_mem);
-}
-
-static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
-{
-       const char *name = data;
-
-       if ((dwarf_tag(die_mem) == DW_TAG_member) &&
-           die_compare_name(die_mem, name))
-               return DIE_FIND_CB_FOUND;
-
-       return DIE_FIND_CB_SIBLING;
-}
-
-/* Find a member called 'name' */
-static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
-                                 Dwarf_Die *die_mem)
-{
-       return die_find_child(st_die, __die_find_member_cb, (void *)name,
-                             die_mem);
-}
-
-/* Get the name of given variable DIE */
-static int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
-{
-       Dwarf_Die type;
-       int tag, ret, ret2;
-       const char *tmp = "";
-
-       if (__die_get_real_type(vr_die, &type) == NULL)
-               return -ENOENT;
-
-       tag = dwarf_tag(&type);
-       if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
-               tmp = "*";
-       else if (tag == DW_TAG_subroutine_type) {
-               /* Function pointer */
-               ret = snprintf(buf, len, "(function_type)");
-               return (ret >= len) ? -E2BIG : ret;
-       } else {
-               if (!dwarf_diename(&type))
-                       return -ENOENT;
-               if (tag == DW_TAG_union_type)
-                       tmp = "union ";
-               else if (tag == DW_TAG_structure_type)
-                       tmp = "struct ";
-               /* Write a base name */
-               ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
-               return (ret >= len) ? -E2BIG : ret;
-       }
-       ret = die_get_typename(&type, buf, len);
-       if (ret > 0) {
-               ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
-               ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
-       }
-       return ret;
+       return self;
 }
 
-/* Get the name and type of given variable DIE, stored as "type\tname" */
-static int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
+void debuginfo__delete(struct debuginfo *self)
 {
-       int ret, ret2;
-
-       ret = die_get_typename(vr_die, buf, len);
-       if (ret < 0) {
-               pr_debug("Failed to get type, make it unknown.\n");
-               ret = snprintf(buf, len, "(unknown_type)");
+       if (self) {
+               if (self->dwfl)
+                       dwfl_end(self->dwfl);
+               free(self);
        }
-       if (ret > 0) {
-               ret2 = snprintf(buf + ret, len - ret, "\t%s",
-                               dwarf_diename(vr_die));
-               ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
-       }
-       return ret;
 }
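With the constructor/destructor pair in place, callers own a single handle instead of threading fd/Dwfl/bias triples through every call. A minimal usage sketch, assuming the caller supplies a vmlinux path (the path literal below is a placeholder):

    static int example_find_line_range(struct line_range *lr)
    {
        /* "/boot/vmlinux" is illustrative only */
        struct debuginfo *dinfo = debuginfo__new("/boot/vmlinux");
        int ret;

        if (!dinfo)
            return -ENOENT;

        ret = debuginfo__find_line_range(dinfo, lr);

        /* one teardown point replaces per-call dwfl_end()/close() */
        debuginfo__delete(dinfo);
        return ret;
    }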
 
 /*
@@ -897,6 +374,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
        struct probe_trace_arg_ref **ref_ptr = &tvar->ref;
        Dwarf_Die type;
        char buf[16];
+       int bsize, boffs, total;
        int ret;
 
        /* TODO: check all types */
@@ -906,11 +384,15 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                return (tvar->type == NULL) ? -ENOMEM : 0;
        }
 
-       if (die_get_bit_size(vr_die) != 0) {
+       bsize = dwarf_bitsize(vr_die);
+       if (bsize > 0) {
                /* This is a bitfield */
-               ret = snprintf(buf, 16, "b%d@%d/%zd", die_get_bit_size(vr_die),
-                               die_get_bit_offset(vr_die),
-                               BYTES_TO_BITS(die_get_byte_size(vr_die)));
+               boffs = dwarf_bitoffset(vr_die);
+               total = dwarf_bytesize(vr_die);
+               if (boffs < 0 || total < 0)
+                       return -ENOENT;
+               ret = snprintf(buf, 16, "b%d@%d/%zd", bsize, boffs,
+                               BYTES_TO_BITS(total));
                goto formatted;
        }
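The buffer is formatted in the kprobe-tracer bitfield syntax, b<bit-width>@<bit-offset>/<container-size-in-bits>. A standalone sketch with assumed attribute values, just to show the resulting string:

    #include <stdio.h>

    int main(void)
    {
        /* stand-ins for dwarf_bitsize()/dwarf_bitoffset()/dwarf_bytesize() */
        int bsize = 3, boffs = 5, total = 4;
        char buf[16];

        snprintf(buf, sizeof(buf), "b%d@%d/%d", bsize, boffs, total * 8);
        printf("%s\n", buf);    /* prints "b3@5/32" */
        return 0;
    }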
 
@@ -958,10 +440,11 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                return (tvar->type == NULL) ? -ENOMEM : 0;
        }
 
-       ret = BYTES_TO_BITS(die_get_byte_size(&type));
-       if (!ret)
+       ret = dwarf_bytesize(&type);
+       if (ret <= 0)
                /* No size ... try to use default type */
                return 0;
+       ret = BYTES_TO_BITS(ret);
 
        /* Check the bitwidth */
        if (ret > MAX_BASIC_TYPE_BITS) {
@@ -1025,7 +508,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
                        else
                                *ref_ptr = ref;
                }
-               ref->offset += die_get_byte_size(&type) * field->index;
+               ref->offset += dwarf_bytesize(&type) * field->index;
                if (!field->next)
                        /* Save vr_die for converting types */
                        memcpy(die_mem, vr_die, sizeof(*die_mem));
@@ -1245,8 +728,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
 
        /* If no real subprogram, find a real one */
        if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
-               sp_die = die_find_real_subprogram(&pf->cu_die,
-                                                 pf->addr, &die_mem);
+               sp_die = die_find_realfunc(&pf->cu_die, pf->addr, &die_mem);
                if (!sp_die) {
                        pr_warning("Failed to find probe point in any "
                                   "functions.\n");
@@ -1504,28 +986,18 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
 }
 
 /* Find probe points from debuginfo */
-static int find_probes(int fd, struct probe_finder *pf)
+static int debuginfo__find_probes(struct debuginfo *self,
+                                 struct probe_finder *pf)
 {
        struct perf_probe_point *pp = &pf->pev->point;
        Dwarf_Off off, noff;
        size_t cuhl;
        Dwarf_Die *diep;
-       Dwarf *dbg = NULL;
-       Dwfl *dwfl;
-       Dwarf_Addr bias;        /* Currently ignored */
        int ret = 0;
 
-       dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
-       if (!dbg) {
-               pr_warning("No debug information found in the vmlinux - "
-                       "please rebuild with CONFIG_DEBUG_INFO=y.\n");
-               close(fd);      /* Without dwfl_end(), fd isn't closed. */
-               return -EBADF;
-       }
-
 #if _ELFUTILS_PREREQ(0, 142)
        /* Get the call frame information from this dwarf */
-       pf->cfi = dwarf_getcfi(dbg);
+       pf->cfi = dwarf_getcfi(self->dbg);
 #endif
 
        off = 0;
@@ -1544,7 +1016,8 @@ static int find_probes(int fd, struct probe_finder *pf)
                        .data = pf,
                };
 
-               dwarf_getpubnames(dbg, pubname_search_cb, &pubname_param, 0);
+               dwarf_getpubnames(self->dbg, pubname_search_cb,
+                                 &pubname_param, 0);
                if (pubname_param.found) {
                        ret = probe_point_search_cb(&pf->sp_die, &probe_param);
                        if (ret)
@@ -1553,9 +1026,9 @@ static int find_probes(int fd, struct probe_finder *pf)
        }
 
        /* Loop on CUs (Compilation Unit) */
-       while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
+       while (!dwarf_nextcu(self->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
                /* Get the DIE(Debugging Information Entry) of this CU */
-               diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die);
+               diep = dwarf_offdie(self->dbg, off + cuhl, &pf->cu_die);
                if (!diep)
                        continue;
 
@@ -1582,8 +1055,6 @@ static int find_probes(int fd, struct probe_finder *pf)
 
 found:
        line_list__free(&pf->lcache);
-       if (dwfl)
-               dwfl_end(dwfl);
 
        return ret;
 }
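The fastpath above consults .debug_pubnames before walking every CU. A self-contained sketch of that lookup pattern against plain libdw (these names are illustrative, not the patch's callbacks):

    #include <elfutils/libdw.h>
    #include <string.h>

    struct lookup { const char *name; int found; };

    static int find_name_cb(Dwarf *dbg __attribute__((unused)),
                            Dwarf_Global *gl, void *arg)
    {
        struct lookup *l = arg;

        if (strcmp(gl->name, l->name) == 0) {
            l->found = 1;
            return DWARF_CB_ABORT;  /* stop scanning */
        }
        return DWARF_CB_OK;
    }

    static int has_global(Dwarf *dbg, const char *name)
    {
        struct lookup l = { .name = name, .found = 0 };

        dwarf_getpubnames(dbg, find_name_cb, &l, 0);
        return l.found;
    }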
@@ -1629,8 +1100,9 @@ static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)
 }
 
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
-int find_probe_trace_events(int fd, struct perf_probe_event *pev,
-                           struct probe_trace_event **tevs, int max_tevs)
+int debuginfo__find_trace_events(struct debuginfo *self,
+                                struct perf_probe_event *pev,
+                                struct probe_trace_event **tevs, int max_tevs)
 {
        struct trace_event_finder tf = {
                        .pf = {.pev = pev, .callback = add_probe_trace_event},
@@ -1645,7 +1117,7 @@ int find_probe_trace_events(int fd, struct perf_probe_event *pev,
        tf.tevs = *tevs;
        tf.ntevs = 0;
 
-       ret = find_probes(fd, &tf.pf);
+       ret = debuginfo__find_probes(self, &tf.pf);
        if (ret < 0) {
                free(*tevs);
                *tevs = NULL;
@@ -1739,9 +1211,10 @@ out:
 }
 
 /* Find available variables at given probe point */
-int find_available_vars_at(int fd, struct perf_probe_event *pev,
-                          struct variable_list **vls, int max_vls,
-                          bool externs)
+int debuginfo__find_available_vars_at(struct debuginfo *self,
+                                     struct perf_probe_event *pev,
+                                     struct variable_list **vls,
+                                     int max_vls, bool externs)
 {
        struct available_var_finder af = {
                        .pf = {.pev = pev, .callback = add_available_vars},
@@ -1756,7 +1229,7 @@ int find_available_vars_at(int fd, struct perf_probe_event *pev,
        af.vls = *vls;
        af.nvls = 0;
 
-       ret = find_probes(fd, &af.pf);
+       ret = debuginfo__find_probes(self, &af.pf);
        if (ret < 0) {
                /* Free vlist for error */
                while (af.nvls--) {
@@ -1774,28 +1247,19 @@ int find_available_vars_at(int fd, struct perf_probe_event *pev,
 }
 
 /* Reverse search */
-int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
+int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr,
+                               struct perf_probe_point *ppt)
 {
        Dwarf_Die cudie, spdie, indie;
-       Dwarf *dbg = NULL;
-       Dwfl *dwfl = NULL;
-       Dwarf_Addr _addr, baseaddr, bias = 0;
+       Dwarf_Addr _addr, baseaddr;
        const char *fname = NULL, *func = NULL, *tmp;
        int baseline = 0, lineno = 0, ret = 0;
 
-       /* Open the live linux kernel */
-       dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
-       if (!dbg) {
-               pr_warning("No debug information found in the vmlinux - "
-                       "please rebuild with CONFIG_DEBUG_INFO=y.\n");
-               ret = -EINVAL;
-               goto end;
-       }
-
        /* Adjust address with bias */
-       addr += bias;
+       addr += self->bias;
+
        /* Find cu die */
-       if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) {
+       if (!dwarf_addrdie(self->dbg, (Dwarf_Addr)addr - self->bias, &cudie)) {
                pr_warning("Failed to find debug information for address %lx\n",
                           addr);
                ret = -EINVAL;
@@ -1807,7 +1271,7 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
        /* Don't care whether it failed or not */
 
        /* Find a corresponding function (name, baseline and baseaddr) */
-       if (die_find_real_subprogram(&cudie, (Dwarf_Addr)addr, &spdie)) {
+       if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) {
                /* Get function entry information */
                tmp = dwarf_diename(&spdie);
                if (!tmp ||
@@ -1871,8 +1335,6 @@ post:
                }
        }
 end:
-       if (dwfl)
-               dwfl_end(dwfl);
        if (ret == 0 && (fname || func))
                ret = 1;        /* Found a point */
        return ret;
@@ -1982,26 +1444,15 @@ static int find_line_range_by_func(struct line_finder *lf)
        return param.retval;
 }
 
-int find_line_range(int fd, struct line_range *lr)
+int debuginfo__find_line_range(struct debuginfo *self, struct line_range *lr)
 {
        struct line_finder lf = {.lr = lr, .found = 0};
        int ret = 0;
        Dwarf_Off off = 0, noff;
        size_t cuhl;
        Dwarf_Die *diep;
-       Dwarf *dbg = NULL;
-       Dwfl *dwfl;
-       Dwarf_Addr bias;        /* Currently ignored */
        const char *comp_dir;
 
-       dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
-       if (!dbg) {
-               pr_warning("No debug information found in the vmlinux - "
-                       "please rebuild with CONFIG_DEBUG_INFO=y.\n");
-               close(fd);      /* Without dwfl_end(), fd isn't closed. */
-               return -EBADF;
-       }
-
        /* Fastpath: lookup by function name from .debug_pubnames section */
        if (lr->function) {
                struct pubname_callback_param pubname_param = {
@@ -2010,7 +1461,8 @@ int find_line_range(int fd, struct line_range *lr)
                struct dwarf_callback_param line_range_param = {
                        .data = (void *)&lf, .retval = 0};
 
-               dwarf_getpubnames(dbg, pubname_search_cb, &pubname_param, 0);
+               dwarf_getpubnames(self->dbg, pubname_search_cb,
+                                 &pubname_param, 0);
                if (pubname_param.found) {
                        line_range_search_cb(&lf.sp_die, &line_range_param);
                        if (lf.found)
@@ -2020,11 +1472,12 @@ int find_line_range(int fd, struct line_range *lr)
 
        /* Loop on CUs (Compilation Unit) */
        while (!lf.found && ret >= 0) {
-               if (dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) != 0)
+               if (dwarf_nextcu(self->dbg, off, &noff, &cuhl,
+                                NULL, NULL, NULL) != 0)
                        break;
 
                /* Get the DIE(Debugging Information Entry) of this CU */
-               diep = dwarf_offdie(dbg, off + cuhl, &lf.cu_die);
+               diep = dwarf_offdie(self->dbg, off + cuhl, &lf.cu_die);
                if (!diep)
                        continue;
 
@@ -2058,7 +1511,6 @@ found:
        }
 
        pr_debug("path: %s\n", lr->path);
-       dwfl_end(dwfl);
        return (ret < 0) ? ret : lf.found;
 }
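Both probe and line-range lookups fall back to the same compilation-unit walk: advance by CU header offset, then fetch the unit DIE just past the header. Reduced to its skeleton:

    #include <elfutils/libdw.h>

    static void for_each_cu(Dwarf *dbg)
    {
        Dwarf_Off off = 0, noff;
        size_t cuhl;
        Dwarf_Die cu_die;

        while (dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) == 0) {
            if (dwarf_offdie(dbg, off + cuhl, &cu_die)) {
                /* cu_die is this unit's DW_TAG_compile_unit DIE */
            }
            off = noff; /* next CU header offset */
        }
    }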
 
index 605730a366dbc5dc0158ffc32db254420c5045bb..c478b42a2473f68ea1dcb786c1a5892c70913866 100644 (file)
@@ -16,27 +16,42 @@ static inline int is_c_varname(const char *name)
 }
 
 #ifdef DWARF_SUPPORT
+
+#include "dwarf-aux.h"
+
+/* TODO: export debuginfo data structure even if no dwarf support */
+
+/* debug information structure */
+struct debuginfo {
+       Dwarf           *dbg;
+       Dwfl            *dwfl;
+       Dwarf_Addr      bias;
+};
+
+extern struct debuginfo *debuginfo__new(const char *path);
+extern struct debuginfo *debuginfo__new_online_kernel(unsigned long addr);
+extern void debuginfo__delete(struct debuginfo *self);
+
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
-extern int find_probe_trace_events(int fd, struct perf_probe_event *pev,
-                                   struct probe_trace_event **tevs,
-                                   int max_tevs);
+extern int debuginfo__find_trace_events(struct debuginfo *self,
+                                       struct perf_probe_event *pev,
+                                       struct probe_trace_event **tevs,
+                                       int max_tevs);
 
 /* Find a perf_probe_point from debuginfo */
-extern int find_perf_probe_point(unsigned long addr,
-                                struct perf_probe_point *ppt);
+extern int debuginfo__find_probe_point(struct debuginfo *self,
+                                      unsigned long addr,
+                                      struct perf_probe_point *ppt);
 
 /* Find a line range */
-extern int find_line_range(int fd, struct line_range *lr);
+extern int debuginfo__find_line_range(struct debuginfo *self,
+                                     struct line_range *lr);
 
 /* Find available variables */
-extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
-                                 struct variable_list **vls, int max_points,
-                                 bool externs);
-
-#include <dwarf.h>
-#include <elfutils/libdw.h>
-#include <elfutils/libdwfl.h>
-#include <elfutils/version.h>
+extern int debuginfo__find_available_vars_at(struct debuginfo *self,
+                                            struct perf_probe_event *pev,
+                                            struct variable_list **vls,
+                                            int max_points, bool externs);
 
 struct probe_finder {
        struct perf_probe_event *pev;           /* Target probe event */
index a9ac0504aabd25f3dc4a528f5e86d291eed4da05..8e0b5a39d8a739720e621521f8b65710b1bb2486 100644 (file)
@@ -247,7 +247,7 @@ struct pyrf_cpu_map {
 static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,
                              PyObject *args, PyObject *kwargs)
 {
-       static char *kwlist[] = { "cpustr", NULL, NULL, };
+       static char *kwlist[] = { "cpustr", NULL };
        char *cpustr = NULL;
 
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s",
@@ -316,7 +316,7 @@ struct pyrf_thread_map {
 static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
                                 PyObject *args, PyObject *kwargs)
 {
-       static char *kwlist[] = { "pid", "tid", NULL, NULL, };
+       static char *kwlist[] = { "pid", "tid", NULL };
        int pid = -1, tid = -1;
 
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii",
@@ -418,7 +418,9 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
                "wakeup_events",
                "bp_type",
                "bp_addr",
-               "bp_len", NULL, NULL, };
+               "bp_len",
+                NULL
+       };
        u64 sample_period = 0;
        u32 disabled = 0,
            inherit = 0,
@@ -499,7 +501,7 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
        struct thread_map *threads = NULL;
        PyObject *pcpus = NULL, *pthreads = NULL;
        int group = 0, inherit = 0;
-       static char *kwlist[] = {"cpus", "threads", "group", "inherit", NULL, NULL};
+       static char *kwlist[] = { "cpus", "threads", "group", "inherit", NULL };
 
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
                                         &pcpus, &pthreads, &group, &inherit))
@@ -582,8 +584,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
                                   PyObject *args, PyObject *kwargs)
 {
        struct perf_evlist *evlist = &pevlist->evlist;
-       static char *kwlist[] = {"pages", "overwrite",
-                                 NULL, NULL};
+       static char *kwlist[] = { "pages", "overwrite", NULL };
        int pages = 128, overwrite = false;
 
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist,
@@ -603,7 +604,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
                                   PyObject *args, PyObject *kwargs)
 {
        struct perf_evlist *evlist = &pevlist->evlist;
-       static char *kwlist[] = {"timeout", NULL, NULL};
+       static char *kwlist[] = { "timeout", NULL };
        int timeout = -1, n;
 
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
@@ -674,7 +675,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
        struct perf_evlist *evlist = &pevlist->evlist;
        union perf_event *event;
        int sample_id_all = 1, cpu;
-       static char *kwlist[] = {"cpu", "sample_id_all", NULL, NULL};
+       static char *kwlist[] = { "cpu", "sample_id_all", NULL };
        int err;
 
        if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
index f5a8fbdd3f76b51f9b5abf0bd551c964c3d301ed..72458d9da5b13a4f26f6003810273bee4dc88c2c 100644 (file)
@@ -12,6 +12,7 @@
 #include "session.h"
 #include "sort.h"
 #include "util.h"
+#include "cpumap.h"
 
 static int perf_session__open(struct perf_session *self, bool force)
 {
@@ -247,9 +248,14 @@ int perf_session__resolve_callchain(struct perf_session *self,
        callchain_cursor_reset(&self->callchain_cursor);
 
        for (i = 0; i < chain->nr; i++) {
-               u64 ip = chain->ips[i];
+               u64 ip;
                struct addr_location al;
 
+               if (callchain_param.order == ORDER_CALLEE)
+                       ip = chain->ips[i];
+               else
+                       ip = chain->ips[chain->nr - i - 1];
+
                if (ip >= PERF_CONTEXT_MAX) {
                        switch (ip) {
                        case PERF_CONTEXT_HV:
@@ -407,20 +413,26 @@ static void perf_event__read_swap(union perf_event *event)
        event->read.id           = bswap_64(event->read.id);
 }
 
-static void perf_event__attr_swap(union perf_event *event)
+/* exported for swapping attributes in file header */
+void perf_event__attr_swap(struct perf_event_attr *attr)
+{
+       attr->type              = bswap_32(attr->type);
+       attr->size              = bswap_32(attr->size);
+       attr->config            = bswap_64(attr->config);
+       attr->sample_period     = bswap_64(attr->sample_period);
+       attr->sample_type       = bswap_64(attr->sample_type);
+       attr->read_format       = bswap_64(attr->read_format);
+       attr->wakeup_events     = bswap_32(attr->wakeup_events);
+       attr->bp_type           = bswap_32(attr->bp_type);
+       attr->bp_addr           = bswap_64(attr->bp_addr);
+       attr->bp_len            = bswap_64(attr->bp_len);
+}
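perf_event__attr_swap() swaps each field at its declared width. That is deliberate: a blanket 64-bit swap over the struct would also exchange adjacent u32 members. A small demonstration of the difference (illustrative, little-endian host assumed):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <byteswap.h>

    int main(void)
    {
        struct { uint32_t type, size; } hdr = { 1, 96 };
        uint64_t as64;

        memcpy(&as64, &hdr, sizeof(as64));
        /* wrong: one bswap_64 merges and reorders the two u32 fields */
        printf("blanket swap: %#llx\n", (unsigned long long)bswap_64(as64));
        /* right: swap each field at its own width */
        printf("per-field:    %#x %#x\n", bswap_32(hdr.type), bswap_32(hdr.size));
        return 0;
    }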
+
+static void perf_event__hdr_attr_swap(union perf_event *event)
 {
        size_t size;
 
-       event->attr.attr.type           = bswap_32(event->attr.attr.type);
-       event->attr.attr.size           = bswap_32(event->attr.attr.size);
-       event->attr.attr.config         = bswap_64(event->attr.attr.config);
-       event->attr.attr.sample_period  = bswap_64(event->attr.attr.sample_period);
-       event->attr.attr.sample_type    = bswap_64(event->attr.attr.sample_type);
-       event->attr.attr.read_format    = bswap_64(event->attr.attr.read_format);
-       event->attr.attr.wakeup_events  = bswap_32(event->attr.attr.wakeup_events);
-       event->attr.attr.bp_type        = bswap_32(event->attr.attr.bp_type);
-       event->attr.attr.bp_addr        = bswap_64(event->attr.attr.bp_addr);
-       event->attr.attr.bp_len         = bswap_64(event->attr.attr.bp_len);
+       perf_event__attr_swap(&event->attr.attr);
 
        size = event->header.size;
        size -= (void *)&event->attr.id - (void *)event;
@@ -448,7 +460,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
        [PERF_RECORD_LOST]                = perf_event__all64_swap,
        [PERF_RECORD_READ]                = perf_event__read_swap,
        [PERF_RECORD_SAMPLE]              = perf_event__all64_swap,
-       [PERF_RECORD_HEADER_ATTR]         = perf_event__attr_swap,
+       [PERF_RECORD_HEADER_ATTR]         = perf_event__hdr_attr_swap,
        [PERF_RECORD_HEADER_EVENT_TYPE]   = perf_event__event_type_swap,
        [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
        [PERF_RECORD_HEADER_BUILD_ID]     = NULL,
@@ -708,9 +720,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event,
        if (!dump_trace)
                return;
 
-       printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n",
+       printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
               event->header.misc, sample->pid, sample->tid, sample->ip,
-              sample->period);
+              sample->period, sample->addr);
 
        if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
                callchain__printf(sample);
@@ -1202,9 +1214,10 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
        return NULL;
 }
 
-void perf_session__print_symbols(union perf_event *event,
-                               struct perf_sample *sample,
-                               struct perf_session *session)
+void perf_session__print_ip(union perf_event *event,
+                           struct perf_sample *sample,
+                           struct perf_session *session,
+                           int print_sym, int print_dso)
 {
        struct addr_location al;
        const char *symname, *dsoname;
@@ -1233,32 +1246,83 @@ void perf_session__print_symbols(union perf_event *event,
                        if (!node)
                                break;
 
-                       if (node->sym && node->sym->name)
-                               symname = node->sym->name;
+                       printf("\t%16" PRIx64, node->ip);
+                       if (print_sym) {
+                               if (node->sym && node->sym->name)
+                                       symname = node->sym->name;
+                               else
+                                       symname = "";
+
+                               printf(" %s", symname);
+                       }
+                       if (print_dso) {
+                               if (node->map && node->map->dso && node->map->dso->name)
+                                       dsoname = node->map->dso->name;
+                               else
+                                       dsoname = "";
+
+                               printf(" (%s)", dsoname);
+                       }
+                       printf("\n");
+
+                       callchain_cursor_advance(cursor);
+               }
+
+       } else {
+               printf("%16" PRIx64, sample->ip);
+               if (print_sym) {
+                       if (al.sym && al.sym->name)
+                               symname = al.sym->name;
                        else
                                symname = "";
 
-                       if (node->map && node->map->dso && node->map->dso->name)
-                               dsoname = node->map->dso->name;
+                       printf(" %s", symname);
+               }
+
+               if (print_dso) {
+                       if (al.map && al.map->dso && al.map->dso->name)
+                               dsoname = al.map->dso->name;
                        else
                                dsoname = "";
 
-                       printf("\t%16" PRIx64 " %s (%s)\n", node->ip, symname, dsoname);
+                       printf(" (%s)", dsoname);
+               }
+       }
+}
 
-                       callchain_cursor_advance(cursor);
+int perf_session__cpu_bitmap(struct perf_session *session,
+                            const char *cpu_list, unsigned long *cpu_bitmap)
+{
+       int i;
+       struct cpu_map *map;
+
+       for (i = 0; i < PERF_TYPE_MAX; ++i) {
+               struct perf_evsel *evsel;
+
+               evsel = perf_session__find_first_evtype(session, i);
+               if (!evsel)
+                       continue;
+
+               if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
+                       pr_err("File does not contain CPU events. "
+                              "Remove -c option to proceed.\n");
+                       return -1;
                }
+       }
 
-       } else {
-               if (al.sym && al.sym->name)
-                       symname = al.sym->name;
-               else
-                       symname = "";
+       map = cpu_map__new(cpu_list);
 
-               if (al.map && al.map->dso && al.map->dso->name)
-                       dsoname = al.map->dso->name;
-               else
-                       dsoname = "";
+       for (i = 0; i < map->nr; i++) {
+               int cpu = map->map[i];
+
+               if (cpu >= MAX_NR_CPUS) {
+                       pr_err("Requested CPU %d too large. "
+                              "Consider raising MAX_NR_CPUS\n", cpu);
+                       return -1;
+               }
 
-               printf("%16" PRIx64 " %s (%s)", al.addr, symname, dsoname);
+               set_bit(cpu, cpu_bitmap);
        }
+
+       return 0;
 }
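perf_session__cpu_bitmap() backs the new CPU-list filtering: it verifies the samples carry PERF_SAMPLE_CPU, parses the list, and sets one bit per selected CPU. A sketch of a hypothetical caller (setup_cpu_filter/sample_selected are made-up names; BITS_TO_LONGS and test_bit come from perf's bitmap helpers):

    static unsigned long cpu_bitmap[BITS_TO_LONGS(MAX_NR_CPUS)];

    static int setup_cpu_filter(struct perf_session *session, const char *cpu_list)
    {
        if (!cpu_list)
            return 0;   /* no filter requested */
        return perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
    }

    /* per sample: drop events from CPUs outside the requested set */
    static int sample_selected(const char *cpu_list, unsigned int cpu)
    {
        return !cpu_list || test_bit(cpu, cpu_bitmap);
    }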
index 66d4e1490879266f5e56411929cd639a8e91b299..170601e67d6b78086349ebd7810285978edd95f9 100644 (file)
@@ -112,6 +112,7 @@ int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
                                             u64 addr);
 
 void mem_bswap_64(void *src, int byte_size);
+void perf_event__attr_swap(struct perf_event_attr *attr);
 
 int perf_session__create_kernel_maps(struct perf_session *self);
 
@@ -167,8 +168,12 @@ static inline int perf_session__parse_sample(struct perf_session *session,
 struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
                                            unsigned int type);
 
-void perf_session__print_symbols(union perf_event *event,
+void perf_session__print_ip(union perf_event *event,
                                 struct perf_sample *sample,
-                                struct perf_session *session);
+                                struct perf_session *session,
+                                int print_sym, int print_dso);
+
+int perf_session__cpu_bitmap(struct perf_session *session,
+                            const char *cpu_list, unsigned long *cpu_bitmap);
 
 #endif /* __PERF_SESSION_H */
index f44fa541d56e67c6bb6c976e78123e99657ffbee..401e220566fdab96d87cb1ae24e5a66dfb92992b 100644 (file)
@@ -15,95 +15,6 @@ char * field_sep;
 
 LIST_HEAD(hist_entry__sort_list);
 
-static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
-                                      size_t size, unsigned int width);
-static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
-                                    size_t size, unsigned int width);
-static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
-                                   size_t size, unsigned int width);
-static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
-                                   size_t size, unsigned int width);
-static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
-                                      size_t size, unsigned int width);
-static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
-                                   size_t size, unsigned int width);
-
-struct sort_entry sort_thread = {
-       .se_header      = "Command:  Pid",
-       .se_cmp         = sort__thread_cmp,
-       .se_snprintf    = hist_entry__thread_snprintf,
-       .se_width_idx   = HISTC_THREAD,
-};
-
-struct sort_entry sort_comm = {
-       .se_header      = "Command",
-       .se_cmp         = sort__comm_cmp,
-       .se_collapse    = sort__comm_collapse,
-       .se_snprintf    = hist_entry__comm_snprintf,
-       .se_width_idx   = HISTC_COMM,
-};
-
-struct sort_entry sort_dso = {
-       .se_header      = "Shared Object",
-       .se_cmp         = sort__dso_cmp,
-       .se_snprintf    = hist_entry__dso_snprintf,
-       .se_width_idx   = HISTC_DSO,
-};
-
-struct sort_entry sort_sym = {
-       .se_header      = "Symbol",
-       .se_cmp         = sort__sym_cmp,
-       .se_snprintf    = hist_entry__sym_snprintf,
-       .se_width_idx   = HISTC_SYMBOL,
-};
-
-struct sort_entry sort_parent = {
-       .se_header      = "Parent symbol",
-       .se_cmp         = sort__parent_cmp,
-       .se_snprintf    = hist_entry__parent_snprintf,
-       .se_width_idx   = HISTC_PARENT,
-};
-struct sort_entry sort_cpu = {
-       .se_header      = "CPU",
-       .se_cmp         = sort__cpu_cmp,
-       .se_snprintf    = hist_entry__cpu_snprintf,
-       .se_width_idx   = HISTC_CPU,
-};
-
-struct sort_dimension {
-       const char              *name;
-       struct sort_entry       *entry;
-       int                     taken;
-};
-
-static struct sort_dimension sort_dimensions[] = {
-       { .name = "pid",        .entry = &sort_thread,  },
-       { .name = "comm",       .entry = &sort_comm,    },
-       { .name = "dso",        .entry = &sort_dso,     },
-       { .name = "symbol",     .entry = &sort_sym,     },
-       { .name = "parent",     .entry = &sort_parent,  },
-       { .name = "cpu",        .entry = &sort_cpu,     },
-};
-
-int64_t cmp_null(void *l, void *r)
-{
-       if (!l && !r)
-               return 0;
-       else if (!l)
-               return -1;
-       else
-               return 1;
-}
-
-/* --sort pid */
-
-int64_t
-sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       return right->thread->pid - left->thread->pid;
-}
-
 static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
 {
        int n;
@@ -125,6 +36,24 @@ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
        return n;
 }
 
+static int64_t cmp_null(void *l, void *r)
+{
+       if (!l && !r)
+               return 0;
+       else if (!l)
+               return -1;
+       else
+               return 1;
+}
+
+/* --sort pid */
+
+static int64_t
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return right->thread->pid - left->thread->pid;
+}
+
 static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
                                       size_t size, unsigned int width)
 {
@@ -132,15 +61,50 @@ static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
                              self->thread->comm ?: "", self->thread->pid);
 }
 
+struct sort_entry sort_thread = {
+       .se_header      = "Command:  Pid",
+       .se_cmp         = sort__thread_cmp,
+       .se_snprintf    = hist_entry__thread_snprintf,
+       .se_width_idx   = HISTC_THREAD,
+};
+
+/* --sort comm */
+
+static int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       return right->thread->pid - left->thread->pid;
+}
+
+static int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+       char *comm_l = left->thread->comm;
+       char *comm_r = right->thread->comm;
+
+       if (!comm_l || !comm_r)
+               return cmp_null(comm_l, comm_r);
+
+       return strcmp(comm_l, comm_r);
+}
+
 static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
                                     size_t size, unsigned int width)
 {
        return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);
 }
 
+struct sort_entry sort_comm = {
+       .se_header      = "Command",
+       .se_cmp         = sort__comm_cmp,
+       .se_collapse    = sort__comm_collapse,
+       .se_snprintf    = hist_entry__comm_snprintf,
+       .se_width_idx   = HISTC_COMM,
+};
+
 /* --sort dso */
 
-int64_t
+static int64_t
 sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
 {
        struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL;
@@ -173,9 +137,16 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
        return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
 }
 
+struct sort_entry sort_dso = {
+       .se_header      = "Shared Object",
+       .se_cmp         = sort__dso_cmp,
+       .se_snprintf    = hist_entry__dso_snprintf,
+       .se_width_idx   = HISTC_DSO,
+};
+
 /* --sort symbol */
 
-int64_t
+static int64_t
 sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
 {
        u64 ip_l, ip_r;
@@ -211,29 +182,16 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
        return ret;
 }
 
-/* --sort comm */
-
-int64_t
-sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
-{
-       return right->thread->pid - left->thread->pid;
-}
-
-int64_t
-sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
-{
-       char *comm_l = left->thread->comm;
-       char *comm_r = right->thread->comm;
-
-       if (!comm_l || !comm_r)
-               return cmp_null(comm_l, comm_r);
-
-       return strcmp(comm_l, comm_r);
-}
+struct sort_entry sort_sym = {
+       .se_header      = "Symbol",
+       .se_cmp         = sort__sym_cmp,
+       .se_snprintf    = hist_entry__sym_snprintf,
+       .se_width_idx   = HISTC_SYMBOL,
+};
 
 /* --sort parent */
 
-int64_t
+static int64_t
 sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
 {
        struct symbol *sym_l = left->parent;
@@ -252,9 +210,16 @@ static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
                              self->parent ? self->parent->name : "[other]");
 }
 
+struct sort_entry sort_parent = {
+       .se_header      = "Parent symbol",
+       .se_cmp         = sort__parent_cmp,
+       .se_snprintf    = hist_entry__parent_snprintf,
+       .se_width_idx   = HISTC_PARENT,
+};
+
 /* --sort cpu */
 
-int64_t
+static int64_t
 sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
 {
        return right->cpu - left->cpu;
@@ -266,6 +231,28 @@ static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
        return repsep_snprintf(bf, size, "%-*d", width, self->cpu);
 }
 
+struct sort_entry sort_cpu = {
+       .se_header      = "CPU",
+       .se_cmp         = sort__cpu_cmp,
+       .se_snprintf    = hist_entry__cpu_snprintf,
+       .se_width_idx   = HISTC_CPU,
+};
+
+struct sort_dimension {
+       const char              *name;
+       struct sort_entry       *entry;
+       int                     taken;
+};
+
+static struct sort_dimension sort_dimensions[] = {
+       { .name = "pid",        .entry = &sort_thread,  },
+       { .name = "comm",       .entry = &sort_comm,    },
+       { .name = "dso",        .entry = &sort_dso,     },
+       { .name = "symbol",     .entry = &sort_sym,     },
+       { .name = "parent",     .entry = &sort_parent,  },
+       { .name = "cpu",        .entry = &sort_cpu,     },
+};
+
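Moving the table next to sort_dimension__add() keeps registration and lookup together. For reference, a sketch of how a comma-separated --sort string would feed it (the wrapper name is hypothetical; the real loop lives in setup_sorting(), and strtok() modifies its input):

    static int setup_sort_order(char *str)
    {
        char *tok;

        for (tok = strtok(str, ","); tok; tok = strtok(NULL, ","))
            if (sort_dimension__add(tok) < 0)
                return -1;  /* no dimension matched this key */
        return 0;
    }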
 int sort_dimension__add(const char *tok)
 {
        unsigned int i;
@@ -273,15 +260,9 @@ int sort_dimension__add(const char *tok)
        for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
                struct sort_dimension *sd = &sort_dimensions[i];
 
-               if (sd->taken)
-                       continue;
-
                if (strncasecmp(tok, sd->name, strlen(tok)))
                        continue;
 
-               if (sd->entry->se_collapse)
-                       sort__need_collapse = 1;
-
                if (sd->entry == &sort_parent) {
                        int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
                        if (ret) {
@@ -294,6 +275,12 @@ int sort_dimension__add(const char *tok)
                        sort__has_parent = 1;
                }
 
+               if (sd->taken)
+                       return 0;
+
+               if (sd->entry->se_collapse)
+                       sort__need_collapse = 1;
+
                if (list_empty(&hist_entry__sort_list)) {
                        if (!strcmp(sd->name, "pid"))
                                sort__first_dimension = SORT_PID;
index 0b91053a7d11af888eea81a4c8de24fdd60ce6f8..77d0388ad415477d8be2faf57cbf6e3160f40079 100644 (file)
@@ -103,20 +103,6 @@ extern struct sort_entry sort_thread;
 extern struct list_head hist_entry__sort_list;
 
 void setup_sorting(const char * const usagestr[], const struct option *opts);
-
-extern size_t sort__thread_print(FILE *, struct hist_entry *, unsigned int);
-extern size_t sort__comm_print(FILE *, struct hist_entry *, unsigned int);
-extern size_t sort__dso_print(FILE *, struct hist_entry *, unsigned int);
-extern size_t sort__sym_print(FILE *, struct hist_entry *, unsigned int __used);
-extern int64_t cmp_null(void *, void *);
-extern int64_t sort__thread_cmp(struct hist_entry *, struct hist_entry *);
-extern int64_t sort__comm_cmp(struct hist_entry *, struct hist_entry *);
-extern int64_t sort__comm_collapse(struct hist_entry *, struct hist_entry *);
-extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *);
-extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *);
-extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *);
-int64_t sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right);
-extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int);
 extern int sort_dimension__add(const char *);
 void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
                             const char *list_name, FILE *fp);
index b9a985dadd08f67851cc0483bb36bb8d3b014580..d5836382ff2cad7d00fd156d1296f99f8433f698 100644 (file)
@@ -294,3 +294,22 @@ bool strlazymatch(const char *str, const char *pat)
 {
        return __match_glob(str, pat, true);
 }
+
+/**
+ * strtailcmp - Compare the tail of two strings
+ * @s1: 1st string to be compared
+ * @s2: 2nd string to be compared
+ *
+ * Return 0 if the whole of either string matches the tail of the other.
+ */
+int strtailcmp(const char *s1, const char *s2)
+{
+       int i1 = strlen(s1);
+       int i2 = strlen(s2);
+       while (--i1 >= 0 && --i2 >= 0) {
+               if (s1[i1] != s2[i2])
+                       return s1[i1] - s2[i2];
+       }
+       return 0;
+}
+
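strtailcmp() answers "does this user-supplied file name match the tail of that full path (or vice versa)?", which is exactly what the DWARF source-file matching needs. A self-contained check of the behaviour, with the function duplicated so the example compiles on its own:

    #include <assert.h>
    #include <string.h>

    static int strtailcmp(const char *s1, const char *s2)
    {
        int i1 = strlen(s1), i2 = strlen(s2);

        while (--i1 >= 0 && --i2 >= 0)
            if (s1[i1] != s2[i2])
                return s1[i1] - s2[i2];
        return 0;
    }

    int main(void)
    {
        assert(strtailcmp("kernel/sched.c", "sched.c") == 0); /* tail match */
        assert(strtailcmp("sched.c", "kernel/sched.c") == 0); /* symmetric */
        assert(strtailcmp("kernel/fork.c", "sched.c") != 0);
        return 0;
    }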
index 35729f4c40cb7a98e891a2013d4b6e812a24237e..3403f814ad72e79682b9a88ef9e1770188edb427 100644 (file)
@@ -183,106 +183,59 @@ int bigendian(void)
        return *ptr == 0x01020304;
 }
 
-static unsigned long long copy_file_fd(int fd)
+/* unfortunately, you cannot stat debugfs or proc files for size */
+static void record_file(const char *file, size_t hdr_sz)
 {
        unsigned long long size = 0;
-       char buf[BUFSIZ];
-       int r;
-
-       do {
-               r = read(fd, buf, BUFSIZ);
-               if (r > 0) {
-                       size += r;
-                       write_or_die(buf, r);
-               }
-       } while (r > 0);
-
-       return size;
-}
-
-static unsigned long long copy_file(const char *file)
-{
-       unsigned long long size = 0;
-       int fd;
+       char buf[BUFSIZ], *sizep;
+       off_t hdr_pos = lseek(output_fd, 0, SEEK_CUR);
+       int r, fd;
 
        fd = open(file, O_RDONLY);
        if (fd < 0)
                die("Can't read '%s'", file);
-       size = copy_file_fd(fd);
-       close(fd);
 
-       return size;
-}
-
-static unsigned long get_size_fd(int fd)
-{
-       unsigned long long size = 0;
-       char buf[BUFSIZ];
-       int r;
+       /* put in zeros for file size, then fill true size later */
+       write_or_die(&size, hdr_sz);
 
        do {
                r = read(fd, buf, BUFSIZ);
-               if (r > 0)
+               if (r > 0) {
                        size += r;
+                       write_or_die(buf, r);
+               }
        } while (r > 0);
-
-       lseek(fd, 0, SEEK_SET);
-
-       return size;
-}
-
-static unsigned long get_size(const char *file)
-{
-       unsigned long long size = 0;
-       int fd;
-
-       fd = open(file, O_RDONLY);
-       if (fd < 0)
-               die("Can't read '%s'", file);
-       size = get_size_fd(fd);
        close(fd);
 
-       return size;
+       /* ugh, handle big-endian hdr_size == 4 */
+       sizep = (char *)&size;
+       if (bigendian())
+               sizep += sizeof(u64) - hdr_sz;
+
+       if (pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0)
+               die("writing to %s", output_file);
 }
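record_file() streams a pseudo-file whose size cannot be stat()ed: it writes a zeroed size field first, counts bytes as they stream past, then pwrite()s the true value back over the placeholder. The one subtlety is picking the low-order bytes of the u64 counter on big-endian hosts when the on-disk field is only 4 bytes wide. A standalone illustration of that pointer adjustment:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t size = 0x11223344;     /* hypothetical byte count */
        size_t hdr_sz = 4;              /* 4-byte on-disk size field */
        unsigned int probe = 0x01020304;
        char *sizep = (char *)&size;

        if (*(unsigned char *)&probe == 0x01)       /* big-endian host */
            sizep += sizeof(uint64_t) - hdr_sz;     /* skip high zero bytes */

        /* these hdr_sz bytes are what gets pwrite()n over the placeholder */
        for (size_t i = 0; i < hdr_sz; i++)
            printf("%02x ", (unsigned char)sizep[i]);
        printf("\n");
        return 0;
    }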
 
 static void read_header_files(void)
 {
-       unsigned long long size, check_size;
        char *path;
-       int fd;
+       struct stat st;
 
        path = get_tracing_file("events/header_page");
-       fd = open(path, O_RDONLY);
-       if (fd < 0)
+       if (stat(path, &st) < 0)
                die("can't read '%s'", path);
 
-       /* unfortunately, you can not stat debugfs files for size */
-       size = get_size_fd(fd);
-
        write_or_die("header_page", 12);
-       write_or_die(&size, 8);
-       check_size = copy_file_fd(fd);
-       close(fd);
-
-       if (size != check_size)
-               die("wrong size for '%s' size=%lld read=%lld",
-                   path, size, check_size);
+       record_file(path, 8);
        put_tracing_file(path);
 
        path = get_tracing_file("events/header_event");
-       fd = open(path, O_RDONLY);
-       if (fd < 0)
+       if (stat(path, &st) < 0)
                die("can't read '%s'", path);
 
-       size = get_size_fd(fd);
-
        write_or_die("header_event", 13);
-       write_or_die(&size, 8);
-       check_size = copy_file_fd(fd);
-       if (size != check_size)
-               die("wrong size for '%s'", path);
+       record_file(path, 8);
        put_tracing_file(path);
-       close(fd);
 }
 
 static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
@@ -298,7 +251,6 @@ static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
 
 static void copy_event_system(const char *sys, struct tracepoint_path *tps)
 {
-       unsigned long long size, check_size;
        struct dirent *dent;
        struct stat st;
        char *format;
@@ -338,14 +290,8 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
                sprintf(format, "%s/%s/format", sys, dent->d_name);
                ret = stat(format, &st);
 
-               if (ret >= 0) {
-                       /* unfortunately, you can not stat debugfs files for size */
-                       size = get_size(format);
-                       write_or_die(&size, 8);
-                       check_size = copy_file(format);
-                       if (size != check_size)
-                               die("error in size of file '%s'", format);
-               }
+               if (ret >= 0)
+                       record_file(format, 8);
 
                free(format);
        }
@@ -426,7 +372,7 @@ static void read_event_files(struct tracepoint_path *tps)
 
 static void read_proc_kallsyms(void)
 {
-       unsigned int size, check_size;
+       unsigned int size;
        const char *path = "/proc/kallsyms";
        struct stat st;
        int ret;
@@ -438,17 +384,12 @@ static void read_proc_kallsyms(void)
                write_or_die(&size, 4);
                return;
        }
-       size = get_size(path);
-       write_or_die(&size, 4);
-       check_size = copy_file(path);
-       if (size != check_size)
-               die("error in size of file '%s'", path);
-
+       record_file(path, 4);
 }
 
 static void read_ftrace_printk(void)
 {
-       unsigned int size, check_size;
+       unsigned int size;
        char *path;
        struct stat st;
        int ret;
@@ -461,11 +402,8 @@ static void read_ftrace_printk(void)
                write_or_die(&size, 4);
                goto out;
        }
-       size = get_size(path);
-       write_or_die(&size, 4);
-       check_size = copy_file(path);
-       if (size != check_size)
-               die("error in size of file '%s'", path);
+       record_file(path, 4);
+
 out:
        put_tracing_file(path);
 }
index fc784284ac8be9070c1ab3681a216512150a3688..0128906bac88d258e9e100aeabae5abd462e465b 100644 (file)
@@ -238,6 +238,7 @@ char **argv_split(const char *str, int *argcp);
 void argv_free(char **argv);
 bool strglobmatch(const char *str, const char *pat);
 bool strlazymatch(const char *str, const char *pat);
+int strtailcmp(const char *s1, const char *s2);
 unsigned long convert_unit(unsigned long value, char *unit);
 int readn(int fd, void *buf, size_t size);