Merge branch 'linus' into tracing/hw-branch-tracing
[sfrench/cifs-2.6.git] / kernel / trace / trace.h
index 4d3d381bfd9537e3a43df767e30fb0d2991c66d0..9e15802cca9f5c81fb4723ac1e4b24e9a63e2a31 100644 (file)
@@ -9,6 +9,8 @@
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 #include <trace/boot.h>
+#include <trace/kmemtrace.h>
+#include <trace/power.h>
 
 enum trace_type {
        __TRACE_FIRST_TYPE = 0,
@@ -16,9 +18,9 @@ enum trace_type {
        TRACE_FN,
        TRACE_CTX,
        TRACE_WAKE,
-       TRACE_CONT,
        TRACE_STACK,
        TRACE_PRINT,
+       TRACE_BPRINT,
        TRACE_SPECIAL,
        TRACE_MMIO_RW,
        TRACE_MMIO_MAP,
@@ -29,9 +31,14 @@ enum trace_type {
        TRACE_GRAPH_ENT,
        TRACE_USER_STACK,
        TRACE_HW_BRANCHES,
+       TRACE_SYSCALL_ENTER,
+       TRACE_SYSCALL_EXIT,
+       TRACE_KMEM_ALLOC,
+       TRACE_KMEM_FREE,
        TRACE_POWER,
+       TRACE_BLK,
 
-       __TRACE_LAST_TYPE
+       __TRACE_LAST_TYPE,
 };
 
 /*
@@ -42,7 +49,6 @@ enum trace_type {
  */
 struct trace_entry {
        unsigned char           type;
-       unsigned char           cpu;
        unsigned char           flags;
        unsigned char           preempt_count;
        int                     pid;
@@ -60,13 +66,13 @@ struct ftrace_entry {
 
 /* Function call entry */
 struct ftrace_graph_ent_entry {
-       struct trace_entry                      ent;
+       struct trace_entry              ent;
        struct ftrace_graph_ent         graph_ent;
 };
 
 /* Function return entry */
 struct ftrace_graph_ret_entry {
-       struct trace_entry                      ent;
+       struct trace_entry              ent;
        struct ftrace_graph_ret         ret;
 };
 extern struct tracer boot_tracer;
@@ -112,12 +118,18 @@ struct userstack_entry {
 };
 
 /*
- * ftrace_printk entry:
+ * trace_printk entry:
  */
+struct bprint_entry {
+       struct trace_entry      ent;
+       unsigned long           ip;
+       const char              *fmt;
+       u32                     buf[];
+};
+
 struct print_entry {
        struct trace_entry      ent;
        unsigned long           ip;
-       int                     depth;
        char                    buf[];
 };
 
@@ -170,15 +182,51 @@ struct trace_power {
        struct power_trace      state_data;
 };
 
+enum kmemtrace_type_id {
+       KMEMTRACE_TYPE_KMALLOC = 0,     /* kmalloc() or kfree(). */
+       KMEMTRACE_TYPE_CACHE,           /* kmem_cache_*(). */
+       KMEMTRACE_TYPE_PAGES,           /* __get_free_pages() and friends. */
+};
+
+struct kmemtrace_alloc_entry {
+       struct trace_entry      ent;
+       enum kmemtrace_type_id type_id;
+       unsigned long call_site;
+       const void *ptr;
+       size_t bytes_req;
+       size_t bytes_alloc;
+       gfp_t gfp_flags;
+       int node;
+};
+
+struct kmemtrace_free_entry {
+       struct trace_entry      ent;
+       enum kmemtrace_type_id type_id;
+       unsigned long call_site;
+       const void *ptr;
+};
+
+struct syscall_trace_enter {
+       struct trace_entry      ent;
+       int                     nr;
+       unsigned long           args[];
+};
+
+struct syscall_trace_exit {
+       struct trace_entry      ent;
+       int                     nr;
+       unsigned long           ret;
+};
+
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
  *  IRQS_OFF           - interrupts were disabled
- *  IRQS_NOSUPPORT     - arch does not support irqs_disabled_flags
+ *  IRQS_NOSUPPORT     - arch does not support irqs_disabled_flags
  *  NEED_RESCED                - reschedule is requested
  *  HARDIRQ            - inside an interrupt handler
  *  SOFTIRQ            - inside a softirq handler
- *  CONT               - multiple entries hold the trace item
  */
 enum trace_flag_type {
        TRACE_FLAG_IRQS_OFF             = 0x01,
@@ -186,7 +234,6 @@ enum trace_flag_type {
        TRACE_FLAG_NEED_RESCHED         = 0x04,
        TRACE_FLAG_HARDIRQ              = 0x08,
        TRACE_FLAG_SOFTIRQ              = 0x10,
-       TRACE_FLAG_CONT                 = 0x20,
 };
 
 #define TRACE_BUF_SIZE         1024
@@ -198,6 +245,7 @@ enum trace_flag_type {
  */
 struct trace_array_cpu {
        atomic_t                disabled;
+       void                    *buffer_page;   /* ring buffer spare */
 
        /* these fields get copied into max-trace: */
        unsigned long           trace_idx;
@@ -262,10 +310,10 @@ extern void __ftrace_bad_type(void);
        do {                                                            \
                IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN);     \
                IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);        \
-               IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
                IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);   \
                IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
                IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);   \
+               IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
                IF_ASSIGN(var, ent, struct special_entry, 0);           \
                IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,          \
                          TRACE_MMIO_RW);                               \
@@ -279,7 +327,15 @@ extern void __ftrace_bad_type(void);
                IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,      \
                          TRACE_GRAPH_RET);             \
                IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
-               IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+               IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+               IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,       \
+                         TRACE_KMEM_ALLOC);    \
+               IF_ASSIGN(var, ent, struct kmemtrace_free_entry,        \
+                         TRACE_KMEM_FREE);     \
+               IF_ASSIGN(var, ent, struct syscall_trace_enter,         \
+                         TRACE_SYSCALL_ENTER);                         \
+               IF_ASSIGN(var, ent, struct syscall_trace_exit,          \
+                         TRACE_SYSCALL_EXIT);                          \
                __ftrace_bad_type();                                    \
        } while (0)
 
@@ -287,7 +343,8 @@ extern void __ftrace_bad_type(void);
 enum print_line_t {
        TRACE_TYPE_PARTIAL_LINE = 0,    /* Retry after flushing the seq */
        TRACE_TYPE_HANDLED      = 1,
-       TRACE_TYPE_UNHANDLED    = 2     /* Relay to other output functions */
+       TRACE_TYPE_UNHANDLED    = 2,    /* Relay to other output functions */
+       TRACE_TYPE_NO_CONSUME   = 3     /* Handled but ask to not consume */
 };
 
 
@@ -297,8 +354,8 @@ enum print_line_t {
  * flags value in struct tracer_flags.
  */
 struct tracer_opt {
-       const char      *name; /* Will appear on the trace_options file */
-       u32             bit; /* Mask assigned in val field in tracer_flags */
+       const char      *name; /* Will appear on the trace_options file */
+       u32             bit; /* Mask assigned in val field in tracer_flags */
 };
 
 /*
@@ -307,28 +364,51 @@ struct tracer_opt {
  */
 struct tracer_flags {
        u32                     val;
-       struct tracer_opt       *opts;
+       struct tracer_opt       *opts;
 };
 
 /* Makes more easy to define a tracer opt */
 #define TRACER_OPT(s, b)       .name = #s, .bit = b
 
-/*
- * A specific tracer, represented by methods that operate on a trace array:
+
+/**
+ * struct tracer - a specific tracer and its callbacks to interact with debugfs
+ * @name: the name chosen to select it on the available_tracers file
+ * @init: called when one switches to this tracer (echo name > current_tracer)
+ * @reset: called when one switches to another tracer
+ * @start: called when tracing is unpaused (echo 1 > tracing_enabled)
+ * @stop: called when tracing is paused (echo 0 > tracing_enabled)
+ * @open: called when the trace file is opened
+ * @pipe_open: called when the trace_pipe file is opened
+ * @wait_pipe: override how the user waits for traces on trace_pipe
+ * @close: called when the trace file is released
+ * @read: override the default read callback on trace_pipe
+ * @splice_read: override the default splice_read callback on trace_pipe
+ * @selftest: selftest to run on boot (see trace_selftest.c)
+ * @print_headers: override the first lines that describe your columns
+ * @print_line: callback that prints a trace
+ * @set_flag: signals one of your private flags changed (trace_options file)
+ * @flags: your private flags
  */
 struct tracer {
        const char              *name;
-       /* Your tracer should raise a warning if init fails */
        int                     (*init)(struct trace_array *tr);
        void                    (*reset)(struct trace_array *tr);
        void                    (*start)(struct trace_array *tr);
        void                    (*stop)(struct trace_array *tr);
        void                    (*open)(struct trace_iterator *iter);
        void                    (*pipe_open)(struct trace_iterator *iter);
+       void                    (*wait_pipe)(struct trace_iterator *iter);
        void                    (*close)(struct trace_iterator *iter);
        ssize_t                 (*read)(struct trace_iterator *iter,
                                        struct file *filp, char __user *ubuf,
                                        size_t cnt, loff_t *ppos);
+       ssize_t                 (*splice_read)(struct trace_iterator *iter,
+                                              struct file *filp,
+                                              loff_t *ppos,
+                                              struct pipe_inode_info *pipe,
+                                              size_t len,
+                                              unsigned int flags);
 #ifdef CONFIG_FTRACE_STARTUP_TEST
        int                     (*selftest)(struct tracer *trace,
                                            struct trace_array *tr);
@@ -339,7 +419,8 @@ struct tracer {
        int                     (*set_flag)(u32 old_flags, u32 bit, int set);
        struct tracer           *next;
        int                     print_max;
-       struct tracer_flags     *flags;
+       struct tracer_flags     *flags;
+       struct tracer_stat      *stats;
 };
 
 struct trace_seq {
@@ -348,6 +429,16 @@ struct trace_seq {
        unsigned int            readpos;
 };
 
+static inline void
+trace_seq_init(struct trace_seq *s)
+{
+       s->len = 0;
+       s->readpos = 0;
+}
+
+
+#define TRACE_PIPE_ALL_CPU     -1
+
 /*
  * Trace iterator - used by printout routines who present trace
  * results to users and which routines might sleep, etc:
@@ -356,6 +447,8 @@ struct trace_iterator {
        struct trace_array      *tr;
        struct tracer           *trace;
        void                    *private;
+       int                     cpu_file;
+       struct mutex            mutex;
        struct ring_buffer_iter *buffer_iter[NR_CPUS];
 
        /* The below is zeroed out in pipe_read */
@@ -371,6 +464,7 @@ struct trace_iterator {
        cpumask_var_t           started;
 };
 
+int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
 void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
@@ -379,26 +473,50 @@ int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
+struct ring_buffer_event;
+
+struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
+                                                   unsigned char type,
+                                                   unsigned long len,
+                                                   unsigned long flags,
+                                                   int pc);
+void trace_buffer_unlock_commit(struct trace_array *tr,
+                               struct ring_buffer_event *event,
+                               unsigned long flags, int pc);
+
+struct ring_buffer_event *
+trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+                                 unsigned long flags, int pc);
+void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
+                                       unsigned long flags, int pc);
+void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
+                                       unsigned long flags, int pc);
+
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
                                                struct trace_array_cpu *data);
+
+struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
+                                         int *ent_cpu, u64 *ent_ts);
+
 void tracing_generic_entry_update(struct trace_entry *entry,
                                  unsigned long flags,
                                  int pc);
 
+void default_wait_pipe(struct trace_iterator *iter);
+void poll_wait_pipe(struct trace_iterator *iter);
+
 void ftrace(struct trace_array *tr,
                            struct trace_array_cpu *data,
                            unsigned long ip,
                            unsigned long parent_ip,
                            unsigned long flags, int pc);
 void tracing_sched_switch_trace(struct trace_array *tr,
-                               struct trace_array_cpu *data,
                                struct task_struct *prev,
                                struct task_struct *next,
                                unsigned long flags, int pc);
 void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
-                               struct trace_array_cpu *data,
                                struct task_struct *wakee,
                                struct task_struct *cur,
                                unsigned long flags, int pc);
@@ -408,14 +526,12 @@ void trace_special(struct trace_array *tr,
                   unsigned long arg2,
                   unsigned long arg3, int pc);
 void trace_function(struct trace_array *tr,
-                   struct trace_array_cpu *data,
                    unsigned long ip,
                    unsigned long parent_ip,
                    unsigned long flags, int pc);
 
 void trace_graph_return(struct ftrace_graph_ret *trace);
 int trace_graph_entry(struct ftrace_graph_ent *trace);
-void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
 
 void tracing_start_cmdline_record(void);
 void tracing_stop_cmdline_record(void);
@@ -434,15 +550,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
 void update_max_tr_single(struct trace_array *tr,
                          struct task_struct *tsk, int cpu);
 
-extern cycle_t ftrace_now(int cpu);
+void __trace_stack(struct trace_array *tr,
+                  unsigned long flags,
+                  int skip, int pc);
 
-#ifdef CONFIG_FUNCTION_TRACER
-void tracing_start_function_trace(void);
-void tracing_stop_function_trace(void);
-#else
-# define tracing_start_function_trace()                do { } while (0)
-# define tracing_stop_function_trace()         do { } while (0)
-#endif
+extern cycle_t ftrace_now(int cpu);
 
 #ifdef CONFIG_CONTEXT_SWITCH_TRACER
 typedef void
@@ -456,10 +568,10 @@ struct tracer_switch_ops {
        void                            *private;
        struct tracer_switch_ops        *next;
 };
-
-char *trace_find_cmdline(int pid);
 #endif /* CONFIG_CONTEXT_SWITCH_TRACER */
 
+extern void trace_find_cmdline(int pid, char comm[]);
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern unsigned long ftrace_update_tot_cnt;
 #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
@@ -469,6 +581,8 @@ extern int DYN_FTRACE_TEST_NAME(void);
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 extern int trace_selftest_startup_function(struct tracer *trace,
                                           struct trace_array *tr);
+extern int trace_selftest_startup_function_graph(struct tracer *trace,
+                                                struct trace_array *tr);
 extern int trace_selftest_startup_irqsoff(struct tracer *trace,
                                          struct trace_array *tr);
 extern int trace_selftest_startup_preemptoff(struct tracer *trace,
@@ -485,21 +599,16 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
                                               struct trace_array *tr);
 extern int trace_selftest_startup_branch(struct tracer *trace,
                                         struct trace_array *tr);
+extern int trace_selftest_startup_hw_branches(struct tracer *trace,
+                                             struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
-extern void trace_seq_print_cont(struct trace_seq *s,
-                                struct trace_iterator *iter);
-
-extern int
-seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
-               unsigned long sym_flags);
-extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
-                                size_t cnt);
 extern long ns2usecs(cycle_t nsec);
 extern int
-trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
+trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
+extern int
+trace_vprintk(unsigned long ip, const char *fmt, va_list args);
 
 extern unsigned long trace_flags;
 
@@ -580,7 +689,11 @@ enum trace_iterator_flags {
        TRACE_ITER_ANNOTATE             = 0x2000,
        TRACE_ITER_USERSTACKTRACE       = 0x4000,
        TRACE_ITER_SYM_USEROBJ          = 0x8000,
-       TRACE_ITER_PRINTK_MSGONLY       = 0x10000
+       TRACE_ITER_PRINTK_MSGONLY       = 0x10000,
+       TRACE_ITER_CONTEXT_INFO         = 0x20000, /* Print pid/cpu/time */
+       TRACE_ITER_LATENCY_FMT          = 0x40000,
+       TRACE_ITER_GLOBAL_CLK           = 0x80000,
+       TRACE_ITER_SLEEP_TIME           = 0x100000,
 };
 
 /*
@@ -601,12 +714,12 @@ extern struct tracer nop_trace;
  * preempt_enable (after a disable), a schedule might take place
  * causing an infinite recursion.
  *
- * To prevent this, we read the need_recshed flag before
+ * To prevent this, we read the need_resched flag before
  * disabling preemption. When we want to enable preemption we
  * check the flag, if it is set, then we call preempt_enable_no_resched.
  * Otherwise, we call preempt_enable.
  *
- * The rational for doing the above is that if need resched is set
+ * The rational for doing the above is that if need_resched is set
  * and we have yet to reschedule, we are either in an atomic location
  * (where we do not need to check for scheduling) or we are inside
  * the scheduler and do not want to resched.
@@ -627,7 +740,7 @@ static inline int ftrace_preempt_disable(void)
  *
  * This is a scheduler safe way to enable preemption and not miss
  * any preemption checks. The disabled saved the state of preemption.
- * If resched is set, then we were either inside an atomic or
+ * If resched is set, then we are either inside an atomic or
  * are inside the scheduler (we would have already scheduled
  * otherwise). In this case, we do not want to call normal
  * preempt_enable, but preempt_enable_no_resched instead.
@@ -664,4 +777,118 @@ static inline void trace_branch_disable(void)
 }
 #endif /* CONFIG_BRANCH_TRACER */
 
+/* set ring buffers to default size if not already done so */
+int tracing_update_buffers(void);
+
+/* trace event type bit fields, not numeric */
+enum {
+       TRACE_EVENT_TYPE_PRINTF         = 1,
+       TRACE_EVENT_TYPE_RAW            = 2,
+};
+
+struct ftrace_event_field {
+       struct list_head        link;
+       char                    *name;
+       char                    *type;
+       int                     offset;
+       int                     size;
+};
+
+struct ftrace_event_call {
+       char                    *name;
+       char                    *system;
+       struct dentry           *dir;
+       int                     enabled;
+       int                     (*regfunc)(void);
+       void                    (*unregfunc)(void);
+       int                     id;
+       int                     (*raw_init)(void);
+       int                     (*show_format)(struct trace_seq *s);
+       int                     (*define_fields)(void);
+       struct list_head        fields;
+       struct filter_pred      **preds;
+
+#ifdef CONFIG_EVENT_PROFILE
+       atomic_t        profile_count;
+       int             (*profile_enable)(struct ftrace_event_call *);
+       void            (*profile_disable)(struct ftrace_event_call *);
+#endif
+};
+
+struct event_subsystem {
+       struct list_head        list;
+       const char              *name;
+       struct dentry           *entry;
+       struct filter_pred      **preds;
+};
+
+#define events_for_each(event)                                         \
+       for (event = __start_ftrace_events;                             \
+            (unsigned long)event < (unsigned long)__stop_ftrace_events; \
+            event++)
+
+#define MAX_FILTER_PRED 8
+
+struct filter_pred;
+
+typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
+
+struct filter_pred {
+       filter_pred_fn_t fn;
+       u64 val;
+       char *str_val;
+       int str_len;
+       char *field_name;
+       int offset;
+       int not;
+       int or;
+       int compound;
+       int clear;
+};
+
+int trace_define_field(struct ftrace_event_call *call, char *type,
+                      char *name, int offset, int size);
+extern void filter_free_pred(struct filter_pred *pred);
+extern void filter_print_preds(struct filter_pred **preds,
+                              struct trace_seq *s);
+extern int filter_parse(char **pbuf, struct filter_pred *pred);
+extern int filter_add_pred(struct ftrace_event_call *call,
+                          struct filter_pred *pred);
+extern void filter_free_preds(struct ftrace_event_call *call);
+extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern void filter_free_subsystem_preds(struct event_subsystem *system);
+extern int filter_add_subsystem_pred(struct event_subsystem *system,
+                                    struct filter_pred *pred);
+
+void event_trace_printk(unsigned long ip, const char *fmt, ...);
+extern struct ftrace_event_call __start_ftrace_events[];
+extern struct ftrace_event_call __stop_ftrace_events[];
+
+#define for_each_event(event)                                          \
+       for (event = __start_ftrace_events;                             \
+            (unsigned long)event < (unsigned long)__stop_ftrace_events; \
+            event++)
+
+extern const char *__start___trace_bprintk_fmt[];
+extern const char *__stop___trace_bprintk_fmt[];
+
+/*
+ * The double __builtin_constant_p is because gcc will give us an error
+ * if we try to allocate the static variable to fmt if it is not a
+ * constant. Even with the outer if statement optimizing out.
+ */
+#define event_trace_printk(ip, fmt, args...)                           \
+do {                                                                   \
+       __trace_printk_check_format(fmt, ##args);                       \
+       tracing_record_cmdline(current);                                \
+       if (__builtin_constant_p(fmt)) {                                \
+               static const char *trace_printk_fmt                     \
+                 __attribute__((section("__trace_printk_fmt"))) =      \
+                       __builtin_constant_p(fmt) ? fmt : NULL;         \
+                                                                       \
+               __trace_bprintk(ip, trace_printk_fmt, ##args);          \
+       } else                                                          \
+               __trace_printk(ip, fmt, ##args);                        \
+} while (0)
+
 #endif /* _LINUX_KERNEL_TRACE_H */